---[p.251]---------------------------------------------
The differential analysis of NGS data using limma
limma package :Linear Models for Microarray Data
参考:http://www.statsci.org/smyth/pubs/limma-biocbook-reprint.pdf
limmaでは、修正t統計量使う(21p下のtjk)
> source("http://bioconductor.org/biocLite.R")
Bioconductor version 3.1 (BiocInstaller 1.18.5), ?biocLite for help
A newer version of Bioconductor is available for this version of R,
?BiocUpgrade for help
> biocLite(c("DESeq","pasilla"))
> library(limma)
> library(DESeq)
要求されたパッケージ BiocGenerics をロード中です
要求されたパッケージ parallel をロード中です
> library(pasilla)
> data(pasillaGenes)
> pasillaGenes
CountDataSet (storageMode: environment)
assayData: 14470 features, 7 samples
element names: counts
protocolData: none
phenoData
sampleNames: treated1fb treated2fb ... untreated4fb (7
total)
varLabels: sizeFactor condition type
varMetadata: labelDescription
featureData: none
experimentData: use 'experimentData(object)'
pubMedIds: 20921232
Annotation:
>
> eset <- counts(pasillaGenes)
>
> head(eset)
treated1fb treated2fb treated3fb untreated1fb untreated2fb
FBgn0000003 0 0 1 0 0
FBgn0000008 78 46 43 47 89
FBgn0000014 2 0 0 0 0
FBgn0000015 1 0 1 0 1
FBgn0000017 3187 1672 1859 2445 4615
FBgn0000018 369 150 176 288 383
untreated3fb untreated4fb
FBgn0000003 0 0
FBgn0000008 53 27
FBgn0000014 1 0
FBgn0000015 1 2
FBgn0000017 2063 1711
FBgn0000018 135 174
> colnames(eset) <- c(paste("T",1:3,sep="_"),paste("C",1:4,sep="_"))
>
> head(eset)
T_1 T_2 T_3 C_1 C_2 C_3 C_4
FBgn0000003 0 0 1 0 0 0 0
FBgn0000008 78 46 43 47 89 53 27
FBgn0000014 2 0 0 0 0 1 0
FBgn0000015 1 0 1 0 1 1 2
FBgn0000017 3187 1672 1859 2445 4615 2063 1711
FBgn0000018 369 150 176 288 383 135 174
> design<- cbind(Intercept=1,trt=c(1,1,1,0,0,0,0))
>
> design
Intercept trt
[1,] 1 1
[2,] 1 1
[3,] 1 1
[4,] 1 0
[5,] 1 0
[6,] 1 0
[7,] 1 0
>
> eset_voom<-voom(eset,design,plot=FALSE)
> eset_voom
An object of class "EList"
$E
T_1 T_2 T_3 C_1 C_2
FBgn0000003 -4.307920 -3.509626 -2.119090 -3.871808 -4.553117
FBgn0000008 2.986700 3.029533 2.738891 2.698047 2.930699
FBgn0000014 -1.985992 -3.509626 -3.704053 -3.871808 -4.553117
FBgn0000015 -2.722958 -3.509626 -2.119090 -3.871808 -2.968154
FBgn0000017 8.330289 8.198164 8.156646 8.384105 8.619154
C_3 C_4
FBgn0000003 -3.413730 -3.572297
FBgn0000008 3.327737 2.209062
FBgn0000014 -1.828768 -3.572297
FBgn0000015 -1.828768 -1.250369
FBgn0000017 8.597148 8.168748
14465 more rows ...
:
> summary(eset_voom)
Length Class Mode
E 101290 -none- numeric
weights 101290 -none- numeric
design 14 -none- numeric
targets 1 data.frame list
---------修正t検定-----------------
> fit<- lmFit(eset_voom,design)
> fitE <- eBayes(fit)
>
> summary(fit)
Length Class Mode
coefficients 28940 -none- numeric
stdev.unscaled 28940 -none- numeric
sigma 14470 -none- numeric
df.residual 14470 -none- numeric
cov.coefficients 4 -none- numeric
pivot 2 -none- numeric
rank 1 -none- numeric
Amean 14470 -none- numeric
method 1 -none- character
design 14 -none- numeric
> summary(fitE)
Length Class Mode
coefficients 28940 -none- numeric
stdev.unscaled 28940 -none- numeric
sigma 14470 -none- numeric
df.residual 14470 -none- numeric
cov.coefficients 4 -none- numeric
pivot 2 -none- numeric
rank 1 -none- numeric
Amean 14470 -none- numeric
method 1 -none- character
design 14 -none- numeric
df.prior 1 -none- numeric
s2.prior 1 -none- numeric
var.prior 2 -none- numeric
proportion 1 -none- numeric
s2.post 14470 -none- numeric
t 28940 -none- numeric
df.total 14470 -none- numeric
p.value 28940 -none- numeric
lods 28940 -none- numeric
F 14470 -none- numeric
F.p.value 14470 -none- numeric
遺伝子数多の為に、多重比較によるP値調整が必要になる。P値をBenjamini-Hochberg法(BH法、もしくはFDR法)で調整する。調整P値はAdjusted P-Value、もしくはP-Valueと呼ばれる。
参考:
http://www.med.osaka-u.ac.jp/pub/kid/clinicaljournalclub1.html
http://stat.biopapyrus.net/multivariate-test/fdr-controlling-bh.html
> top1<-topTable(fitE,n=nrow(eset),coef=2,
adjust="BH")
>
> summary(top1)
logFC AveExpr t
Min. :-4.73272 Min. :-3.848 Min. :-22.05748
1st Qu.:-0.10538 1st Qu.:-3.395 1st Qu.: -0.48128
Median : 0.01824 Median : 1.384 Median : 0.03608
Mean : 0.06870 Mean : 1.411 Mean : 0.25334
3rd Qu.: 0.23333 3rd Qu.: 5.536 3rd Qu.: 0.99811
Max. : 4.19218 Max. :13.762 Max. : 14.10843
P.Value adj.P.Val B
Min. :0.0000 Min. :0.0000029 Min. :-7.251
1st Qu.:0.1771 1st Qu.:0.7027384 1st Qu.:-6.315
Median :0.4848 Median :0.9689724 Median :-5.579
Mean :0.5060 Mean :0.7990241 Mean :-5.392
3rd Qu.:0.8750 3rd Qu.:0.9867547 3rd Qu.:-5.199
Max. :1.0000 Max. :0.9999648 Max. :13.860
>
> topgenes1 <- rownames(top1[which(top1$adj.P.Val<0.05),])
> topgenes1
[1] "FBgn0029167"
[2] "FBgn0035085"
[3] "FBgn0039155"
[4] "FBgn0001226"
[5] "FBgn0011260"
[6] "FBgn0034736"
[7] "FBgn0029896"
[8] "FBgn0000071"
[9] "FBgn0051092"
:
---------------------------------------
---------------------------------------
> hist(top1$adj.P.Val,xlab="Adj.p-value",main="Histogram for p-value")
> clr <- rep("black",nrow(top1))
>
> clr[which(top1$adj.P.Val<0.05)]
[1] "black" "black" "black" "black" "black" "black" "black" "black"
> plot(x=top1$logFC,y=-log10(top1$adj.P.Val),col=clr,xlab="log fold change",ylab="-logP",main="Volcano Plot")
>
> abline(h=-log10(0.05),col="blue")
> rownames(top1[which(top1$adj.P.Val<0.05&abs(top1$logFC)>2),])
[1] "FBgn0029167" "FBgn0035085" "FBgn0039155" "FBgn0011260"
[5] "FBgn0034736" "FBgn0029896" "FBgn0000071" "FBgn0051092"
[9] "FBgn0026562" "FBgn0003501" "FBgn0033764" "FBgn0035189"
[13] "FBgn0034434" "FBgn0037290" "FBgn0260011" "FBgn0024288"
[17] "FBgn0051642" "FBgn0034438" "FBgn0038832" "FBgn0032405"
[21] "FBgn0020248" "FBgn0052407" "FBgn0261284" "FBgn0040827"
[25] "FBgn0038198" "FBgn0030598" "FBgn0039827" "FBgn0050463"
[29] "FBgn0039593" "FBgn0037754" "FBgn0030763" "FBgn0085359"
[33] "FBgn0033065" "FBgn0030041" "FBgn0010387" "FBgn0038237"
[37] "FBgn0032436" "FBgn0053318" "FBgn0038012" "FBgn0063667"
[41] "FBgn0030964" "FBgn0033760" "FBgn0051555" "FBgn0046258"
[45] "FBgn0037143" "FBgn0259236" "FBgn0037223" "FBgn0037191"
[49] "FBgn0002578" "FBgn0051663" "FBgn0052700" "FBgn0039937"
[53] "FBgn0033733" "FBgn0050324" "FBgn0034898" "FBgn0028939"
[57] "FBgn0051776" "FBgn0032770" "FBgn0020639