Skip to content

Latest commit

 

History

History
227 lines (196 loc) · 7.85 KB

160627.md

File metadata and controls

227 lines (196 loc) · 7.85 KB
---[p.251]---------------------------------------------
The differential analysis of NGS data using limma

limma package :Linear Models for Microarray Data
参考:http://www.statsci.org/smyth/pubs/limma-biocbook-reprint.pdf

limmaでは、修正t統計量使う(21p下のtjk)

> source("http://bioconductor.org/biocLite.R")
Bioconductor version 3.1 (BiocInstaller 1.18.5), ?biocLite for help
A newer version of Bioconductor is available for this version of R,
  ?BiocUpgrade for help
> biocLite(c("DESeq","pasilla"))
> library(limma)
> library(DESeq)
 要求されたパッケージ BiocGenerics をロード中です 
 要求されたパッケージ parallel をロード中です 

> library(pasilla)
> data(pasillaGenes)
> pasillaGenes
CountDataSet (storageMode: environment)
assayData: 14470 features, 7 samples 
  element names: counts 
protocolData: none
phenoData
  sampleNames: treated1fb treated2fb ... untreated4fb (7
    total)
  varLabels: sizeFactor condition type
  varMetadata: labelDescription
featureData: none
experimentData: use 'experimentData(object)'
  pubMedIds: 20921232 
Annotation:  

> 
> eset <- counts(pasillaGenes)
> 
> head(eset)
            treated1fb treated2fb treated3fb untreated1fb untreated2fb
FBgn0000003          0          0          1            0            0
FBgn0000008         78         46         43           47           89
FBgn0000014          2          0          0            0            0
FBgn0000015          1          0          1            0            1
FBgn0000017       3187       1672       1859         2445         4615
FBgn0000018        369        150        176          288          383
            untreated3fb untreated4fb
FBgn0000003            0            0
FBgn0000008           53           27
FBgn0000014            1            0
FBgn0000015            1            2
FBgn0000017         2063         1711
FBgn0000018          135          174

> colnames(eset) <- c(paste("T",1:3,sep="_"),paste("C",1:4,sep="_"))
> 
> head(eset)
             T_1  T_2  T_3  C_1  C_2  C_3  C_4
FBgn0000003    0    0    1    0    0    0    0
FBgn0000008   78   46   43   47   89   53   27
FBgn0000014    2    0    0    0    0    1    0
FBgn0000015    1    0    1    0    1    1    2
FBgn0000017 3187 1672 1859 2445 4615 2063 1711
FBgn0000018  369  150  176  288  383  135  174
> design<- cbind(Intercept=1,trt=c(1,1,1,0,0,0,0))
> 
> design
     Intercept trt
[1,]         1   1
[2,]         1   1
[3,]         1   1
[4,]         1   0
[5,]         1   0
[6,]         1   0
[7,]         1   0
> 

> eset_voom<-voom(eset,design,plot=FALSE)
> eset_voom
An object of class "EList"
$E
                  T_1       T_2       T_3       C_1       C_2
FBgn0000003 -4.307920 -3.509626 -2.119090 -3.871808 -4.553117
FBgn0000008  2.986700  3.029533  2.738891  2.698047  2.930699
FBgn0000014 -1.985992 -3.509626 -3.704053 -3.871808 -4.553117
FBgn0000015 -2.722958 -3.509626 -2.119090 -3.871808 -2.968154
FBgn0000017  8.330289  8.198164  8.156646  8.384105  8.619154
                  C_3       C_4
FBgn0000003 -3.413730 -3.572297
FBgn0000008  3.327737  2.209062
FBgn0000014 -1.828768 -3.572297
FBgn0000015 -1.828768 -1.250369
FBgn0000017  8.597148  8.168748
14465 more rows ...
:

> summary(eset_voom)
        Length Class      Mode   
E       101290 -none-     numeric
weights 101290 -none-     numeric
design      14 -none-     numeric
targets      1 data.frame list   

---------修正t検定-----------------
> fit<- lmFit(eset_voom,design)
> fitE <- eBayes(fit)
> 

> summary(fit)
                 Length Class  Mode     
coefficients     28940  -none- numeric  
stdev.unscaled   28940  -none- numeric  
sigma            14470  -none- numeric  
df.residual      14470  -none- numeric  
cov.coefficients     4  -none- numeric  
pivot                2  -none- numeric  
rank                 1  -none- numeric  
Amean            14470  -none- numeric  
method               1  -none- character
design              14  -none- numeric  
> summary(fitE)
                 Length Class  Mode     
coefficients     28940  -none- numeric  
stdev.unscaled   28940  -none- numeric  
sigma            14470  -none- numeric  
df.residual      14470  -none- numeric  
cov.coefficients     4  -none- numeric  
pivot                2  -none- numeric  
rank                 1  -none- numeric  
Amean            14470  -none- numeric  
method               1  -none- character
design              14  -none- numeric  
df.prior             1  -none- numeric  
s2.prior             1  -none- numeric  
var.prior            2  -none- numeric  
proportion           1  -none- numeric  
s2.post          14470  -none- numeric  
t                28940  -none- numeric  
df.total         14470  -none- numeric  
p.value          28940  -none- numeric  
lods             28940  -none- numeric  
F                14470  -none- numeric  
F.p.value        14470  -none- numeric  

遺伝子数多の為に、多重比較によるP値調整が必要になる。P値をBenjamini-Hochberg法(BH法、もしくはFDR法)で調整する。調整P値はAdjusted P-Value、もしくはP-Valueと呼ばれる。

参考:
http://www.med.osaka-u.ac.jp/pub/kid/clinicaljournalclub1.html
http://stat.biopapyrus.net/multivariate-test/fdr-controlling-bh.html


> top1<-topTable(fitE,n=nrow(eset),coef=2,
adjust="BH")


> 
> summary(top1)
     logFC             AveExpr             t            
 Min.   :-4.73272   Min.   :-3.848   Min.   :-22.05748  
 1st Qu.:-0.10538   1st Qu.:-3.395   1st Qu.: -0.48128  
 Median : 0.01824   Median : 1.384   Median :  0.03608  
 Mean   : 0.06870   Mean   : 1.411   Mean   :  0.25334  
 3rd Qu.: 0.23333   3rd Qu.: 5.536   3rd Qu.:  0.99811  
 Max.   : 4.19218   Max.   :13.762   Max.   : 14.10843  
    P.Value         adj.P.Val               B         
 Min.   :0.0000   Min.   :0.0000029   Min.   :-7.251  
 1st Qu.:0.1771   1st Qu.:0.7027384   1st Qu.:-6.315  
 Median :0.4848   Median :0.9689724   Median :-5.579  
 Mean   :0.5060   Mean   :0.7990241   Mean   :-5.392  
 3rd Qu.:0.8750   3rd Qu.:0.9867547   3rd Qu.:-5.199  
 Max.   :1.0000   Max.   :0.9999648   Max.   :13.860  
> 
> topgenes1 <- rownames(top1[which(top1$adj.P.Val<0.05),])
> topgenes1
  [1] "FBgn0029167"                        
  [2] "FBgn0035085"                        
  [3] "FBgn0039155"                        
  [4] "FBgn0001226"                        
  [5] "FBgn0011260"                        
  [6] "FBgn0034736"                        
  [7] "FBgn0029896"                        
  [8] "FBgn0000071"                        
  [9] "FBgn0051092"                        
   :
---------------------------------------


---------------------------------------


> hist(top1$adj.P.Val,xlab="Adj.p-value",main="Histogram for p-value")



> clr <- rep("black",nrow(top1))
> 
> clr[which(top1$adj.P.Val<0.05)]
  [1] "black" "black" "black" "black" "black" "black" "black" "black"






> plot(x=top1$logFC,y=-log10(top1$adj.P.Val),col=clr,xlab="log fold change",ylab="-logP",main="Volcano Plot")
> 
> abline(h=-log10(0.05),col="blue")

> rownames(top1[which(top1$adj.P.Val<0.05&abs(top1$logFC)>2),])
 [1] "FBgn0029167" "FBgn0035085" "FBgn0039155" "FBgn0011260"
 [5] "FBgn0034736" "FBgn0029896" "FBgn0000071" "FBgn0051092"
 [9] "FBgn0026562" "FBgn0003501" "FBgn0033764" "FBgn0035189"
[13] "FBgn0034434" "FBgn0037290" "FBgn0260011" "FBgn0024288"
[17] "FBgn0051642" "FBgn0034438" "FBgn0038832" "FBgn0032405"
[21] "FBgn0020248" "FBgn0052407" "FBgn0261284" "FBgn0040827"
[25] "FBgn0038198" "FBgn0030598" "FBgn0039827" "FBgn0050463"
[29] "FBgn0039593" "FBgn0037754" "FBgn0030763" "FBgn0085359"
[33] "FBgn0033065" "FBgn0030041" "FBgn0010387" "FBgn0038237"
[37] "FBgn0032436" "FBgn0053318" "FBgn0038012" "FBgn0063667"
[41] "FBgn0030964" "FBgn0033760" "FBgn0051555" "FBgn0046258"
[45] "FBgn0037143" "FBgn0259236" "FBgn0037223" "FBgn0037191"
[49] "FBgn0002578" "FBgn0051663" "FBgn0052700" "FBgn0039937"
[53] "FBgn0033733" "FBgn0050324" "FBgn0034898" "FBgn0028939"
[57] "FBgn0051776" "FBgn0032770" "FBgn0020639