Skip to content

Latest commit

 

History

History
308 lines (233 loc) · 9.56 KB

160201.md

File metadata and controls

308 lines (233 loc) · 9.56 KB
---------[p.224]--------------------------
Performing multiple groups' analysis in MS data

> data(iTRAQ)
> head(iTRAQ)
    prot       peptide  area113  area114  area115  area116  area117
1 P02654   EFGNTLEDKAR  1705.43  1459.10   770.65  3636.40  3063.48
2 P02654     EWFSETFQK  2730.41  1852.90  1467.65  2266.88  2269.57
3 P02654     IKQSELSAK 28726.38 15409.81 19050.13 58185.02 51416.05
4 P02654   LKEFGNTLEDK  4221.31  4444.28  2559.23  6859.71  5545.12
5 P02654 LKEFGNTLEDKAR 20209.66 14979.02 12164.94 37572.56 30687.57
6 P02654     EFGNTLEDK  4504.97  4871.88  2760.53  9213.41  6728.62
   area118  area119  area121
1  4046.73  2924.49  5767.87
2  3572.32  2064.82  2208.92
3 70721.05 38976.42 60359.72
4 11925.66  6371.50 15656.92
5 39176.99 34417.66 54439.22
6 14761.96  7796.29 18681.60
                                                                        desc
1 Apolipoprotein C-I OS=Homo sapiens GN=APOC1 PE=1 SV=1 sp|P02654|APOC1_HUMA
2 Apolipoprotein C-I OS=Homo sapiens GN=APOC1 PE=1 SV=1 sp|P02654|APOC1_HUMA
3 Apolipoprotein C-I OS=Homo sapiens GN=APOC1 PE=1 SV=1 sp|P02654|APOC1_HUMA
4 Apolipoprotein C-I OS=Homo sapiens GN=APOC1 PE=1 SV=1 sp|P02654|APOC1_HUMA
5 Apolipoprotein C-I OS=Homo sapiens GN=APOC1 PE=1 SV=1 sp|P02654|APOC1_HUMA
6 Apolipoprotein C-I OS=Homo sapiens GN=APOC1 PE=1 SV=1 sp|P02654|APOC1_HUMA
> 

> colnames(iTRAQ)
 [1] "prot"    "peptide" "area113" "area114" "area115" "area116" "area117"
 [8] "area118" "area119" "area121" "desc"   
> 
> 
> cond_1 <- numeric()
> cond_2 <- numeric()
> 
> cond_1
numeric(0)
> 
> for (i in c(3:6)){}
> cond_1 = cbind(cond_1,
+ asinh(tapply(iTRAQ[,i],paste(iTRAQ$prot),sum,na.rm=TRUE)))
> }
Error: unexpected '}' in "}"
> for (i in c(3:6)){
+ cond_1 = cbind(cond_1,
+ asinh(tapply(iTRAQ[,i],paste(iTRAQ$prot),sum,na.rm=TRUE)))
+ }
> for (i in c(7:10)){
+ cond_2 = cbind(cond_2,
+ asinh(tapply(iTRAQ[,i],paste(iTRAQ$prot),sum,na.rm=TRUE)))
+ }
> 
> pv<-c()
> 
> for(i in 1:nrow(cond_2)){
+ pv=c(pv,t.test(as.numeric(cond_1[i,]),as.numeric(cond_2[i,]))$p.value)
+ }
> 
> names(pv) <-levels(iTRAQ$prot)
> 

> plot(x=factor(names(pv)),y=-log10(pv),xlab="Proteins",ylab="-log P value")



---------[p.227]--------------------------

宿題

---------[p.155]--------------------------


Chap.6 Analyzing GWAS Data

- GWAS Project
- Single Nucleotide Polymorphisms (SNP)
- Human Genome Project(HGP) http://hapmap.ncbi.nlm.nih.gov/

- Online Mendelian Inheritance in Men(OMIM) http://omim.org/

参考サイト

    http://www.ims.u-tokyo.ac.jp/nakamura/matsuda/page3.html
    http://www.ebi.ac.uk/gwas/diagram GWAS catalog
    http://www.snpedia.com/index.php/SNPedia SNPedia
    http://journals.plos.org/ploscompbiol/article?id=10.1371/journal.pcbi.1002822


----[p.156]-------------------------------

The SNP association analysis(単一SNPと形質の関連解析)

> install.packages("SNPassoc")
Installing package into ‘C:/Users/ekaminuma/Documents/R/win-library/3.2’
(as ‘lib’ is unspecified)
> library(SNPassoc)

>

> data(SNPs)
 

> dim(SNPs)  157データx(35SNP+5=40)
[1] 157  40



> SNPs  データ確認(NAを確認)
     id casco    sex blood.pre    protein snp10001 snp10002 snp10003 snp10004
1     1     1 Female      13.7  75640.523       TT       CC       GG       GG
2     2     1 Female      12.7  28688.215       TT       AC       GG       GG
3     3     1 Female      12.9  17279.591       TT       CC       GG       GG
4     4     1   Male      14.6  27253.988       CT       CC       GG       GG
5     5     1 Female      13.4  38066.569       TT       AC       GG       GG
6     6     1 Female      11.3   9872.460       TT       CC       GG       GG
7     7     1 Female      11.9  11132.903       TT       AC       GG       GG

<中略>

148        GG        AA      <NA>        AA        AA        TT        TT
149        GG        AA      <NA>        AG        AG        TT        TT
150        GG        AA      <NA>        AG        AG        TT      <NA>
151        GG        AA      <NA>        AA        AA        TT        TT
152        AG        AA      <NA>        GG        GG        CT        TT
153        GG        AA      <NA>        AG        AG        TT        TT
154        AG        AA      <NA>        GG        GG        CT        TT
155        AG        AA      <NA>        GG        GG        CT        TT
156        AG        AA      <NA>        GG        GG        CT        TT
157        GG        AA      <NA>        AG        AG        TT        TT


> 


> head(SNPs)
  id casco    sex blood.pre  protein snp10001 snp10002 snp10003 snp10004 snp10005
1  1     1 Female      13.7 75640.52       TT       CC       GG       GG       GG
2  2     1 Female      12.7 28688.22       TT       AC       GG       GG       AG
3  3     1 Female      12.9 17279.59       TT       CC       GG       GG       GG
4  4     1   Male      14.6 27253.99       CT       CC       GG       GG       GG
5  5     1 Female      13.4 38066.57       TT       AC       GG       GG       GG
6  6     1 Female      11.3  9872.46       TT       CC       GG       GG       GG

> summary(SNPs) 157データの各SNPでの遺伝子型統計

 snp10009  snp100010  snp100011 snp100012 snp100013  snp100014 snp100015 snp100016 
 AA  :72   TT  :147   CC:  1    CC  : 3   AA  :101   AA  :27   AG: 13    GG  :152  
 AG  :79   NA's: 10   CG:  2    CG  :68   AG  : 35   AC  :74   GG:144    NA's:  5  
 GG  : 5              GG:154    GG  :84   GG  :  9   CC  :52                       
 NA's: 1                        NA's: 2   NA's: 12   NA's: 4                 

> SNPs.info.pos
         snp  chr     pos
1   snp10001 Chr1 2987398
2   snp10002 Chr1 1913558
3   snp10003 Chr1 1982067
4   snp10004 Chr1  447403
5   snp10005 Chr1 2212031
6   snp10006 Chr1 2515720
7   snp10007 Chr1 1306743
8   snp10008 Chr1 2063658
9   snp10009 Chr1 3403359
10 snp100010 Chr1 1857134
11 snp100011 Chr2 2439115
12 snp100012 Chr2 1978467
13 snp100013 Chr2 1641528
14 snp100014 Chr2 3852933
15 snp100015 Chr2 1490243
16 snp100016 Chr3 1146474
17 snp100017 Chr3  986342
18 snp100018 Chr3 3790658
19 snp100019 Chr3 3096200
20 snp100020 Chr3  531006
21 snp100021 Chr3 1470689
22 snp100022 Chr3  757945
23 snp100023 Chr4 1889538
24 snp100024 Chr4 3895592
25 snp100025 Chr4 2623465
26 snp100026 Chr4 1913775
27 snp100027 Chr4  384855
28 snp100028 Chr4 3275353
29 snp100029 Chr4 1218029
30 snp100030 Chr4 2876775
31 snp100031 Chr4 2119508
32 snp100032 Chr4 3018951
33 snp100033 Chr4 2031700
34 snp100034 Chr4 2941667
35 snp100035 Chr4 1920963

> mySNP<-setupSNP(SNPs,6:40,sep="")
> head(mySNP)
  id casco    sex blood.pre  protein snp10001 snp10002 snp10003 snp10004 snp10005
1  1     1 Female      13.7 75640.52      T/T      C/C      G/G      G/G      G/G
2  2     1 Female      12.7 28688.22      T/T      A/C      G/G      G/G      A/G
3  3     1 Female      12.9 17279.59      T/T      C/C      G/G      G/G      G/G
4  4     1   Male      14.6 27253.99      C/T      C/C      G/G      G/G      G/G
5  5     1 Female      13.4 38066.57      T/T      A/C      G/G      G/G      G/G
6  6     1 Female      11.3  9872.46      T/T      C/C      G/G      G/G      G/G


*****************************************

関連解析 「association関数」

ケース・コントロール研究

https://cran.r-project.org/web/packages/SNPassoc/SNPassoc.pdfより

association(formula, data, model=c("all"), model.interaction=
c("codominant"), subset, name.snp = NULL, quantitative =
is.quantitative(formula,data), genotypingRate= 0,
level = 0.95, ...)


> myres<- association(casco~sex+snp10001+blood.pre,data=mySNP,model.interaction=c("dominant","codominant"))


> myres

SNP: snp10001  adjusted by: sex blood.pre 
              0    %   1    %   OR lower upper p-value   AIC
Codominant                                                  
T/T          24 51.1  68 61.8 1.00             0.15410 195.8
C/T          21 44.7  32 29.1 0.55  0.26  1.14              
C/C           2  4.3  10  9.1 1.74  0.35  8.63              
Dominant                                                    
T/T          24 51.1  68 61.8 1.00             0.22859 196.1
C/T-C/C      23 48.9  42 38.2 0.65  0.32  1.31              
Recessive                                                   
T/T-C/T      45 95.7 100 90.9 1.00             0.28494 196.4
C/C           2  4.3  10  9.1 2.22  0.46 10.70              
Overdominant                                                
T/T-C/C      26 55.3  78 70.9 1.00             0.07188 194.3
C/T          21 44.7  32 29.1 0.52  0.25  1.06              
log-Additive                                                
0,1,2        47 29.9 110 70.1 0.87  0.51  1.49 0.60861 197.3

----[p.158]-----------------------

How it works



https://www.dynacom.co.jp/product_service/packages/snpalyze/sa_t1_cc.html

***************

> x2<- matrix(c(38,24,34,46),ncol=2)
> x2
     [,1] [,2]
[1,]   38   34
[2,]   24   46
> chisq.test(x2)

	Pearson's Chi-squared test with Yates' continuity correction

data:  x2
X-squared = 4.211, df = 1, p-value = 0.04016

**************

> x3<- matrix(c(38,29,15,36,39,45),ncol=2)
> x3
     [,1] [,2]
[1,]   38   36
[2,]   29   39
[3,]   15   45
> 
> chisq.test(x3)

	Pearson's Chi-squared test

data:  x3
X-squared = 9.7201, df = 2, p-value = 0.00775




---[p.159]-------------------------------

Running association scans for SNP(複数SNPと形質の関連解析)


> xtmp<- matrix(c(24,21,2,68,32,10),ncol=2)
> xtmp
     [,1] [,2]
[1,]   24   68
[2,]   21   32
[3,]    2   10
> chisq.test(xtmp)

	Pearson's Chi-squared test

data:  xtmp
X-squared = 4.0282, df = 2, p-value = 0.1334

Warning message:
In chisq.test(xtmp) :  カイ自乗近似は不正確かもしれません


宿題:SNP10001,10002, 10005,10008,10019,10020についてカイ2乗検定を行う。