diff --git a/index.html b/index.html index 37f206e..d7bc828 100644 --- a/index.html +++ b/index.html @@ -296,5 +296,5 @@ diff --git a/misc/coloc.R b/misc/coloc.R index 579a7d9..10a2f2e 100644 --- a/misc/coloc.R +++ b/misc/coloc.R @@ -1,15 +1,20 @@ -liftRegion <- function(x,flanking=1e6) +liftRegion <- function(x, flanking = 1e6) { - gr <- with(x,GenomicRanges::GRanges(seqnames=chr,IRanges::IRanges(start,end))+flanking) + gr <- GenomicRanges::GRanges(seqnames = x$chr, + ranges = IRanges::IRanges(start = x$start - flanking, end = x$end + flanking)) seqlevelsStyle(gr) <- "UCSC" + f <- file.path(find.package("pQTLtools"),"eQTL-Catalogue","hg19ToHg38.over.chain") + chain <- rtracklayer::import.chain(f) gr38 <- rtracklayer::liftOver(gr, chain) - chr <- gsub("chr","",colnames(table(seqnames(gr38)))) - start <- min(unlist(start(gr38))) - end <- max(unlist(end(gr38))) - invisible(list(chr=chr[1],start=start,end=end,region=paste0(chr[1],":",start,"-",end))) + lifted_chromosomes <- seqnames(gr38) + lifted_start <- min(start(gr38)) + lifted_end <- max(end(gr38)) + chr <- gsub("chr", "", as.character(lifted_chromosomes[1])) + region <- paste0(chr, ":", lifted_start, "-", lifted_end) + invisible(list(chr = chr, start = lifted_start, end = lifted_end, region = region)) } -sumstats <- function(prot,chr,region37) +sumstats <- function(prot,chr,region37,chain) { cat("GWAS sumstats\n") tbl <- file.path(analysis,"METAL_dr",paste0(prot,"_dr-1.tbl.gz")) @@ -20,6 +25,8 @@ sumstats <- function(prot,chr,region37) gwas_granges <- with(gwas_stats,GRanges(seqnames = paste0("chr",dplyr::if_else(Chromosome==23,"X",paste(Chromosome))), ranges = IRanges(start = Position, end = Position), id = ID,REF=Allele2,ALT=Allele1,AF=Freq1,ES=Effect,SE=StdErr,LP=-logP,SS=N)) + f <- file.path(find.package("pQTLtools"),"eQTL-Catalogue","hg19ToHg38.over.chain") + chain <- rtracklayer::import.chain(f) gwas_stats_hg38 <- rtracklayer::liftOver(gwas_granges, chain) %>% unlist() %>% dplyr::as_tibble() %>% @@ -84,7 +91,11 @@ gtex <- function(gwas_stats_hg38,ensGene,region38) ~dplyr::filter(., !is.na(se))) invisible(sapply(1:49, function(i) { f <- file.path(analysis, "coloc", "GTEx", "sumstats", paste0(prot, "-", names(result_filtered)[i], ".gz")) - write.table(result_filtered[[i]], file = gzfile(f, "w"), col.names = TRUE, row.names = FALSE, quote = FALSE, sep = "\t") + gz <- gzfile(f, "w") + if (!is.null(result_filtered[[i]])) { + write.table(result_filtered[[i]], file = gz, col.names = TRUE, row.names = FALSE, quote = FALSE, sep = "\t") + } + close(gz) })) purrr::map_df(result_filtered, ~run_coloc(., gwas_stats_hg38), .id = "qtl_id") } @@ -106,17 +117,18 @@ ge <- function(gwas_stats_hg38,ensGene,region38) result_filtered <- purrr::map(result_list[lapply(result_list,nrow)!=0], ~dplyr::filter(., !is.na(se))) invisible(sapply(1:length(result_filtered), function(i) { - f <- file.path(analysis, "coloc", "eQTLCatalogue", "sumstats", paste0(prot, "-", names(result_filtered)[i], ".gz")) - write.table(result_filtered[[i]], file = gzfile(f, "w"), col.names = TRUE, row.names = FALSE, quote = FALSE, sep = "\t") + gz <- gzfile(f, "w") + if (!is.null(result_filtered[[i]])) { + write.table(result_filtered[[i]], file = gz, col.names = TRUE, row.names = FALSE, quote = FALSE, sep = "\t") + } + close(gz) })) purrr::map_df(result_filtered, ~run_coloc(., gwas_stats_hg38), .id = "unique_id") } -gtex_coloc <- function(prot,snp,chr,ensGene,region37,region38,out) +gtex_coloc <- function(prot,chr,ensGene,chain,region37,region38,out) { - gwas_stats_hg38 <- sumstats(prot,chr,region37) - f <- file.path(psum,paste0(prot,"-",snp,".gz")) - write.table(gwas_stats_hg38,file=gzfile(f, "w"), col.names = TRUE, row.names = FALSE, quote = FALSE, sep = "\t") + gwas_stats_hg38 <- sumstats(prot,chr,region37,chain) df_gtex <- gtex(gwas_stats_hg38,ensGene,region38) if (!exists("df_gtex")) return saveRDS(df_gtex,file=paste0(out,".rds")) @@ -128,7 +140,7 @@ gtex_coloc <- function(prot,snp,chr,ensGene,region37,region38,out) height = 15, width = 15, units = "cm", dpi = 300) } -ge_coloc <- function(prot,chr,ensGene,region37,region38,out) +ge_coloc <- function(prot,chr,ensGene,chain,region37,region38,out) { gwas_stats_hg38 <- sumstats(prot,chr,region37) df_ge <- ge(gwas_stats_hg38,ensGene,region38) @@ -142,7 +154,7 @@ ge_coloc <- function(prot,chr,ensGene,region37,region38,out) height = 15, width = 15, units = "cm", dpi = 300) } -all_coloc <- function(prot,chr,ensGene,region37,region38,out) +all_coloc <- function(prot,chr,ensGene,chain,region37,region38,out) { gwas_stats_hg38 <- sumstats(prot,chr,region37) df_microarray <- microarray(gwas_stats_hg38,ensGene,region38) @@ -164,25 +176,27 @@ all_coloc <- function(prot,chr,ensGene,region37,region38,out) single_run <- function(r, batch="GTEx") { - ss <- subset(pQTLdata::caprion,Protein==paste0(prot,"_HUMAN")) - ensGene <- ss[["ensGenes"]] + sentinel <- sentinels[r,] + chr <- with(sentinel,geneChrom) ensRegion37 <- with(sentinel, { start <- geneStart-M if (start<0) start <- 0 end <- geneEnd+M - paste0(geneChrom,":",start,"-",end) + paste0(chr,":",start,"-",end) }) - x <- list(chr=geneChrom,start=geneStart,end=geneEnd) + ss <- subset(pQTLdata::caprion,Protein==paste0(sentinel[["prot"]],"_HUMAN")) + ensGene <- ss[["ensGenes"]] + x <- with(sentinel,list(chr=geneChrom,start=geneStart,end=geneEnd)) lr <- liftRegion(x) ensRegion38 <- with(lr,paste0(chr,":",start-M,"-",end+M)) - cat(geneChrom,ensGene,ensRegion37,ensRegion38,"\n") - f <- file.path(analysis,"coloc",batch,paste0(prot,"-",snp)) + cat(chr,ensGene,ensRegion37,ensRegion38,"\n") + f <- file.path(analysis,"coloc",batch,with(sentinel,paste0(prot,"-",SNP))) if (batch=="GTEx") { - gtex_coloc(prot,snp,geneChrom,ensGene,ensRegion37,ensRegion38,f) + gtex_coloc(sentinel[["prot"]],chr,ensGene,chain,ensRegion37,ensRegion38,f) } else { - ge_coloc(prot,geneChrom,ensGene,ensRegion37,ensRegion38,f) + ge_coloc(sentinel[["prot"]],chr,ensGene,chain,ensRegion37,ensRegion38,f) } } @@ -202,6 +216,15 @@ collect <- function(batch="GTEx") if (nrow(rds)==0) next df_coloc <- rbind(df_coloc,data.frame(prot=prot,rsid=rsid,snpid=snpid,rds)) } + f <- file.path(find.package("pQTLtools"),"eQTL-Catalogue","hg19ToHg38.over.chain") + chain <- rtracklayer::import.chain(f) + gr <- with(arrange(pQTLdata::caprion,Protein), { + GenomicRanges::GRanges(seqnames = chr, + ranges = IRanges::IRanges(start = start - M, end = end + M), + Protein=Protein,Gene=Gene) + }) + seqlevelsStyle(gr) <- "UCSC" + gr38=rtracklayer::liftOver(gr,chain) caprion_upd <- pQTLdata::caprion %>% mutate(prot=gsub("_HUMAN","",Protein),gene=Gene) df <- dplyr::rename(df_coloc,H0=PP.H0.abf,H1=PP.H1.abf,H2=PP.H2.abf,H3=PP.H3.abf,H4=PP.H4.abf) %>% @@ -244,6 +267,11 @@ collect <- function(batch="GTEx") loop_slowly <- function() for (r in 1:nrow(sentinels)) single_run(r) +# Environmental variables + +pkgs <- c("dplyr", "gap", "ggplot2", "readr", "coloc", "GenomicRanges","pQTLtools","rtracklayer","seqminer") +invisible(suppressMessages(lapply(pkgs, require, character.only = TRUE))) + options(width=200) HOME <- Sys.getenv("HOME") HPC_WORK <- Sys.getenv("HPC_WORK") @@ -256,9 +284,6 @@ if (!dir.exists(gsum)) dir.create(gsum) esum <- file.path(analysis,"coloc","eQTLCatalogue","sumstats") if (!dir.exists(esum)) dir.create(esum) -pkgs <- c("dplyr", "gap", "ggplot2", "readr", "coloc", "GenomicRanges","pQTLtools","rtracklayer","seqminer") -invisible(suppressMessages(lapply(pkgs, require, character.only = TRUE))) - sevens <- " ENSG00000131142 - CCL25 19 8052318 8062660 ENSG00000125735 - TNFSF14 19 6661253 6670588 @@ -274,17 +299,8 @@ caprion <- left_join(pQTLdata::caprion,updates) sentinels <- subset(read.csv(file.path(analysis,"work","caprion_dr.cis.vs.trans")),cis) fp <- file.path(find.package("pQTLtools"),"eQTL-Catalogue","tabix_ftp_paths.tsv") tabix_paths <- read.delim(fp, stringsAsFactors = FALSE) %>% dplyr::as_tibble() -f <- file.path(find.package("pQTLtools"),"eQTL-Catalogue","hg19ToHg38.over.chain") -chain <- rtracklayer::import.chain(f) r <- as.integer(Sys.getenv("r")) -sentinel <- sentinels[r,] -prot <- sentinel[["prot"]] -snp <- sentinel[["SNP"]] -geneChrom <- sentinel[["geneChrom"]] -geneStart <- sentinel[["geneStart"]] -geneEnd <- sentinel[["geneEnd"]] - single_run(r) single_run(r,batch="eQTLCatalogue") diff --git a/misc/coloc.sb b/misc/coloc.sb index ce9e839..02e3800 100644 --- a/misc/coloc.sb +++ b/misc/coloc.sb @@ -152,6 +152,12 @@ function run_coloc() function coloc() # GTEx, eQTL-Catalogue { + for dir in GTEx/sumstats eQTLCatalogue/sumstats log slurm + do + if [ ! -d ${dir} ]; then + mkdir -p ${analysis}/coloc/${dir} + fi + done export r=${SLURM_ARRAY_TASK_ID} export cvt=${analysis}/work/caprion_dr.cis.vs.trans read prot MarkerName < \ @@ -160,9 +166,9 @@ function coloc() echo ${r} - ${prot} - ${MarkerName} export prot=${prot} export MarkerName=${MarkerName} - cd ${analysis}/coloc - R --no-save < ${analysis}/misc/coloc.R 2>&1 | \ - tee log/${prot}-${MarkerName}.log + cd ${analysis} + R --no-save < coloc.R 2>&1 | \ + tee ${analysis}/coloc/log/${prot}-${MarkerName}.log cd - } diff --git a/peptide_progs/3.2_collect.sh b/peptide_progs/3.2_collect.sh index b74ac46..0b286c3 100644 --- a/peptide_progs/3.2_collect.sh +++ b/peptide_progs/3.2_collect.sh @@ -83,7 +83,10 @@ function cistrans() bind_rows(sept7) X <- with(ucsc,chr=="X") ucsc[X,"chr"] <- "23" - caprion <- select(pQTLdata::caprion,Protein,Accession,Gene) %>% + HOME <- Sys.getenv("HOME") + load(paste(HOME,"Caprion","pilot","caprion.rda",sep="/")) + caprion <- protein_list + caprion <- select(caprion,Protein,Accession,Gene) %>% mutate(Protein=gsub("_HUMAN","",Protein)) %>% rename(prot=Protein) quadruple <- function(d,label) data.frame(Gene=label,chr=min(d$chr),start=min(d$start),end=max(d$end)) diff --git a/progs/7_merge.sh b/progs/7_merge.sh index f4c4b3b..9dd43b6 100644 --- a/progs/7_merge.sh +++ b/progs/7_merge.sh @@ -63,7 +63,10 @@ function cistrans() bind_rows(sept7) X <- with(ucsc,chr=="X") ucsc[X,"chr"] <- "23" - caprion <- select(pQTLdata::caprion,Protein,Accession,Gene) %>% + HOME <- Sys.getenv("HOME") + load(paste(HOME,"Caprion","pilot","caprion.rda",sep="/")) + caprion <- protein_list + caprion <- select(caprion,Protein,Accession,Gene) %>% mutate(Protein=gsub("_HUMAN","",Protein)) %>% rename(prot=Protein) quadruple <- function(d,label) data.frame(Gene=label,chr=min(d$chr),start=min(d$start),end=max(d$end)) @@ -94,6 +97,10 @@ function cistrans() left_join(caprion_modified) %>% select(Gene,SNP,prot,log10p) posSNP <- select(merged,SNP,Chr,bp) + caprion_ucsc_modified <- left_join(caprion_modified, ucsc_modified) %>% + group_by(across(-c(start, end))) %>% + summarize(start = min(start), end = max(end), .groups = 'drop') + save(caprion_ucsc_modified,file=file.path(analysis,"docs","caprion_ucsc_modified.rda"),compress="xz") cis.vs.trans <- qtlClassifier(pqtls,posSNP,ucsc_modified,1e6) %>% mutate(geneChrom=as.integer(geneChrom),cis=if_else(Type=="cis",TRUE,FALSE)) table(cis.vs.trans$Type) diff --git a/sitemap.xml.gz b/sitemap.xml.gz index 84b08e6..63db8ca 100644 Binary files a/sitemap.xml.gz and b/sitemap.xml.gz differ