Skip to content

Commit

Permalink
Deployed 2a5222d with MkDocs version: 1.5.3
Browse files Browse the repository at this point in the history
  • Loading branch information
jinghuazhao committed Dec 20, 2024
1 parent 0dff980 commit bb06324
Show file tree
Hide file tree
Showing 6 changed files with 74 additions and 42 deletions.
2 changes: 1 addition & 1 deletion index.html
Original file line number Diff line number Diff line change
Expand Up @@ -296,5 +296,5 @@ <h4 class="modal-title" id="keyboardModalLabel">Keyboard Shortcuts</h4>

<!--
MkDocs version : 1.5.3
Build Date UTC : 2024-12-19 18:01:17.146975+00:00
Build Date UTC : 2024-12-20 22:07:42.189113+00:00
-->
88 changes: 52 additions & 36 deletions misc/coloc.R
Original file line number Diff line number Diff line change
@@ -1,15 +1,20 @@
liftRegion <- function(x,flanking=1e6)
liftRegion <- function(x, flanking = 1e6)
{
gr <- with(x,GenomicRanges::GRanges(seqnames=chr,IRanges::IRanges(start,end))+flanking)
gr <- GenomicRanges::GRanges(seqnames = x$chr,
ranges = IRanges::IRanges(start = x$start - flanking, end = x$end + flanking))
seqlevelsStyle(gr) <- "UCSC"
f <- file.path(find.package("pQTLtools"),"eQTL-Catalogue","hg19ToHg38.over.chain")
chain <- rtracklayer::import.chain(f)
gr38 <- rtracklayer::liftOver(gr, chain)
chr <- gsub("chr","",colnames(table(seqnames(gr38))))
start <- min(unlist(start(gr38)))
end <- max(unlist(end(gr38)))
invisible(list(chr=chr[1],start=start,end=end,region=paste0(chr[1],":",start,"-",end)))
lifted_chromosomes <- seqnames(gr38)
lifted_start <- min(start(gr38))
lifted_end <- max(end(gr38))
chr <- gsub("chr", "", as.character(lifted_chromosomes[1]))
region <- paste0(chr, ":", lifted_start, "-", lifted_end)
invisible(list(chr = chr, start = lifted_start, end = lifted_end, region = region))
}

sumstats <- function(prot,chr,region37)
sumstats <- function(prot,chr,region37,chain)
{
cat("GWAS sumstats\n")
tbl <- file.path(analysis,"METAL_dr",paste0(prot,"_dr-1.tbl.gz"))
Expand All @@ -20,6 +25,8 @@ sumstats <- function(prot,chr,region37)
gwas_granges <- with(gwas_stats,GRanges(seqnames = paste0("chr",dplyr::if_else(Chromosome==23,"X",paste(Chromosome))),
ranges = IRanges(start = Position, end = Position),
id = ID,REF=Allele2,ALT=Allele1,AF=Freq1,ES=Effect,SE=StdErr,LP=-logP,SS=N))
f <- file.path(find.package("pQTLtools"),"eQTL-Catalogue","hg19ToHg38.over.chain")
chain <- rtracklayer::import.chain(f)
gwas_stats_hg38 <- rtracklayer::liftOver(gwas_granges, chain) %>%
unlist() %>%
dplyr::as_tibble() %>%
Expand Down Expand Up @@ -84,7 +91,11 @@ gtex <- function(gwas_stats_hg38,ensGene,region38)
~dplyr::filter(., !is.na(se)))
invisible(sapply(1:49, function(i) {
f <- file.path(analysis, "coloc", "GTEx", "sumstats", paste0(prot, "-", names(result_filtered)[i], ".gz"))
write.table(result_filtered[[i]], file = gzfile(f, "w"), col.names = TRUE, row.names = FALSE, quote = FALSE, sep = "\t")
gz <- gzfile(f, "w")
if (!is.null(result_filtered[[i]])) {
write.table(result_filtered[[i]], file = gz, col.names = TRUE, row.names = FALSE, quote = FALSE, sep = "\t")
}
close(gz)
}))
purrr::map_df(result_filtered, ~run_coloc(., gwas_stats_hg38), .id = "qtl_id")
}
Expand All @@ -106,17 +117,18 @@ ge <- function(gwas_stats_hg38,ensGene,region38)
result_filtered <- purrr::map(result_list[lapply(result_list,nrow)!=0],
~dplyr::filter(., !is.na(se)))
invisible(sapply(1:length(result_filtered), function(i) {
f <- file.path(analysis, "coloc", "eQTLCatalogue", "sumstats", paste0(prot, "-", names(result_filtered)[i], ".gz"))
write.table(result_filtered[[i]], file = gzfile(f, "w"), col.names = TRUE, row.names = FALSE, quote = FALSE, sep = "\t")
gz <- gzfile(f, "w")
if (!is.null(result_filtered[[i]])) {
write.table(result_filtered[[i]], file = gz, col.names = TRUE, row.names = FALSE, quote = FALSE, sep = "\t")
}
close(gz)
}))
purrr::map_df(result_filtered, ~run_coloc(., gwas_stats_hg38), .id = "unique_id")
}

gtex_coloc <- function(prot,snp,chr,ensGene,region37,region38,out)
gtex_coloc <- function(prot,chr,ensGene,chain,region37,region38,out)
{
gwas_stats_hg38 <- sumstats(prot,chr,region37)
f <- file.path(psum,paste0(prot,"-",snp,".gz"))
write.table(gwas_stats_hg38,file=gzfile(f, "w"), col.names = TRUE, row.names = FALSE, quote = FALSE, sep = "\t")
gwas_stats_hg38 <- sumstats(prot,chr,region37,chain)
df_gtex <- gtex(gwas_stats_hg38,ensGene,region38)
if (!exists("df_gtex")) return
saveRDS(df_gtex,file=paste0(out,".rds"))
Expand All @@ -128,7 +140,7 @@ gtex_coloc <- function(prot,snp,chr,ensGene,region37,region38,out)
height = 15, width = 15, units = "cm", dpi = 300)
}

ge_coloc <- function(prot,chr,ensGene,region37,region38,out)
ge_coloc <- function(prot,chr,ensGene,chain,region37,region38,out)
{
gwas_stats_hg38 <- sumstats(prot,chr,region37)
df_ge <- ge(gwas_stats_hg38,ensGene,region38)
Expand All @@ -142,7 +154,7 @@ ge_coloc <- function(prot,chr,ensGene,region37,region38,out)
height = 15, width = 15, units = "cm", dpi = 300)
}

all_coloc <- function(prot,chr,ensGene,region37,region38,out)
all_coloc <- function(prot,chr,ensGene,chain,region37,region38,out)
{
gwas_stats_hg38 <- sumstats(prot,chr,region37)
df_microarray <- microarray(gwas_stats_hg38,ensGene,region38)
Expand All @@ -164,25 +176,27 @@ all_coloc <- function(prot,chr,ensGene,region37,region38,out)

single_run <- function(r, batch="GTEx")
{
ss <- subset(pQTLdata::caprion,Protein==paste0(prot,"_HUMAN"))
ensGene <- ss[["ensGenes"]]
sentinel <- sentinels[r,]
chr <- with(sentinel,geneChrom)
ensRegion37 <- with(sentinel,
{
start <- geneStart-M
if (start<0) start <- 0
end <- geneEnd+M
paste0(geneChrom,":",start,"-",end)
paste0(chr,":",start,"-",end)
})
x <- list(chr=geneChrom,start=geneStart,end=geneEnd)
ss <- subset(pQTLdata::caprion,Protein==paste0(sentinel[["prot"]],"_HUMAN"))
ensGene <- ss[["ensGenes"]]
x <- with(sentinel,list(chr=geneChrom,start=geneStart,end=geneEnd))
lr <- liftRegion(x)
ensRegion38 <- with(lr,paste0(chr,":",start-M,"-",end+M))
cat(geneChrom,ensGene,ensRegion37,ensRegion38,"\n")
f <- file.path(analysis,"coloc",batch,paste0(prot,"-",snp))
cat(chr,ensGene,ensRegion37,ensRegion38,"\n")
f <- file.path(analysis,"coloc",batch,with(sentinel,paste0(prot,"-",SNP)))
if (batch=="GTEx")
{
gtex_coloc(prot,snp,geneChrom,ensGene,ensRegion37,ensRegion38,f)
gtex_coloc(sentinel[["prot"]],chr,ensGene,chain,ensRegion37,ensRegion38,f)
} else {
ge_coloc(prot,geneChrom,ensGene,ensRegion37,ensRegion38,f)
ge_coloc(sentinel[["prot"]],chr,ensGene,chain,ensRegion37,ensRegion38,f)
}
}

Expand All @@ -202,6 +216,15 @@ collect <- function(batch="GTEx")
if (nrow(rds)==0) next
df_coloc <- rbind(df_coloc,data.frame(prot=prot,rsid=rsid,snpid=snpid,rds))
}
f <- file.path(find.package("pQTLtools"),"eQTL-Catalogue","hg19ToHg38.over.chain")
chain <- rtracklayer::import.chain(f)
gr <- with(arrange(pQTLdata::caprion,Protein), {
GenomicRanges::GRanges(seqnames = chr,
ranges = IRanges::IRanges(start = start - M, end = end + M),
Protein=Protein,Gene=Gene)
})
seqlevelsStyle(gr) <- "UCSC"
gr38=rtracklayer::liftOver(gr,chain)
caprion_upd <- pQTLdata::caprion %>%
mutate(prot=gsub("_HUMAN","",Protein),gene=Gene)
df <- dplyr::rename(df_coloc,H0=PP.H0.abf,H1=PP.H1.abf,H2=PP.H2.abf,H3=PP.H3.abf,H4=PP.H4.abf) %>%
Expand Down Expand Up @@ -244,6 +267,11 @@ collect <- function(batch="GTEx")

loop_slowly <- function() for (r in 1:nrow(sentinels)) single_run(r)

# Environmental variables

pkgs <- c("dplyr", "gap", "ggplot2", "readr", "coloc", "GenomicRanges","pQTLtools","rtracklayer","seqminer")
invisible(suppressMessages(lapply(pkgs, require, character.only = TRUE)))

options(width=200)
HOME <- Sys.getenv("HOME")
HPC_WORK <- Sys.getenv("HPC_WORK")
Expand All @@ -256,9 +284,6 @@ if (!dir.exists(gsum)) dir.create(gsum)
esum <- file.path(analysis,"coloc","eQTLCatalogue","sumstats")
if (!dir.exists(esum)) dir.create(esum)

pkgs <- c("dplyr", "gap", "ggplot2", "readr", "coloc", "GenomicRanges","pQTLtools","rtracklayer","seqminer")
invisible(suppressMessages(lapply(pkgs, require, character.only = TRUE)))

sevens <- "
ENSG00000131142 - CCL25 19 8052318 8062660
ENSG00000125735 - TNFSF14 19 6661253 6670588
Expand All @@ -274,17 +299,8 @@ caprion <- left_join(pQTLdata::caprion,updates)
sentinels <- subset(read.csv(file.path(analysis,"work","caprion_dr.cis.vs.trans")),cis)
fp <- file.path(find.package("pQTLtools"),"eQTL-Catalogue","tabix_ftp_paths.tsv")
tabix_paths <- read.delim(fp, stringsAsFactors = FALSE) %>% dplyr::as_tibble()
f <- file.path(find.package("pQTLtools"),"eQTL-Catalogue","hg19ToHg38.over.chain")
chain <- rtracklayer::import.chain(f)

r <- as.integer(Sys.getenv("r"))
sentinel <- sentinels[r,]
prot <- sentinel[["prot"]]
snp <- sentinel[["SNP"]]
geneChrom <- sentinel[["geneChrom"]]
geneStart <- sentinel[["geneStart"]]
geneEnd <- sentinel[["geneEnd"]]

single_run(r)
single_run(r,batch="eQTLCatalogue")

Expand Down
12 changes: 9 additions & 3 deletions misc/coloc.sb
Original file line number Diff line number Diff line change
Expand Up @@ -152,6 +152,12 @@ function run_coloc()
function coloc()
# GTEx, eQTL-Catalogue
{
for dir in GTEx/sumstats eQTLCatalogue/sumstats log slurm
do
if [ ! -d ${dir} ]; then
mkdir -p ${analysis}/coloc/${dir}
fi
done
export r=${SLURM_ARRAY_TASK_ID}
export cvt=${analysis}/work/caprion_dr.cis.vs.trans
read prot MarkerName < \
Expand All @@ -160,9 +166,9 @@ function coloc()
echo ${r} - ${prot} - ${MarkerName}
export prot=${prot}
export MarkerName=${MarkerName}
cd ${analysis}/coloc
R --no-save < ${analysis}/misc/coloc.R 2>&1 | \
tee log/${prot}-${MarkerName}.log
cd ${analysis}
R --no-save < coloc.R 2>&1 | \
tee ${analysis}/coloc/log/${prot}-${MarkerName}.log
cd -
}

Expand Down
5 changes: 4 additions & 1 deletion peptide_progs/3.2_collect.sh
Original file line number Diff line number Diff line change
Expand Up @@ -83,7 +83,10 @@ function cistrans()
bind_rows(sept7)
X <- with(ucsc,chr=="X")
ucsc[X,"chr"] <- "23"
caprion <- select(pQTLdata::caprion,Protein,Accession,Gene) %>%
HOME <- Sys.getenv("HOME")
load(paste(HOME,"Caprion","pilot","caprion.rda",sep="/"))
caprion <- protein_list
caprion <- select(caprion,Protein,Accession,Gene) %>%
mutate(Protein=gsub("_HUMAN","",Protein)) %>%
rename(prot=Protein)
quadruple <- function(d,label) data.frame(Gene=label,chr=min(d$chr),start=min(d$start),end=max(d$end))
Expand Down
9 changes: 8 additions & 1 deletion progs/7_merge.sh
Original file line number Diff line number Diff line change
Expand Up @@ -63,7 +63,10 @@ function cistrans()
bind_rows(sept7)
X <- with(ucsc,chr=="X")
ucsc[X,"chr"] <- "23"
caprion <- select(pQTLdata::caprion,Protein,Accession,Gene) %>%
HOME <- Sys.getenv("HOME")
load(paste(HOME,"Caprion","pilot","caprion.rda",sep="/"))
caprion <- protein_list
caprion <- select(caprion,Protein,Accession,Gene) %>%
mutate(Protein=gsub("_HUMAN","",Protein)) %>%
rename(prot=Protein)
quadruple <- function(d,label) data.frame(Gene=label,chr=min(d$chr),start=min(d$start),end=max(d$end))
Expand Down Expand Up @@ -94,6 +97,10 @@ function cistrans()
left_join(caprion_modified) %>%
select(Gene,SNP,prot,log10p)
posSNP <- select(merged,SNP,Chr,bp)
caprion_ucsc_modified <- left_join(caprion_modified, ucsc_modified) %>%
group_by(across(-c(start, end))) %>%
summarize(start = min(start), end = max(end), .groups = 'drop')
save(caprion_ucsc_modified,file=file.path(analysis,"docs","caprion_ucsc_modified.rda"),compress="xz")
cis.vs.trans <- qtlClassifier(pqtls,posSNP,ucsc_modified,1e6) %>%
mutate(geneChrom=as.integer(geneChrom),cis=if_else(Type=="cis",TRUE,FALSE))
table(cis.vs.trans$Type)
Expand Down
Binary file modified sitemap.xml.gz
Binary file not shown.

0 comments on commit bb06324

Please sign in to comment.