Skip to content

Commit

Permalink
Merge pull request #199 from mskcc/release/0.9.0
Browse files Browse the repository at this point in the history
Release 0.9.0
  • Loading branch information
evanbiederstedt authored Apr 30, 2019
2 parents 9238d1f + 582af9e commit 2d3c54d
Show file tree
Hide file tree
Showing 18 changed files with 539 additions and 642 deletions.
489 changes: 32 additions & 457 deletions README.md

Large diffs are not rendered by default.

2 changes: 1 addition & 1 deletion conf/awsbatch.config.template
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@

aws.region = <AWS-REGION>
aws.client.storageEncryption = <STORAGE-ENCRYPTION>
aws.mountPoint = '/scratch'
aws.batch.volumes = ['/scratch']
workDir = <AWS-S3-WORKDIR>
outDir = <AWS-S3-OUTDIR>

Expand Down
8 changes: 7 additions & 1 deletion conf/containers.config
Original file line number Diff line number Diff line change
Expand Up @@ -79,14 +79,17 @@
withName:MergeDellyAndManta {
container = "cmopipeline/bcftools:v1.9"
}
withName:MergeStrelka2Vcfs {
container = "cmopipeline/bcftools:v1.9"
}
withName:RunBcfToolsOnDellyManta {
container = "cmopipeline/bcftools:v1.9"
}
withName:RunBcfToolsFilterOnDellyManta {
container = "cmopipeline/bcftools:v1.9"
}
withName:CombineChannel {
container = "ubuntu:latest"
container = "cmopipeline/bcftools:v1.9"
}
withName:RunBcfToolsFilterNorm {
container = "cmopipeline/bcftools:v1.9"
Expand All @@ -103,4 +106,7 @@
withName:RunHlaPolysolver {
container = "sachet/polysolver:v4"
}
withName:RunConpair {
container = "cmopipeline/conpair:v0.3.3"
}
}
7 changes: 5 additions & 2 deletions conf/references.config
Original file line number Diff line number Diff line change
Expand Up @@ -41,8 +41,6 @@ params {
snpeffDb = "GRCh37.75"
vepCacheVersion = "95"
vepCache = "${params.reference_base}/mskcc-igenomes/grch37/vep/cache"
vcf2mafFilterVcf = "${vepCache}/ExAC_nonTCGA.r0.3.1.sites.vep.vcf.gz"
vcf2mafFilterVcfIndex = "${vcf2mafFilterVcf}.tbi"
msiSensorList = "${params.genome_base}/Sequence/WholeGenomeFasta/human_g1k_v37_decoy.fasta.microsatellites.list"
facetsVcf = "${params.reference_base}/mskcc-igenomes/igenomes/Homo_sapiens/GATK/b37/dbsnp_137.b37__RmDupsClean__plusPseudo50__DROP_SORT.vcf"
svCallingExcludeRegions = "${params.reference_base}/mskcc-igenomes/grch37/sv_calling/human.hg19.excl.tsv"
Expand All @@ -54,6 +52,11 @@ params {
agilentTargetsIndex = "${agilentTargets}.tbi"
wgsTargets = "${params.reference_base}/mskcc-igenomes/grch37/targets/b37_wgs_calling_regions.v1.bed.gz"
wgsTargetsIndex = "${wgsTargets}.tbi"
repeatMasker = "${params.reference_base}/mskcc-igenomes/grch37/annotation/rmsk_mod.bed.gz"
repeatMaskerIndex = "${repeatMasker}.tbi"
mapabilityBlacklist = "${params.reference_base}/mskcc-igenomes/grch37/annotation/wgEncodeDacMapabilityConsensusExcludable.bed.gz"
mapabilityBlacklistIndex = "${mapabilityBlacklist}.tbi"
isoforms = "${params.reference_base}/mskcc-igenomes/grch37/annotation/isoforms"
}
'GRCh38' {
acLoci = "${params.genome_base}/Annotation/ASCAT/1000G_phase3_GRCh38_maf0.3.loci"
Expand Down
9 changes: 8 additions & 1 deletion conf/resources.config
Original file line number Diff line number Diff line change
Expand Up @@ -86,6 +86,10 @@
cpus = { 2 }
memory = { 6.GB }
}
withName:MergeStrelka2Vcfs {
cpus = { 2 }
memory = { 6.GB }
}
withName:MergeDellyAndManta {
cpus = { 2 }
memory = { 6.GB }
Expand Down Expand Up @@ -122,5 +126,8 @@
cpus = { 2 }
memory = { 6.GB }
}
withName:RunConpair {
cpus = { 2 }
memory = { 6.GB }
}
}

13 changes: 10 additions & 3 deletions conf/resources_aws.config
Original file line number Diff line number Diff line change
Expand Up @@ -86,6 +86,10 @@
cpus = { 8 }
memory = { 32.GB }
}
withName:MergeStrelka2Vcfs {
cpus = { 8 }
memory = { 32.GB }
}
withName:MergeDellyAndManta {
cpus = { 8 }
memory = { 32.GB }
Expand All @@ -103,8 +107,8 @@
memory = { 32.GB }
}
withName:CombineChannel{
cpus = { 2 }
memory = { 4.GB }
cpus = { 8 }
memory = { 32.GB }
}
withName:RunBcfToolsFilterNorm {
cpus = { 8 }
Expand All @@ -122,5 +126,8 @@
cpus = { 8 }
memory = { 32.GB }
}
withName:RunConpair {
cpus = { 8 }
memory = { 32.GB }
}
}

12 changes: 10 additions & 2 deletions conf/resources_juno.config
Original file line number Diff line number Diff line change
Expand Up @@ -86,6 +86,10 @@
cpus = { 8 }
memory = { 4.MB }
}
withName:MergeStrelka2Vcfs {
cpus = { 8 }
memory = { 4.MB }
}
withName:MergeDellyAndManta {
cpus = { 8 }
memory = { 4.MB }
Expand All @@ -103,8 +107,8 @@
memory = { 4.MB }
}
withName:CombineChannel{
cpus = { 2 }
memory = { 2.MB }
cpus = { 8 }
memory = { 4.MB }
}
withName:RunBcfToolsFilterNorm {
cpus = { 8 }
Expand All @@ -122,5 +126,9 @@
cpus = { 8 }
memory = { 4.MB }
}
withName:RunConpair {
cpus = { 8 }
memory = { 4.MB }
}
}

56 changes: 56 additions & 0 deletions containers/conpair/Dockerfile
Original file line number Diff line number Diff line change
@@ -0,0 +1,56 @@
FROM ubuntu:16.04

LABEL authors="Nikhil Kumar ([email protected]), Evan Biederstedt ([email protected]), C. Allan Bolipata ([email protected])" \
version.image="1.0.0" \
version.conpair="0.3.3" \
version.gatk="3.8-1" \
source.conpair="https://github.com/mskcc/Conpair/releases/tag/0.3.3" \
source.r="https://pkgs.alpinelinux.org/package/edge/community/x86/R"

ENV CONPAIR_VERSION 0.3.3

# Install system packages: Python 2 together with pip (the `pip install` step
# further down this Dockerfile needs the pip command), download/archive
# utilities, bedtools, samtools, tabix and OpenJDK 8.
RUN apt-get update && \
    apt-get upgrade -y && \
    apt-get install -y python python-pip && \
    apt-get install -y wget \
        curl \
        bc \
        unzip \
        less \
        bedtools \
        samtools \
        openjdk-8-jdk \
        tabix \
        bzip2 \
        software-properties-common && \
    apt-get -y clean && \
    apt-get -y autoclean && \
    apt-get -y autoremove

# Download GATK
# Fetches the GATK 3.8-1 archive into /tmp, unpacks it, moves GenomeAnalysisTK.jar
# to /usr/bin, and finally empties /tmp so the archive is not kept in the layer.
RUN cd /tmp \
&& wget "https://software.broadinstitute.org/gatk/download/auth?package=GATK-archive&version=3.8-1-0-gf15c1c3ef" -O gatk-3.8-1.tar.bz2 \
&& tar xvjf gatk-3.8-1.tar.bz2 \
&& mv /tmp/GenomeAnalysisTK-3.8-1-0-gf15c1c3ef/GenomeAnalysisTK.jar /usr/bin/GenomeAnalysisTK.jar \
&& rm -rf /tmp/*

## install R
## apt-get update runs in the same layer as the install so the package index
## used here is never a stale one taken from the build cache
RUN apt-get update \
    && apt-get install -y r-base-core r-base-dev \
## install R dependencies
    && R -e "install.packages('ggplot2', dependencies=TRUE, repos='http://cran.rstudio.com/')" \
    && R -e "install.packages('reshape2', dependencies=TRUE, repos='http://cran.rstudio.com/')"

## Python dependencies
## install numpy and scipy (both pinned to exact versions)
## NOTE(review): requires the `pip` command to already exist in the image - confirm an earlier step installs it
RUN pip install numpy==1.15.4 \
&& pip install scipy==1.1.0

## download Conpair
## fetches the release tarball for CONPAIR_VERSION from GitHub and unpacks it in /tmp
RUN cd /tmp && wget https://github.com/mskcc/Conpair/archive/${CONPAIR_VERSION}.tar.gz \
&& tar xvzf ${CONPAIR_VERSION}.tar.gz \
## install conpair
## the unpacked source tree is moved to /usr/bin/conpair
&& mv /tmp/Conpair-${CONPAIR_VERSION} /usr/bin/conpair \
## clean up
## removes the downloaded tarball and anything else left in /tmp
&& rm -rf /tmp/*

## PYTHONNOUSERSITE set to any value stops Python from adding the per-user
## site-packages directory to sys.path (see the Python command-line docs)
ENV PYTHONNOUSERSITE set
28 changes: 28 additions & 0 deletions containers/lohhla/Dockerfile
Original file line number Diff line number Diff line change
@@ -0,0 +1,28 @@
FROM continuumio/anaconda3:5.3.0

LABEL authors="Evan Biederstedt ([email protected])" \
version.image="1.0.0" \
version.lohhla="1.0.0"

## Create a conda environment named "env" with Python 3.6.
## --yes answers conda's confirmation prompt explicitly; a docker build has no
## terminal to answer it from.
RUN conda create --yes -n env python=3.6
## NOTE(review): `>` replaces any existing ~/.bashrc - confirm that is intended
RUN echo "source activate env" > ~/.bashrc

## Put the environment's bin directory first on PATH
ENV PATH /opt/conda/envs/env/bin:$PATH

# Updating Anaconda packages
RUN conda update --yes conda
RUN conda update --yes anaconda
RUN conda update --yes --all

## Set up channels
## https://bioconda.github.io/index.html
RUN conda config --add channels defaults
RUN conda config --add channels bioconda
RUN conda config --add channels conda-forge

## install lohhla
## https://bioconda.github.io/recipes/lohhla/README.html
RUN conda install --yes lohhla
RUN conda update --yes lohhla


32 changes: 32 additions & 0 deletions docs/ANNOTATION.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,32 @@
# Variant annotation

## GRCh37

### SNVs and indels
Basic annotation of merged `vcf` files from the individual variant callers is carried out in two steps. First, the combined `vcf` is annotated with information from [RepeatMasker](http://www.repeatmasker.org/) and the [ENCODE consortium](http://rohsdb.cmb.usc.edu/GBshape/ENCODE/index.html). These files are retrieved from the [UCSC genome browser](https://genome.ucsc.edu) and processed as follows:

``` shell
# Download the hg19 rmsk table from UCSC and decompress it
wget http://hgdownload.cse.ucsc.edu/goldenPath/hg19/database/rmsk.txt.gz
gunzip rmsk.txt.gz
# Keep columns 6-8 and 12, retain only lines mentioning Low_complexity or
# Simple_repeat, and strip the leading "chr" from each line
cut -f6-8,12 rmsk.txt | \
grep -e "Low_complexity" -e "Simple_repeat" | \
sed 's/^chr//g'> rmsk_mod.bed
# Compress with bgzip and build a tabix index using the BED preset
bgzip rmsk_mod.bed
tabix --preset bed rmsk_mod.bed.gz

# Same treatment for the wgEncodeDacMapabilityConsensusExcludable BED file:
# download, decompress, strip the leading "chr" in place, bgzip and index
wget http://hgdownload.cse.ucsc.edu/goldenPath/hg19/encodeDCC/wgEncodeMapability/wgEncodeDacMapabilityConsensusExcludable.bed.gz
gunzip wgEncodeDacMapabilityConsensusExcludable.bed.gz
sed -i 's/^chr//g' wgEncodeDacMapabilityConsensusExcludable.bed
bgzip wgEncodeDacMapabilityConsensusExcludable.bed
tabix --preset bed wgEncodeDacMapabilityConsensusExcludable.bed.gz
```

Subsequently, [vcf2maf](https://github.com/mskcc/vcf2maf) is used to annotate functional effects of mutations as well as other metadata using [VEP](https://www.ensembl.org/vep). The `--custom-enst` argument to vcf2maf takes a list of preferred gene transcript isoforms onto which mutations are mapped. We supply a consensus list of [`isoform_overrides_at_mskcc` and `isoform_overrides_uniprot`](https://github.com/mskcc/vcf2maf/tree/master/data), generated as follows:
``` r
# Build the consensus isoform list: every MSKCC override, plus the UniProt
# overrides for genes that have no MSKCC entry.
library(dplyr)  # attaches the %>% pipe used below

t1 = readr::read_tsv('isoform_overrides_at_mskcc')
t2 = readr::read_tsv('isoform_overrides_uniprot')
t2 %>%
    # keep UniProt rows whose gene is absent from the MSKCC list
    # (base-R negation of %in%; %nin% is not provided by dplyr or readr)
    dplyr::filter(!(gene_name %in% t1$gene_name)) %>%
    dplyr::bind_rows(., t1) %>%
    readr::write_tsv('isoforms')
```
2 changes: 2 additions & 0 deletions docs/INTERVALS.md
Original file line number Diff line number Diff line change
@@ -1,3 +1,5 @@
# Genomic intervals

## GRCh37

### Genome
Expand Down
8 changes: 4 additions & 4 deletions docs/README.md
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
# Contents

1. [References](https://github.mskcc/vaporware/blob/master/docs/REFERENCES.md)
1. [Intervals](https://github.mskcc/vaporware/blob/master/docs/INTERVALS.md)
2. ...
3. ...
1. [References](https://github.com/mskcc/vaporware/blob/master/docs/REFERENCES.md)
2. [Intervals](https://github.com/mskcc/vaporware/blob/master/docs/INTERVALS.md)
3. [Variant Annotation](https://github.com/mskcc/vaporware/blob/master/docs/ANNOTATION.md)
4. ...
37 changes: 27 additions & 10 deletions germline.nf
Original file line number Diff line number Diff line change
Expand Up @@ -594,20 +594,37 @@ def extractBamFiles(tsvFile) {
Channel.from(tsvFile)
.splitCsv(sep: '\t')
.map { row ->
VaporwareUtils.checkNumberOfItem(row, 8)
checkNumberOfItem(row, 8)
def assay = row[0]
def target = row[1]
def idTumor = row[2]
def idNormal = row[3]
def bamTumor = VaporwareUtils.returnFile(row[4])
def bamNormal = VaporwareUtils.returnFile(row[5])
def baiTumor = VaporwareUtils.returnFile(row[6])
def baiNormal = VaporwareUtils.returnFile(row[7])

VaporwareUtils.checkFileExtension(bamTumor,".bam")
VaporwareUtils.checkFileExtension(bamNormal,".bam")
VaporwareUtils.checkFileExtension(baiTumor,".bai")
VaporwareUtils.checkFileExtension(baiNormal,".bai")
def bamTumor = returnFile(row[4])
def bamNormal = returnFile(row[5])
def baiTumor = returnFile(row[6])
def baiNormal = returnFile(row[7])

checkFileExtension(bamTumor,".bam")
checkFileExtension(bamNormal,".bam")
checkFileExtension(baiTumor,".bai")
checkFileExtension(baiNormal,".bai")
[ assay, target, idTumor, idNormal, bamTumor, bamNormal, baiTumor, baiNormal ]
}
}

/*
 * Verify that a file name ends with the expected extension.
 * Both the name and the extension are lower-cased before comparing, so the
 * check is case-insensitive. On a mismatch the run is stopped via `exit 1`
 * with an explanatory message; otherwise nothing is returned.
 */
def checkFileExtension(it, extension) {
  def name = it.toString().toLowerCase()
  def suffix = extension.toLowerCase()
  if (name.endsWith(suffix)) {
    return
  }
  exit 1, "File: ${it} has the wrong extension: ${extension} see --help for more information"
}

/*
 * Verify that a parsed TSV row holds exactly `number` items.
 * Stops the run via `exit 1` when the size differs; returns true otherwise.
 */
def checkNumberOfItem(row, number) {
  def hasExpectedSize = (row.size() == number)
  if (!hasExpectedSize) {
    exit 1, "Malformed row in TSV file: ${row}, see --help for more information"
  }
  true
}

/*
 * Resolve a path taken from the TSV file with `file()` and hand it back.
 * Stops the run via `exit 1` when the resolved file does not exist.
 */
def returnFile(it) {
  def missing = !file(it).exists()
  if (missing) {
    exit 1, "Missing file in TSV file: ${it}, see --help for more information"
  }
  file(it)
}
Loading

0 comments on commit 2d3c54d

Please sign in to comment.