Skip to content

Commit

Permalink
Merge branch 'release/1.0.12'
Browse files Browse the repository at this point in the history
  • Loading branch information
Walt Shands committed Nov 3, 2017
2 parents f497b9f + 7770eed commit 3e27f37
Show file tree
Hide file tree
Showing 10 changed files with 818 additions and 49 deletions.
4 changes: 2 additions & 2 deletions luigi_task_executor/CNV.py
Original file line number Diff line number Diff line change
Expand Up @@ -36,7 +36,7 @@ class ConsonanceTask(luigi.Task):

redwood_host = luigi.Parameter("storage.ucsc-cgl.org")
redwood_token = luigi.Parameter("must_be_defined")
dockstore_tool_running_dockstore_tool = luigi.Parameter(default="quay.io/ucsc_cgl/dockstore-tool-runner:1.0.21")
dockstore_tool_running_dockstore_tool = luigi.Parameter(default="quay.io/ucsc_cgl/dockstore-tool-runner:1.0.22")

workflow_version = luigi.Parameter(default="must be defined")

Expand Down Expand Up @@ -251,7 +251,7 @@ class CNVCoordinator(luigi.Task):
redwood_token = luigi.Parameter("must_be_defined")
redwood_host = luigi.Parameter(default='storage.ucsc-cgp.org')
image_descriptor = luigi.Parameter("must be defined")
dockstore_tool_running_dockstore_tool = luigi.Parameter(default="quay.io/ucsc_cgl/dockstore-tool-runner:1.0.21")
dockstore_tool_running_dockstore_tool = luigi.Parameter(default="quay.io/ucsc_cgl/dockstore-tool-runner:1.0.22")
tmp_dir = luigi.Parameter(default='/datastore')
max_jobs = luigi.Parameter(default='-1')
bundle_uuid_filename_to_file_uuid = {}
Expand Down
31 changes: 15 additions & 16 deletions luigi_task_executor/Dockerfile_decider
Original file line number Diff line number Diff line change
Expand Up @@ -62,9 +62,6 @@ RUN chown ubuntu:ubuntu consonance-client-"${CONSONANCE_VERSION}".jar
RUN pip install --upgrade pip
RUN pip install -U pip setuptools




WORKDIR /tmp

#install Luigi and components for recording Luigi history in case we implement task history
Expand All @@ -87,25 +84,15 @@ RUN touch /tmp/decider_log
RUN chmod 644 /tmp/decider_log
RUN chown ubuntu:ubuntu /tmp/decider_log



#get the crontab for ubuntu deciders
COPY setup_decider_jobs.sh /home/ubuntu/setup_decider_jobs.sh
RUN chmod a+x /home/ubuntu/setup_decider_jobs.sh
RUN chown ubuntu:ubuntu /home/ubuntu/setup_decider_jobs.sh


#setup the directory from where the Luigi decider scripts will be run
ENV LUIGI_RUNS_PATH=/home/ubuntu/luigi_decider_runs
WORKDIR ${LUIGI_RUNS_PATH}
RUN chown -R ubuntu:ubuntu ${LUIGI_RUNS_PATH}

#setup the virtual environment that the Luigi decider
#crontab script uses
RUN virtualenv luigienv
ENV VIRTUAL_ENV_PATH=${LUIGI_RUNS_PATH}/luigienv/bin
#make sure ubuntu owns the virtual env path
RUN chown -R ubuntu:ubuntu ${LUIGI_RUNS_PATH}
ENV PIPELINE_RUNS_PATH=/home/ubuntu/pipeline_deciders_and_scripts
WORKDIR ${PIPELINE_RUNS_PATH}
RUN chown -R ubuntu:ubuntu ${PIPELINE_RUNS_PATH}

#install the Luigi pipeline decider scripts in the run directory
COPY run_Luigi_Deciders.sh .
Expand All @@ -124,12 +111,24 @@ COPY Protect.py .
RUN chown ubuntu:ubuntu Protect.py
RUN chmod a+x Protect.py

COPY Fusion.py .
RUN chown ubuntu:ubuntu Fusion.py
RUN chmod a+x Fusion.py

COPY base_decider.py .
RUN chown ubuntu:ubuntu base_decider.py
RUN chmod a+x base_decider.py


#allow every member of the sudo group to execute sudo without a password
RUN echo '%sudo ALL=(ALL) NOPASSWD:ALL' >> /etc/sudoers

#run everything from now on as ubuntu
USER ubuntu

#go back to the ubuntu home dir
WORKDIR /home/ubuntu/

#install AWS cli according to http://docs.aws.amazon.com/cli/latest/userguide/awscli-install-linux.html
#install as user ubuntu so the cli is on ubuntu's path
RUN curl -O https://bootstrap.pypa.io/get-pip.py
Expand Down
126 changes: 126 additions & 0 deletions luigi_task_executor/Fusion.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,126 @@
from __future__ import print_function, division

import sys
from collections import defaultdict
from base_decider import base_Coordinator

class FusionCoordinator(base_Coordinator):
    """Fusion pipeline settings layered on top of base_Coordinator.

    The methods below return (or extend) dictionaries describing the Fusion
    pipeline: its name, reference files, fixed and per-sample metadata, and
    the CWL parameterized JSON input.
    """

    def get_pipeline_name(self):
        """Return the name that will be used to name the touchfile path."""
        return 'Fusion'

    def get_cgp_job_reference_files(self):
        """Return a dictionary of CWL option to reference file name.

        This information can be added to the parameterized JSON input to the
        pipeline.
        """
        # defaultdict() without a factory behaves like a plain dict; the type
        # is kept so callers receive the same return type as before.
        cwl_option_to_reference_file_name = defaultdict()

        # Add the correct CWL option and reference file names here
        cwl_option_to_reference_file_name['index'] = "STARFusion-GRCh38gencode23.tar.gz"

        return cwl_option_to_reference_file_name

    def get_pipeline_job_fixed_metadata(self):
        """Return the metadata that does not depend on the samples found.

        The dictionary holds keywords used in the dockstore tool runner
        parameterized JSON and used in Elastic search to find needed samples.
        The data does not depend on samples found in the Elastic search, so it
        can be added here.
        """
        cgp_pipeline_job_fixed_metadata = defaultdict()

        cgp_pipeline_job_fixed_metadata["launch_type"] = "tool"
        cgp_pipeline_job_fixed_metadata["analysis_type"] = "fusion_variant_calling"
        cgp_pipeline_job_fixed_metadata["target_tool_prefix"] = 'registry.hub.docker.com/ucsctreehouse/fusion'
        cgp_pipeline_job_fixed_metadata["target_tool_url"] = \
            "https://dockstore.org/containers/registry.hub.docker.com/ucsctreehouse/fusion/"

        cgp_pipeline_job_fixed_metadata["input_data_analysis_type"] = "sequence_upload"
        cgp_pipeline_job_fixed_metadata["input_data_experimental_design"] = "RNA-Seq"
        cgp_pipeline_job_fixed_metadata["normal_missing_item"] = "normal_fusion_workflow_0_2_x"
        cgp_pipeline_job_fixed_metadata["normal_present_item"] = "normal_fusion_workflow_0_2_x"
        cgp_pipeline_job_fixed_metadata["tumor_missing_item"] = "tumor_fusion_workflow_0_2_x"
        cgp_pipeline_job_fixed_metadata["tumor_present_item"] = "tumor_fusion_workflow_0_2_x"
        cgp_pipeline_job_fixed_metadata["normal_metadata_flag"] = "normal_fusion_workflow_0_2_x"
        cgp_pipeline_job_fixed_metadata["tumor_metadata_flag"] = "tumor_fusion_workflow_0_2_x"

        return cgp_pipeline_job_fixed_metadata

    def get_pipeline_job_customized_metadata(self, cgp_pipeline_job_metadata):
        """Add the per-sample naming metadata to cgp_pipeline_job_metadata.

        These values are used to set up the touch file path and the metadata
        and dockstore JSON file names. They can depend on the sample name or
        other information found through the Elastic search, so they are kept
        separate from the fixed metadata.

        The passed dictionary is modified in place and also returned. It must
        already contain "submitter_sample_id" (KeyError otherwise).
        """
        cgp_pipeline_job_metadata['file_prefix'] = cgp_pipeline_job_metadata["submitter_sample_id"]
        cgp_pipeline_job_metadata['metadata_json_file_name'] = cgp_pipeline_job_metadata['file_prefix'] + '_meta_data.json'
        cgp_pipeline_job_metadata["last_touch_file_folder_suffix"] = ""

        return cgp_pipeline_job_metadata

    def get_pipeline_parameterized_json(self, cgp_pipeline_job_metadata, analysis):
        """Build the CWL parameterized JSON input for the Fusion pipeline.

        Edit this method to set up the pipeline tool/workflow CWL options.

        The first two entries of analysis["workflow_outputs"] become the
        'fastq1' and 'fastq2' inputs. The sample uuid is appended to
        cgp_pipeline_job_metadata["parent_uuids"] (created if missing), so the
        metadata dictionary is modified in place.

        Returns the dictionary of CWL keywords and values, or an empty list
        when there are more than two input files or fewer than two (paired end
        reads are required). An error is printed to stderr in both cases.
        """
        cgp_pipeline_job_json = defaultdict()

        for output_file in analysis["workflow_outputs"]:
            print("\nfile type:"+output_file["file_type"])
            print("\nfile name:"+output_file["file_path"])

            # TODO: decide whether an unexpected file_type should be reported
            # as an error here.

            file_uuid = self.fileToUUID(output_file["file_path"], analysis["bundle_uuid"])
            file_path = 'redwood' + '://' + self.redwood_host + '/' + analysis['bundle_uuid'] + '/' + \
                file_uuid + "/" + output_file["file_path"]

            if 'fastq1' not in cgp_pipeline_job_json:
                cgp_pipeline_job_json['fastq1'] = {"class" : "File", "path" : file_path}
            elif 'fastq2' not in cgp_pipeline_job_json:
                cgp_pipeline_job_json['fastq2'] = {"class" : "File", "path" : file_path}
            else:
                print("ERROR: Too many input files for Fusion pipeline in analysis output; extra file is:{}!!!".format(file_path), file=sys.stderr)
                return []

        # NOTE(review): this update is placed after the loop because it does
        # not depend on the loop variable; the reviewed copy had lost its
        # indentation, so confirm the placement against the upstream file.
        if 'parent_uuids' not in cgp_pipeline_job_metadata:
            cgp_pipeline_job_metadata["parent_uuids"] = []

        sample_uuid = cgp_pipeline_job_metadata["sample_uuid"]
        if sample_uuid not in cgp_pipeline_job_metadata["parent_uuids"]:
            cgp_pipeline_job_metadata["parent_uuids"].append(sample_uuid)

        cgp_pipeline_job_json["outputdir"] = '.'
        cgp_pipeline_job_json["root-ownership"] = True
        cgp_pipeline_job_json["cpu"] = 32

        # Specify the output files here, using the options in the CWL file as keys
        output_files = (
            ("output1", "/tmp/star-fusion-gene-list-filtered.final"),
            ("output2", "/tmp/star-fusion-gene-list-filtered.final.bedpe"),
            ("output3", "/tmp/star-fusion-non-filtered.final"),
            ("output4", "/tmp/star-fusion-non-filtered.final.bedpe"),
        )
        for cwl_option, output_path in output_files:
            cgp_pipeline_job_json[cwl_option] = {"class" : "File", "path" : output_path}

        if 'fastq1' not in cgp_pipeline_job_json or 'fastq2' not in cgp_pipeline_job_json:
            # We must have paired end reads for the Fusion pipeline, so return
            # an empty list to indicate an error if we get here.
            print("\nERROR: UNABLE TO GET BOTH FASTQ FILES FOR FUSION PIPELINE; INCOMPLETE JSON IS:{}".format(cgp_pipeline_job_json) , file=sys.stderr)
            return []

        return cgp_pipeline_job_json


if __name__ == '__main__':
    # luigi is not imported at the top of this module; import it here so that
    # running the file as a script does not fail with NameError on luigi.run().
    import luigi

    luigi.run()

4 changes: 2 additions & 2 deletions luigi_task_executor/Protect.py
Original file line number Diff line number Diff line change
Expand Up @@ -36,7 +36,7 @@ class ConsonanceTask(luigi.Task):

redwood_host = luigi.Parameter("storage.ucsc-cgl.org")
redwood_token = luigi.Parameter("must_be_defined")
dockstore_tool_running_dockstore_tool = luigi.Parameter(default="quay.io/ucsc_cgl/dockstore-tool-runner:1.0.21")
dockstore_tool_running_dockstore_tool = luigi.Parameter(default="quay.io/ucsc_cgl/dockstore-tool-runner:1.0.22")

workflow_version = luigi.Parameter(default="must be defined")

Expand Down Expand Up @@ -322,7 +322,7 @@ class ProtectCoordinator(luigi.Task):
redwood_token = luigi.Parameter("must_be_defined")
redwood_host = luigi.Parameter(default='storage.ucsc-cgp.org')
image_descriptor = luigi.Parameter("must be defined")
dockstore_tool_running_dockstore_tool = luigi.Parameter(default="quay.io/ucsc_cgl/dockstore-tool-runner:1.0.21")
dockstore_tool_running_dockstore_tool = luigi.Parameter(default="quay.io/ucsc_cgl/dockstore-tool-runner:1.0.22")
tmp_dir = luigi.Parameter(default='/datastore')
max_jobs = luigi.Parameter(default='-1')
bundle_uuid_filename_to_file_uuid = {}
Expand Down
4 changes: 2 additions & 2 deletions luigi_task_executor/RNA-Seq.py
Original file line number Diff line number Diff line change
Expand Up @@ -29,7 +29,7 @@
class ConsonanceTask(luigi.Task):
redwood_host = luigi.Parameter("storage.ucsc-cgp.org")
redwood_token = luigi.Parameter("must_be_defined")
dockstore_tool_running_dockstore_tool = luigi.Parameter(default="quay.io/ucsc_cgl/dockstore-tool-runner:1.0.21")
dockstore_tool_running_dockstore_tool = luigi.Parameter(default="quay.io/ucsc_cgl/dockstore-tool-runner:1.0.22")

workflow_version = luigi.Parameter(default="must be defined")

Expand Down Expand Up @@ -452,7 +452,7 @@ class RNASeqCoordinator(luigi.Task):
redwood_token = luigi.Parameter("must_be_defined")
redwood_host = luigi.Parameter(default='storage.ucsc-cgl.org')
image_descriptor = luigi.Parameter("must be defined")
dockstore_tool_running_dockstore_tool = luigi.Parameter(default="quay.io/ucsc_cgl/dockstore-tool-runner:1.0.21")
dockstore_tool_running_dockstore_tool = luigi.Parameter(default="quay.io/ucsc_cgl/dockstore-tool-runner:1.0.22")
tmp_dir = luigi.Parameter(default='/datastore')
max_jobs = luigi.Parameter(default='-1')
bundle_uuid_filename_to_file_uuid = {}
Expand Down
4 changes: 2 additions & 2 deletions luigi_task_executor/RNA-Seq_3_0_2-3.py
Original file line number Diff line number Diff line change
Expand Up @@ -29,7 +29,7 @@
class ConsonanceTask(luigi.Task):
redwood_host = luigi.Parameter("storage.ucsc-cgp.org")
redwood_token = luigi.Parameter("must_be_defined")
dockstore_tool_running_dockstore_tool = luigi.Parameter(default="quay.io/ucsc_cgl/dockstore-tool-runner:1.0.21")
dockstore_tool_running_dockstore_tool = luigi.Parameter(default="quay.io/ucsc_cgl/dockstore-tool-runner:1.0.22")

workflow_version = luigi.Parameter(default="must be defined")

Expand Down Expand Up @@ -460,7 +460,7 @@ class RNASeqCoordinator(luigi.Task):
redwood_token = luigi.Parameter("must_be_defined")
redwood_host = luigi.Parameter(default='storage.ucsc-cgl.org')
image_descriptor = luigi.Parameter("must be defined")
dockstore_tool_running_dockstore_tool = luigi.Parameter(default="quay.io/ucsc_cgl/dockstore-tool-runner:1.0.21")
dockstore_tool_running_dockstore_tool = luigi.Parameter(default="quay.io/ucsc_cgl/dockstore-tool-runner:1.0.22")
tmp_dir = luigi.Parameter(default='/datastore')
max_jobs = luigi.Parameter(default='-1')
bundle_uuid_filename_to_file_uuid = {}
Expand Down
Loading

0 comments on commit 3e27f37

Please sign in to comment.