Merge branch 'release/1.0.12'

BD2KGenomics · Nov 3, 2017 · 3e27f37 · 3e27f37
2 parents f497b9f + 7770eed
commit 3e27f37
Show file tree

Hide file tree

Showing 10 changed files with 818 additions and 49 deletions.
diff --git a/luigi_task_executor/CNV.py b/luigi_task_executor/CNV.py
@@ -36,7 +36,7 @@ class ConsonanceTask(luigi.Task):
 
     redwood_host = luigi.Parameter("storage.ucsc-cgl.org")
     redwood_token = luigi.Parameter("must_be_defined")
-    dockstore_tool_running_dockstore_tool = luigi.Parameter(default="quay.io/ucsc_cgl/dockstore-tool-runner:1.0.21")
+    dockstore_tool_running_dockstore_tool = luigi.Parameter(default="quay.io/ucsc_cgl/dockstore-tool-runner:1.0.22")
 
     workflow_version = luigi.Parameter(default="must be defined")
 
@@ -251,7 +251,7 @@ class CNVCoordinator(luigi.Task):
     redwood_token = luigi.Parameter("must_be_defined")
     redwood_host = luigi.Parameter(default='storage.ucsc-cgp.org')
     image_descriptor = luigi.Parameter("must be defined")
-    dockstore_tool_running_dockstore_tool = luigi.Parameter(default="quay.io/ucsc_cgl/dockstore-tool-runner:1.0.21")
+    dockstore_tool_running_dockstore_tool = luigi.Parameter(default="quay.io/ucsc_cgl/dockstore-tool-runner:1.0.22")
     tmp_dir = luigi.Parameter(default='/datastore')
     max_jobs = luigi.Parameter(default='-1')
     bundle_uuid_filename_to_file_uuid = {}

diff --git a/luigi_task_executor/Dockerfile_decider b/luigi_task_executor/Dockerfile_decider
@@ -62,9 +62,6 @@ RUN chown ubuntu:ubuntu consonance-client-"${CONSONANCE_VERSION}".jar
 RUN pip install --upgrade pip
 RUN pip install -U pip setuptools
 
-
-
-
 WORKDIR /tmp
 
 #install Luigi and components for recording Luigi history in case we implement task history
@@ -87,25 +84,15 @@ RUN touch /tmp/decider_log
 RUN chmod 644 /tmp/decider_log
 RUN chown ubuntu:ubuntu /tmp/decider_log
 
-
-
 #get the crontab for ubuntu deciders
 COPY setup_decider_jobs.sh /home/ubuntu/setup_decider_jobs.sh
 RUN chmod a+x /home/ubuntu/setup_decider_jobs.sh
 RUN chown ubuntu:ubuntu /home/ubuntu/setup_decider_jobs.sh
 
-
 #setup the directory from where the Luigi decider scripts will be run
-ENV LUIGI_RUNS_PATH=/home/ubuntu/luigi_decider_runs
-WORKDIR ${LUIGI_RUNS_PATH}
-RUN chown -R ubuntu:ubuntu ${LUIGI_RUNS_PATH}
-
-#setup the virtual environment that the Luigi decider
-#crontab script uses
-RUN virtualenv luigienv
-ENV VIRTUAL_ENV_PATH=${LUIGI_RUNS_PATH}/luigienv/bin
-#make sure ubuntu owns the virtual env path
-RUN chown -R ubuntu:ubuntu ${LUIGI_RUNS_PATH}
+ENV PIPELINE_RUNS_PATH=/home/ubuntu/pipeline_deciders_and_scripts
+WORKDIR ${PIPELINE_RUNS_PATH}
+RUN chown -R ubuntu:ubuntu ${PIPELINE_RUNS_PATH}
 
 #install the Luigi pipeline decider scripts in the run directory
 COPY run_Luigi_Deciders.sh .
@@ -124,12 +111,24 @@ COPY Protect.py .
 RUN chown ubuntu:ubuntu Protect.py
 RUN chmod a+x Protect.py
 
+COPY Fusion.py . 
+RUN chown ubuntu:ubuntu Fusion.py
+RUN chmod a+x Fusion.py
+
+COPY base_decider.py . 
+RUN chown ubuntu:ubuntu base_decider.py
+RUN chmod a+x base_decider.py
+
+
 #allow ubuntu (and all other users) to execute sudo without a password
 RUN echo '%sudo ALL=(ALL) NOPASSWD:ALL' >> /etc/sudoers
 
 #run everything from now on as ubuntu
 USER ubuntu
 
+#go back to the ubuntu home dir
+WORKDIR /home/ubuntu/
+
 #install AWS cli according to http://docs.aws.amazon.com/cli/latest/userguide/awscli-install-linux.html
 #install as user ubuntu so the cli is on ubuntu's path
 RUN curl -O https://bootstrap.pypa.io/get-pip.py

diff --git a/luigi_task_executor/Fusion.py b/luigi_task_executor/Fusion.py
@@ -0,0 +1,126 @@
+from __future__ import print_function, division
+
+import sys
+from collections import defaultdict
+from base_decider import base_Coordinator
+
+class FusionCoordinator(base_Coordinator):
+    """Fusion pipeline settings layered on top of base_Coordinator.
+
+    Every method below returns Fusion-specific names, metadata or CWL input
+    values. ``self.redwood_host`` and ``self.fileToUUID`` are used but not
+    defined in this class; they are expected to be provided by
+    base_Coordinator.
+    """
+
+    def get_pipeline_name(self):
+        """Return the name that will be used to name the touchfile path."""
+        return 'Fusion'
+
+    def get_cgp_job_reference_files(self):
+        """Return a dictionary of CWL option to reference file name.
+
+        This information can be added to the parameterized JSON input to
+        the pipeline.
+        """
+        # defaultdict() with no default factory behaves like a plain dict
+        # (missing keys still raise KeyError); the type is kept so callers
+        # receive the same kind of object as before.
+        cwl_option_to_reference_file_name = defaultdict()
+
+        # Add the correct CWL option and reference file names here
+        cwl_option_to_reference_file_name['index'] = "STARFusion-GRCh38gencode23.tar.gz"
+
+        return cwl_option_to_reference_file_name
+
+    def get_pipeline_job_fixed_metadata(self):
+        """Return the job metadata that does not depend on the samples found.
+
+        The keywords are used in the dockstore tool runner parameterized
+        JSON and in Elastic search to find needed samples. The values do
+        not depend on samples found in the Elastic search, so they can be
+        set here.
+        """
+        cgp_pipeline_job_fixed_metadata = defaultdict()
+
+        cgp_pipeline_job_fixed_metadata["launch_type"] = "tool"
+        cgp_pipeline_job_fixed_metadata["analysis_type"] = "fusion_variant_calling"
+        cgp_pipeline_job_fixed_metadata["target_tool_prefix"] = 'registry.hub.docker.com/ucsctreehouse/fusion'
+        cgp_pipeline_job_fixed_metadata["target_tool_url"] = \
+                   "https://dockstore.org/containers/registry.hub.docker.com/ucsctreehouse/fusion/"
+
+        cgp_pipeline_job_fixed_metadata["input_data_analysis_type"] = "sequence_upload"
+        cgp_pipeline_job_fixed_metadata["input_data_experimental_design"] = "RNA-Seq"
+
+        # The same flag name is used whether the item is missing or present.
+        normal_flag = "normal_fusion_workflow_0_2_x"
+        tumor_flag = "tumor_fusion_workflow_0_2_x"
+        cgp_pipeline_job_fixed_metadata["normal_missing_item"] = normal_flag
+        cgp_pipeline_job_fixed_metadata["normal_present_item"] = normal_flag
+        cgp_pipeline_job_fixed_metadata["tumor_missing_item"] = tumor_flag
+        cgp_pipeline_job_fixed_metadata["tumor_present_item"] = tumor_flag
+        cgp_pipeline_job_fixed_metadata["normal_metadata_flag"] = normal_flag
+        cgp_pipeline_job_fixed_metadata["tumor_metadata_flag"] = tumor_flag
+
+        return cgp_pipeline_job_fixed_metadata
+
+    def get_pipeline_job_customized_metadata(self, cgp_pipeline_job_metadata):
+        """Add the sample-dependent entries to the pipeline job metadata.
+
+        These entries are used to set up the touch file path and the
+        metadata and dockstore JSON file names. They sometimes depend on the
+        sample name or other information found through the Elastic search,
+        so they are kept separate from the fixed metadata.
+
+        The dictionary passed in is modified in place and also returned.
+        It must already contain "submitter_sample_id".
+        """
+        file_prefix = cgp_pipeline_job_metadata["submitter_sample_id"]
+        cgp_pipeline_job_metadata['file_prefix'] = file_prefix
+        cgp_pipeline_job_metadata['metadata_json_file_name'] = file_prefix + '_meta_data.json'
+        cgp_pipeline_job_metadata["last_touch_file_folder_suffix"] = ""
+
+        return cgp_pipeline_job_metadata
+
+    def get_pipeline_parameterized_json(self, cgp_pipeline_job_metadata, analysis):
+        """Build the CWL input parameterized JSON for the Fusion pipeline.
+
+        Edit this method to set up the pipeline tool/workflow CWL options.
+        The result is the input to the pipeline to be run from Dockstore.
+
+        The first entry of analysis["workflow_outputs"] becomes 'fastq1' and
+        the second becomes 'fastq2'. While the entries are processed,
+        cgp_pipeline_job_metadata["parent_uuids"] is created if needed and
+        cgp_pipeline_job_metadata["sample_uuid"] is added to it once.
+
+        Returns the dictionary of CWL keywords and values, or an empty list
+        to indicate an error when there are not exactly two input files.
+        """
+        cgp_pipeline_job_json = defaultdict()
+
+        for workflow_output in analysis["workflow_outputs"]:
+            print("\nfile type:"+workflow_output["file_type"])
+            print("\nfile name:"+workflow_output["file_path"])
+
+            # TODO(review): the original note here asked whether a
+            # file_type other than the expected one (it named "bam") should
+            # produce an error message.
+
+            file_path = 'redwood://' + '/'.join([
+                self.redwood_host,
+                analysis['bundle_uuid'],
+                self.fileToUUID(workflow_output["file_path"], analysis["bundle_uuid"]),
+                workflow_output["file_path"],
+            ])
+
+            # Fill the first free paired-end slot; a third file is an error.
+            if 'fastq1' not in cgp_pipeline_job_json:
+                fastq_option = 'fastq1'
+            elif 'fastq2' not in cgp_pipeline_job_json:
+                fastq_option = 'fastq2'
+            else:
+                print("ERROR: Too many input files for Fusion pipeline in analysis output; extra file is:{}!!!".format(file_path), file=sys.stderr)
+                return []
+            cgp_pipeline_job_json[fastq_option] = {"class" : "File", "path" : file_path}
+
+            # Kept inside the loop so the caller's metadata is updated in
+            # exactly the same situations as before.
+            parent_uuids = cgp_pipeline_job_metadata.setdefault("parent_uuids", [])
+            sample_uuid = cgp_pipeline_job_metadata["sample_uuid"]
+            if sample_uuid not in parent_uuids:
+                parent_uuids.append(sample_uuid)
+
+        if 'fastq1' not in cgp_pipeline_job_json or 'fastq2' not in cgp_pipeline_job_json:
+            # we must have paired end reads for the Fusion pipeline so return an empty
+            # list to indicate an error if we get here
+            print("\nERROR: UNABLE TO GET BOTH FASTQ FILES FOR FUSION PIPELINE; INCOMPLETE JSON IS:{}".format(cgp_pipeline_job_json) , file=sys.stderr)
+            return []
+
+        # The remaining options do not depend on the input files, so they
+        # are set once here instead of on every loop iteration. As a result
+        # the "INCOMPLETE JSON" message above lists only the fastq entries.
+        cgp_pipeline_job_json["outputdir"] = '.'
+        cgp_pipeline_job_json["root-ownership"] = True
+        cgp_pipeline_job_json["cpu"] = 32
+
+        # Specify the output files here, using the options in the CWL file as keys
+        output_files = (
+            ("output1", "/tmp/star-fusion-gene-list-filtered.final"),
+            ("output2", "/tmp/star-fusion-gene-list-filtered.final.bedpe"),
+            ("output3", "/tmp/star-fusion-non-filtered.final"),
+            ("output4", "/tmp/star-fusion-non-filtered.final.bedpe"),
+        )
+        for cwl_option, output_path in output_files:
+            cgp_pipeline_job_json[cwl_option] = {"class" : "File", "path" : output_path}
+
+        return cgp_pipeline_job_json
+
+
+if __name__ == '__main__':
+    # luigi is not imported at the top of this file, so it is imported here;
+    # without this the call below fails with a NameError when the file is
+    # run as a script.
+    import luigi
+    luigi.run()
+
diff --git a/luigi_task_executor/Protect.py b/luigi_task_executor/Protect.py
@@ -36,7 +36,7 @@ class ConsonanceTask(luigi.Task):
 
     redwood_host = luigi.Parameter("storage.ucsc-cgl.org")
     redwood_token = luigi.Parameter("must_be_defined")
-    dockstore_tool_running_dockstore_tool = luigi.Parameter(default="quay.io/ucsc_cgl/dockstore-tool-runner:1.0.21")
+    dockstore_tool_running_dockstore_tool = luigi.Parameter(default="quay.io/ucsc_cgl/dockstore-tool-runner:1.0.22")
 
     workflow_version = luigi.Parameter(default="must be defined")
 
@@ -322,7 +322,7 @@ class ProtectCoordinator(luigi.Task):
     redwood_token = luigi.Parameter("must_be_defined")
     redwood_host = luigi.Parameter(default='storage.ucsc-cgp.org')
     image_descriptor = luigi.Parameter("must be defined")
-    dockstore_tool_running_dockstore_tool = luigi.Parameter(default="quay.io/ucsc_cgl/dockstore-tool-runner:1.0.21")
+    dockstore_tool_running_dockstore_tool = luigi.Parameter(default="quay.io/ucsc_cgl/dockstore-tool-runner:1.0.22")
     tmp_dir = luigi.Parameter(default='/datastore')
     max_jobs = luigi.Parameter(default='-1')
     bundle_uuid_filename_to_file_uuid = {}

diff --git a/luigi_task_executor/RNA-Seq.py b/luigi_task_executor/RNA-Seq.py
@@ -29,7 +29,7 @@
 class ConsonanceTask(luigi.Task):
     redwood_host = luigi.Parameter("storage.ucsc-cgp.org")
     redwood_token = luigi.Parameter("must_be_defined")
-    dockstore_tool_running_dockstore_tool = luigi.Parameter(default="quay.io/ucsc_cgl/dockstore-tool-runner:1.0.21")
+    dockstore_tool_running_dockstore_tool = luigi.Parameter(default="quay.io/ucsc_cgl/dockstore-tool-runner:1.0.22")
 
     workflow_version = luigi.Parameter(default="must be defined")
 
@@ -452,7 +452,7 @@ class RNASeqCoordinator(luigi.Task):
     redwood_token = luigi.Parameter("must_be_defined")
     redwood_host = luigi.Parameter(default='storage.ucsc-cgl.org')
     image_descriptor = luigi.Parameter("must be defined")
-    dockstore_tool_running_dockstore_tool = luigi.Parameter(default="quay.io/ucsc_cgl/dockstore-tool-runner:1.0.21")
+    dockstore_tool_running_dockstore_tool = luigi.Parameter(default="quay.io/ucsc_cgl/dockstore-tool-runner:1.0.22")
     tmp_dir = luigi.Parameter(default='/datastore')
     max_jobs = luigi.Parameter(default='-1')
     bundle_uuid_filename_to_file_uuid = {}

diff --git a/luigi_task_executor/RNA-Seq_3_0_2-3.py b/luigi_task_executor/RNA-Seq_3_0_2-3.py
@@ -29,7 +29,7 @@
 class ConsonanceTask(luigi.Task):
     redwood_host = luigi.Parameter("storage.ucsc-cgp.org")
     redwood_token = luigi.Parameter("must_be_defined")
-    dockstore_tool_running_dockstore_tool = luigi.Parameter(default="quay.io/ucsc_cgl/dockstore-tool-runner:1.0.21")
+    dockstore_tool_running_dockstore_tool = luigi.Parameter(default="quay.io/ucsc_cgl/dockstore-tool-runner:1.0.22")
 
     workflow_version = luigi.Parameter(default="must be defined")
 
@@ -460,7 +460,7 @@ class RNASeqCoordinator(luigi.Task):
     redwood_token = luigi.Parameter("must_be_defined")
     redwood_host = luigi.Parameter(default='storage.ucsc-cgl.org')
     image_descriptor = luigi.Parameter("must be defined")
-    dockstore_tool_running_dockstore_tool = luigi.Parameter(default="quay.io/ucsc_cgl/dockstore-tool-runner:1.0.21")
+    dockstore_tool_running_dockstore_tool = luigi.Parameter(default="quay.io/ucsc_cgl/dockstore-tool-runner:1.0.22")
     tmp_dir = luigi.Parameter(default='/datastore')
     max_jobs = luigi.Parameter(default='-1')
     bundle_uuid_filename_to_file_uuid = {}