From 0324419d0362598c1927c92777457a0a7a755d0b Mon Sep 17 00:00:00 2001 From: clarabakker <57916928+clarabakker@users.noreply.github.com> Date: Thu, 16 Jun 2022 16:17:49 -0400 Subject: [PATCH 01/18] set input files for aln wf as [[File]] rather than [[[File]]] to match new wdl --- chalicelib/checks/wfr_encode_checks.py | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/chalicelib/checks/wfr_encode_checks.py b/chalicelib/checks/wfr_encode_checks.py index 2af693d8..4670894c 100644 --- a/chalicelib/checks/wfr_encode_checks.py +++ b/chalicelib/checks/wfr_encode_checks.py @@ -207,7 +207,10 @@ def chipseq_status(connection, **kwargs): if control: # control run on tf mode # input_files = {'chip.ctl_fastqs': [exp_files]} - input_files['chip.ctl_fastqs'] = [exp_files] + + # exp_files is of the form [[Files]] + # for v1.1.1 chip.ctl_fastqs = [exp_files] ([[[Files]]]), for v2.1.6, just [[Files]] + input_files['chip.ctl_fastqs'] = exp_files control_parameters = { "chip.pipeline_type": 'tf', "chip.choose_ctl.always_use_pooled_ctl": True, @@ -239,7 +242,9 @@ def chipseq_status(connection, **kwargs): # run step1 else: # input_files = {'chip.fastqs': [exp_files]} - input_files['chip.fastqs'] = [exp_files] + # exp_files is of the form [[Files]] + # for v1.1.1 chip.fastqs = [exp_files] ([[[Files]]]), for v2.1.6, just [[Files]] + input_files['chip.fastqs'] = exp_files exp_parameters = { "chip.pipeline_type": target_type, "chip.choose_ctl.always_use_pooled_ctl": True, From f74499dd644f023c8122f69cb4445b962730cdc6 Mon Sep 17 00:00:00 2001 From: clarabakker <57916928+clarabakker@users.noreply.github.com> Date: Wed, 22 Jun 2022 16:55:18 -0400 Subject: [PATCH 02/18] set checks to use new workflows, testing --- chalicelib/checks/helpers/wfr_utils.py | 6 +++--- chalicelib/checks/helpers/wfrset_utils.py | 12 ++++++------ chalicelib/checks/wfr_encode_checks.py | 6 +++--- 3 files changed, 12 insertions(+), 12 deletions(-) diff --git a/chalicelib/checks/helpers/wfr_utils.py b/chalicelib/checks/helpers/wfr_utils.py index a9cc32c8..5ede83f5 100644 --- a/chalicelib/checks/helpers/wfr_utils.py +++ b/chalicelib/checks/helpers/wfr_utils.py @@ -74,15 +74,15 @@ }, "encode-chipseq-aln-chip": { "run_time": 200, - "accepted_versions": ["1.1.1"] + "accepted_versions": ["1.1.2"] }, "encode-chipseq-aln-ctl": { "run_time": 200, - "accepted_versions": ["1.1.1"] + "accepted_versions": ["1.1.2"] }, "encode-chipseq-postaln": { "run_time": 200, - "accepted_versions": ["1.1.1"] + "accepted_versions": ["1.1.2"] }, "encode-atacseq-aln": { "run_time": 200, diff --git a/chalicelib/checks/helpers/wfrset_utils.py b/chalicelib/checks/helpers/wfrset_utils.py index c13cfd87..8be80d27 100644 --- a/chalicelib/checks/helpers/wfrset_utils.py +++ b/chalicelib/checks/helpers/wfrset_utils.py @@ -194,8 +194,8 @@ def step_settings(step_name, my_organism, attribution, overwrite=None): } }, { - "app_name": "encode-chipseq-aln-chip", - "workflow_uuid": "4dn-dcic-lab:wf-encode-chipseq-aln-chip", + "app_name": "encode-chipseq-aln-alpha", + "workflow_uuid": "4dn-dcic-lab:wf-encode-chipseq-aln-alpha", "parameters": {}, "config": {}, 'custom_pf_fields': { @@ -212,8 +212,8 @@ def step_settings(step_name, my_organism, attribution, overwrite=None): } }, { - "app_name": "encode-chipseq-aln-ctl", - "workflow_uuid": "4dn-dcic-lab:wf-encode-chipseq-aln-ctl", + "app_name": "encode-chipseq-aln-ctl-alpha", + "workflow_uuid": "4dn-dcic-lab:wf-encode-chipseq-aln-ctl-alpha", "parameters": {}, "config": {}, 'custom_pf_fields': { @@ -225,8 +225,8 @@ def step_settings(step_name, my_organism, attribution, overwrite=None): } }, { - "app_name": "encode-chipseq-postaln", - "workflow_uuid": "4dn-dcic-lab:wf-encode-chipseq-postaln", + "app_name": "encode-chipseq-postaln-alpha", + "workflow_uuid": "4dn-dcic-lab:wf-encode-chipseq-postaln-alpha", "parameters": {}, "config": {}, 'custom_pf_fields': { diff --git a/chalicelib/checks/wfr_encode_checks.py b/chalicelib/checks/wfr_encode_checks.py index 4670894c..d0f30a10 100644 --- a/chalicelib/checks/wfr_encode_checks.py +++ b/chalicelib/checks/wfr_encode_checks.py @@ -50,9 +50,9 @@ def chipseq_status(connection, **kwargs): # for control sets, run step1c on each experiment and finish # for non-control sets, run step1 on each experiment, check if control is ready, run step2 on set step0_name = 'merge-fastq' - step1_name = 'encode-chipseq-aln-chip' - step1c_name = 'encode-chipseq-aln-ctl' - step2_name = 'encode-chipseq-postaln' + step1_name = 'encode-chipseq-aln-alpha' + step1c_name = 'encode-chipseq-aln-ctl-alpha' + step2_name = 'encode-chipseq-postaln-alpha' for a_set in res: set_acc = a_set['accession'] From 1d8e127b46cc99997eb548d5403762edc1bae356 Mon Sep 17 00:00:00 2001 From: clarabakker <57916928+clarabakker@users.noreply.github.com> Date: Wed, 22 Jun 2022 17:38:01 -0400 Subject: [PATCH 03/18] restore workflow specs (changed wf instead to match existing wf names), updated wf version list --- chalicelib/checks/helpers/wfr_utils.py | 2 +- chalicelib/checks/helpers/wfrset_utils.py | 12 ++++++------ chalicelib/checks/wfr_encode_checks.py | 6 +++--- 3 files changed, 10 insertions(+), 10 deletions(-) diff --git a/chalicelib/checks/helpers/wfr_utils.py b/chalicelib/checks/helpers/wfr_utils.py index 5ede83f5..34995a92 100644 --- a/chalicelib/checks/helpers/wfr_utils.py +++ b/chalicelib/checks/helpers/wfr_utils.py @@ -196,7 +196,7 @@ # OFFICIAL 'ATAC-seq': ['ENCODE_ATAC_Pipeline_1.1.1'], # OFFICIAL - 'ChIP-seq': ['ENCODE_ChIP_Pipeline_1.1.1'], + 'ChIP-seq': ['ENCODE_ChIP_Pipeline_1.1.1', 'ENCODE_ChIP_Pipeline_1.1.2'], # OFFICIAL 'RNA-seq': ['ENCODE_RNAseq_Pipeline_1.1'], 'single cell Repli-seq': [''], diff --git a/chalicelib/checks/helpers/wfrset_utils.py b/chalicelib/checks/helpers/wfrset_utils.py index 8be80d27..c13cfd87 100644 --- a/chalicelib/checks/helpers/wfrset_utils.py +++ b/chalicelib/checks/helpers/wfrset_utils.py @@ -194,8 +194,8 @@ def step_settings(step_name, my_organism, attribution, overwrite=None): } }, { - "app_name": "encode-chipseq-aln-alpha", - "workflow_uuid": "4dn-dcic-lab:wf-encode-chipseq-aln-alpha", + "app_name": "encode-chipseq-aln-chip", + "workflow_uuid": "4dn-dcic-lab:wf-encode-chipseq-aln-chip", "parameters": {}, "config": {}, 'custom_pf_fields': { @@ -212,8 +212,8 @@ def step_settings(step_name, my_organism, attribution, overwrite=None): } }, { - "app_name": "encode-chipseq-aln-ctl-alpha", - "workflow_uuid": "4dn-dcic-lab:wf-encode-chipseq-aln-ctl-alpha", + "app_name": "encode-chipseq-aln-ctl", + "workflow_uuid": "4dn-dcic-lab:wf-encode-chipseq-aln-ctl", "parameters": {}, "config": {}, 'custom_pf_fields': { @@ -225,8 +225,8 @@ def step_settings(step_name, my_organism, attribution, overwrite=None): } }, { - "app_name": "encode-chipseq-postaln-alpha", - "workflow_uuid": "4dn-dcic-lab:wf-encode-chipseq-postaln-alpha", + "app_name": "encode-chipseq-postaln", + "workflow_uuid": "4dn-dcic-lab:wf-encode-chipseq-postaln", "parameters": {}, "config": {}, 'custom_pf_fields': { diff --git a/chalicelib/checks/wfr_encode_checks.py b/chalicelib/checks/wfr_encode_checks.py index d0f30a10..4670894c 100644 --- a/chalicelib/checks/wfr_encode_checks.py +++ b/chalicelib/checks/wfr_encode_checks.py @@ -50,9 +50,9 @@ def chipseq_status(connection, **kwargs): # for control sets, run step1c on each experiment and finish # for non-control sets, run step1 on each experiment, check if control is ready, run step2 on set step0_name = 'merge-fastq' - step1_name = 'encode-chipseq-aln-alpha' - step1c_name = 'encode-chipseq-aln-ctl-alpha' - step2_name = 'encode-chipseq-postaln-alpha' + step1_name = 'encode-chipseq-aln-chip' + step1c_name = 'encode-chipseq-aln-ctl' + step2_name = 'encode-chipseq-postaln' for a_set in res: set_acc = a_set['accession'] From e3c640de3fbaa3f69ac8b45fea89b74b5f8d4ca5 Mon Sep 17 00:00:00 2001 From: clarabakker <57916928+clarabakker@users.noreply.github.com> Date: Wed, 22 Jun 2022 18:29:48 -0400 Subject: [PATCH 04/18] added wf v1.1.2-specific parameters, specified new wf uuid in wfrset_utils --- chalicelib/checks/helpers/wfrset_utils.py | 2 +- chalicelib/checks/wfr_encode_checks.py | 28 +++++++++-------------- 2 files changed, 12 insertions(+), 18 deletions(-) diff --git a/chalicelib/checks/helpers/wfrset_utils.py b/chalicelib/checks/helpers/wfrset_utils.py index c13cfd87..f442da89 100644 --- a/chalicelib/checks/helpers/wfrset_utils.py +++ b/chalicelib/checks/helpers/wfrset_utils.py @@ -195,7 +195,7 @@ def step_settings(step_name, my_organism, attribution, overwrite=None): }, { "app_name": "encode-chipseq-aln-chip", - "workflow_uuid": "4dn-dcic-lab:wf-encode-chipseq-aln-chip", + "workflow_uuid": "212a9c91-25d6-473f-b56b-8dd93958c580", "parameters": {}, "config": {}, 'custom_pf_fields': { diff --git a/chalicelib/checks/wfr_encode_checks.py b/chalicelib/checks/wfr_encode_checks.py index 4670894c..2f75abde 100644 --- a/chalicelib/checks/wfr_encode_checks.py +++ b/chalicelib/checks/wfr_encode_checks.py @@ -183,15 +183,19 @@ def chipseq_status(connection, **kwargs): if organism == 'human': org = 'hs' input_files['chip.bwa_idx_tar'] = '/files-reference/4DNFIZQB369V/' + input_files['chip.bowtie2_idx_tar'] = '/files-reference/4DNFIMQPTYDY/' input_files['chip.blacklist'] = '/files-reference/4DNFIZ1TGJZR/' input_files['chip.chrsz'] = '/files-reference/4DNFIZJB62D1/' - input_files['additional_file_parameters'] = {"chip.bwa_idx_tar": {"rename": "GRCh38_no_alt_analysis_set_GCA_000001405.15.fasta.tar"}} + input_file['chip.ref_fa'] = '/files-reference/files-reference/4DNFI823L888/' + input_files['additional_file_parameters'] = {"chip.bwa_idx_tar": {"rename": "GRCh38_no_alt_analysis_set_GCA_000001405.15.fasta.tar"}, {"chip.bowtie2_idx_tar": {"rename": "GRCh38_no_alt_analysis_set_GCA_000001405.15.bowtie2Index.tar"}} if organism == 'mouse': org = 'mm' input_files['chip.bwa_idx_tar'] = '/files-reference/4DNFIZ2PWCC2/' + input_files['chip.bowtie2_idx_tar'] = '/files-reference/4DNFI2493SDN/' input_files['chip.blacklist'] = '/files-reference/4DNFIZ3FBPK8/' input_files['chip.chrsz'] = '/files-reference/4DNFIBP173GC/' - input_files['additional_file_parameters'] = {"chip.bwa_idx_tar": {"rename": "mm10_no_alt_analysis_set_ENCODE.fasta.tar"}} + input_file['chip.ref_fa'] = '/files-reference/4DNFIC1NWMVJ/' + input_files['additional_file_parameters'] = {"chip.bwa_idx_tar": {"rename": "mm10_no_alt_analysis_set_ENCODE.fasta.tar"}, "chip.bowtie2_idx_tar": {"rename": "mm10_no_alt_analysis_set_ENCODE.bowtie2Index.tar"} # step1 Parameters parameters = {} parameters["chip.gensz"] = org @@ -213,12 +217,8 @@ def chipseq_status(connection, **kwargs): input_files['chip.ctl_fastqs'] = exp_files control_parameters = { "chip.pipeline_type": 'tf', - "chip.choose_ctl.always_use_pooled_ctl": True, - "chip.bam2ta_ctl.regex_grep_v_ta": "chr[MUE]|random|alt", - "chip.bwa_ctl.cpu": 8, - "chip.merge_fastq_ctl.cpu": 8, - "chip.filter_ctl.cpu": 8, - "chip.bam2ta_ctl.cpu": 8, + "chip.always_use_pooled_ctl": True, + "chip.regex_bfilt_peak_chr_name": "chr[MUE]|random|alt", "chip.align_only": True } parameters.update(control_parameters) @@ -247,13 +247,8 @@ def chipseq_status(connection, **kwargs): input_files['chip.fastqs'] = exp_files exp_parameters = { "chip.pipeline_type": target_type, - "chip.choose_ctl.always_use_pooled_ctl": True, - "chip.bam2ta.regex_grep_v_ta": "chr[MUE]|random|alt", - "chip.bwa.cpu": 8, - "chip.merge_fastq.cpu": 8, - "chip.filter.cpu": 8, - "chip.bam2ta.cpu": 8, - "chip.xcor.cpu": 8, + "chip.always_use_pooled_ctl": True, + "chip.regex_bfilt_peak_chr_name": "chr[MUE]|random|alt", "chip.align_only": True } parameters.update(exp_parameters) @@ -393,8 +388,7 @@ def rename_chip(input_at_id_list): "chip.paired_end": chip_p, "chip.choose_ctl.always_use_pooled_ctl": True, "chip.qc_report.desc": run_ids['desc'], - "chip.gensz": org, - "chip.xcor.cpu": 4, + "chip.gensz": org } if paired == 'single': frag_temp = [300] From 3697854b536d4f4c86e62f3618175acbaa30df22 Mon Sep 17 00:00:00 2001 From: clarabakker <57916928+clarabakker@users.noreply.github.com> Date: Wed, 22 Jun 2022 18:34:14 -0400 Subject: [PATCH 05/18] bracket mismatch typo --- chalicelib/checks/wfr_encode_checks.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/chalicelib/checks/wfr_encode_checks.py b/chalicelib/checks/wfr_encode_checks.py index 2f75abde..371bff38 100644 --- a/chalicelib/checks/wfr_encode_checks.py +++ b/chalicelib/checks/wfr_encode_checks.py @@ -187,7 +187,7 @@ def chipseq_status(connection, **kwargs): input_files['chip.blacklist'] = '/files-reference/4DNFIZ1TGJZR/' input_files['chip.chrsz'] = '/files-reference/4DNFIZJB62D1/' input_file['chip.ref_fa'] = '/files-reference/files-reference/4DNFI823L888/' - input_files['additional_file_parameters'] = {"chip.bwa_idx_tar": {"rename": "GRCh38_no_alt_analysis_set_GCA_000001405.15.fasta.tar"}, {"chip.bowtie2_idx_tar": {"rename": "GRCh38_no_alt_analysis_set_GCA_000001405.15.bowtie2Index.tar"}} + input_files['additional_file_parameters'] = {"chip.bwa_idx_tar": {"rename": "GRCh38_no_alt_analysis_set_GCA_000001405.15.fasta.tar"}, "chip.bowtie2_idx_tar": {"rename": "GRCh38_no_alt_analysis_set_GCA_000001405.15.bowtie2Index.tar"}} if organism == 'mouse': org = 'mm' input_files['chip.bwa_idx_tar'] = '/files-reference/4DNFIZ2PWCC2/' @@ -195,7 +195,7 @@ def chipseq_status(connection, **kwargs): input_files['chip.blacklist'] = '/files-reference/4DNFIZ3FBPK8/' input_files['chip.chrsz'] = '/files-reference/4DNFIBP173GC/' input_file['chip.ref_fa'] = '/files-reference/4DNFIC1NWMVJ/' - input_files['additional_file_parameters'] = {"chip.bwa_idx_tar": {"rename": "mm10_no_alt_analysis_set_ENCODE.fasta.tar"}, "chip.bowtie2_idx_tar": {"rename": "mm10_no_alt_analysis_set_ENCODE.bowtie2Index.tar"} + input_files['additional_file_parameters'] = {"chip.bwa_idx_tar": {"rename": "mm10_no_alt_analysis_set_ENCODE.fasta.tar"}, "chip.bowtie2_idx_tar": {"rename": "mm10_no_alt_analysis_set_ENCODE.bowtie2Index.tar"}} # step1 Parameters parameters = {} parameters["chip.gensz"] = org From 8b14c2f81cfadf6dde6b0853c9c889596812306e Mon Sep 17 00:00:00 2001 From: clarabakker <57916928+clarabakker@users.noreply.github.com> Date: Wed, 22 Jun 2022 18:36:28 -0400 Subject: [PATCH 06/18] chip parameter step1/1c typo --- chalicelib/checks/wfr_encode_checks.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/chalicelib/checks/wfr_encode_checks.py b/chalicelib/checks/wfr_encode_checks.py index 371bff38..b230caa9 100644 --- a/chalicelib/checks/wfr_encode_checks.py +++ b/chalicelib/checks/wfr_encode_checks.py @@ -186,7 +186,7 @@ def chipseq_status(connection, **kwargs): input_files['chip.bowtie2_idx_tar'] = '/files-reference/4DNFIMQPTYDY/' input_files['chip.blacklist'] = '/files-reference/4DNFIZ1TGJZR/' input_files['chip.chrsz'] = '/files-reference/4DNFIZJB62D1/' - input_file['chip.ref_fa'] = '/files-reference/files-reference/4DNFI823L888/' + input_files['chip.ref_fa'] = '/files-reference/files-reference/4DNFI823L888/' input_files['additional_file_parameters'] = {"chip.bwa_idx_tar": {"rename": "GRCh38_no_alt_analysis_set_GCA_000001405.15.fasta.tar"}, "chip.bowtie2_idx_tar": {"rename": "GRCh38_no_alt_analysis_set_GCA_000001405.15.bowtie2Index.tar"}} if organism == 'mouse': org = 'mm' @@ -194,7 +194,7 @@ def chipseq_status(connection, **kwargs): input_files['chip.bowtie2_idx_tar'] = '/files-reference/4DNFI2493SDN/' input_files['chip.blacklist'] = '/files-reference/4DNFIZ3FBPK8/' input_files['chip.chrsz'] = '/files-reference/4DNFIBP173GC/' - input_file['chip.ref_fa'] = '/files-reference/4DNFIC1NWMVJ/' + input_files['chip.ref_fa'] = '/files-reference/4DNFIC1NWMVJ/' input_files['additional_file_parameters'] = {"chip.bwa_idx_tar": {"rename": "mm10_no_alt_analysis_set_ENCODE.fasta.tar"}, "chip.bowtie2_idx_tar": {"rename": "mm10_no_alt_analysis_set_ENCODE.bowtie2Index.tar"}} # step1 Parameters parameters = {} From a1bbbc9db7f601319eea0a21368514ed6f4d326f Mon Sep 17 00:00:00 2001 From: clarabakker <57916928+clarabakker@users.noreply.github.com> Date: Fri, 24 Jun 2022 11:49:09 -0400 Subject: [PATCH 07/18] further simplify fastqs input array, set reference files with names which work in other ff envs --- chalicelib/checks/wfr_encode_checks.py | 14 ++++++++------ 1 file changed, 8 insertions(+), 6 deletions(-) diff --git a/chalicelib/checks/wfr_encode_checks.py b/chalicelib/checks/wfr_encode_checks.py index b230caa9..7af71322 100644 --- a/chalicelib/checks/wfr_encode_checks.py +++ b/chalicelib/checks/wfr_encode_checks.py @@ -186,12 +186,12 @@ def chipseq_status(connection, **kwargs): input_files['chip.bowtie2_idx_tar'] = '/files-reference/4DNFIMQPTYDY/' input_files['chip.blacklist'] = '/files-reference/4DNFIZ1TGJZR/' input_files['chip.chrsz'] = '/files-reference/4DNFIZJB62D1/' - input_files['chip.ref_fa'] = '/files-reference/files-reference/4DNFI823L888/' + input_files['chip.ref_fa'] = '/files-reference/4DNFI823L888/' input_files['additional_file_parameters'] = {"chip.bwa_idx_tar": {"rename": "GRCh38_no_alt_analysis_set_GCA_000001405.15.fasta.tar"}, "chip.bowtie2_idx_tar": {"rename": "GRCh38_no_alt_analysis_set_GCA_000001405.15.bowtie2Index.tar"}} if organism == 'mouse': org = 'mm' input_files['chip.bwa_idx_tar'] = '/files-reference/4DNFIZ2PWCC2/' - input_files['chip.bowtie2_idx_tar'] = '/files-reference/4DNFI2493SDN/' + input_files['chip.bowtie2_idx_tar'] = '63e22058-79c6-4e24-8231-ca4afac29dda' input_files['chip.blacklist'] = '/files-reference/4DNFIZ3FBPK8/' input_files['chip.chrsz'] = '/files-reference/4DNFIBP173GC/' input_files['chip.ref_fa'] = '/files-reference/4DNFIC1NWMVJ/' @@ -213,12 +213,13 @@ def chipseq_status(connection, **kwargs): # input_files = {'chip.ctl_fastqs': [exp_files]} # exp_files is of the form [[Files]] - # for v1.1.1 chip.ctl_fastqs = [exp_files] ([[[Files]]]), for v2.1.6, just [[Files]] - input_files['chip.ctl_fastqs'] = exp_files + # for v1.1.1 chip.ctl_fastqs = [exp_files] ([[[Files]]]), for v2.1.6, just [Files] + input_files['chip.ctl_fastqs'] = exp_files[0] control_parameters = { "chip.pipeline_type": 'tf', "chip.always_use_pooled_ctl": True, "chip.regex_bfilt_peak_chr_name": "chr[MUE]|random|alt", + "chip.mito_chr_name": "chrM", "chip.align_only": True } parameters.update(control_parameters) @@ -243,12 +244,13 @@ def chipseq_status(connection, **kwargs): else: # input_files = {'chip.fastqs': [exp_files]} # exp_files is of the form [[Files]] - # for v1.1.1 chip.fastqs = [exp_files] ([[[Files]]]), for v2.1.6, just [[Files]] - input_files['chip.fastqs'] = exp_files + # for v1.1.1 chip.fastqs = [exp_files] ([[[Files]]]), for v2.1.6, just [Files] + input_files['chip.fastqs'] = exp_files[0] exp_parameters = { "chip.pipeline_type": target_type, "chip.always_use_pooled_ctl": True, "chip.regex_bfilt_peak_chr_name": "chr[MUE]|random|alt", + "chip.mito_chr_name": "chrM", "chip.align_only": True } parameters.update(exp_parameters) From f837a9b291f42b746d874af28e7d14a5ac4ed2f6 Mon Sep 17 00:00:00 2001 From: clarabakker <57916928+clarabakker@users.noreply.github.com> Date: Thu, 30 Jun 2022 11:38:51 -0400 Subject: [PATCH 08/18] ChIP check I/O and parameters modifications for aln (ctl) and post-aln wfs --- chalicelib/checks/helpers/wfrset_utils.py | 8 +++--- chalicelib/checks/wfr_encode_checks.py | 31 +++++++++++------------ 2 files changed, 19 insertions(+), 20 deletions(-) diff --git a/chalicelib/checks/helpers/wfrset_utils.py b/chalicelib/checks/helpers/wfrset_utils.py index f442da89..e0ce36c6 100644 --- a/chalicelib/checks/helpers/wfrset_utils.py +++ b/chalicelib/checks/helpers/wfrset_utils.py @@ -213,7 +213,7 @@ def step_settings(step_name, my_organism, attribution, overwrite=None): }, { "app_name": "encode-chipseq-aln-ctl", - "workflow_uuid": "4dn-dcic-lab:wf-encode-chipseq-aln-ctl", + "workflow_uuid": "4eb427f1-a7d5-4d74-8cfa-4c77f42d5b43", "parameters": {}, "config": {}, 'custom_pf_fields': { @@ -226,9 +226,9 @@ def step_settings(step_name, my_organism, attribution, overwrite=None): }, { "app_name": "encode-chipseq-postaln", - "workflow_uuid": "4dn-dcic-lab:wf-encode-chipseq-postaln", + "workflow_uuid": "291d4c64-75de-434a-9d98-01f40d19e15e", "parameters": {}, - "config": {}, + "config": {"instance_type": "c5.2xlarge", "ebs_size": 100}, 'custom_pf_fields': { 'chip.optimal_peak': { 'genome_assembly': genome, @@ -238,7 +238,7 @@ def step_settings(step_name, my_organism, attribution, overwrite=None): 'genome_assembly': genome, 'file_type': 'conservative peaks', 'description': 'Conservative peak calls from ENCODE ChIP-Seq Pipeline'}, - 'chip.sig_fc': { + 'chip.fc_bw': { 'genome_assembly': genome, 'file_type': 'signal fold change', 'description': 'ChIP-seq signal fold change over input control'} diff --git a/chalicelib/checks/wfr_encode_checks.py b/chalicelib/checks/wfr_encode_checks.py index 7af71322..5c8f45f8 100644 --- a/chalicelib/checks/wfr_encode_checks.py +++ b/chalicelib/checks/wfr_encode_checks.py @@ -126,7 +126,6 @@ def chipseq_status(connection, **kwargs): continue # collect results from step1 runs for step2 ta = [] - taxcor = [] ta_cnt = [] # track if all experiments completed step0 and step1 ready_for_step2 = True @@ -260,16 +259,14 @@ def chipseq_status(connection, **kwargs): # if complete, step1_output will have a list of 2 files, first_ta, and fist_ta_xcor keep, step1_status, step1_output = wfr_utils.stepper(library, keep, 'step1', s1_tag, exp_files, - s1_input_files, step1_name, ['chip.first_ta', 'chip.first_ta_xcor'], + s1_input_files, step1_name, ['chip.first_ta'], additional_input={'parameters': parameters}, organism=organism) if step1_status == 'complete': exp_ta_file = step1_output[0] - exp_taxcor_file = step1_output[1] # accumulate files to patch on experiment patch_data = [exp_ta_file, ] complete['patch_opf'].append([exp_id, patch_data]) ta.append(exp_ta_file) - taxcor.append(exp_taxcor_file) # find the control file if there is a control set found if control_set: @@ -333,14 +330,9 @@ def chipseq_status(connection, **kwargs): continue if len(ta) > 2: ta_2 = [] - taxcor_2 = [] print('ExperimentSet has 3 experiments, selecting best 2') ta_2 = wfr_utils.select_best_2(ta, all_files, all_qcs) - # xcor does not have qc, use ta indexes to find the correct files - for ta_f in ta_2: - taxcor_2.append(taxcor[ta.index(ta_f)]) ta = ta_2 - taxcor = taxcor_2 # for control files ,also select best2 ta_cnt = wfr_utils.select_best_2(ta_cnt, all_files, all_qcs) @@ -350,10 +342,15 @@ def chipseq_status(connection, **kwargs): org = 'hs' s2_input_files['chip.blacklist'] = '/files-reference/4DNFIZ1TGJZR/' s2_input_files['chip.chrsz'] = '/files-reference/4DNFIZJB62D1/' + s2_input_files['chip.ref_fa'] = '/files-reference/4DNFI823L888/' + s2_input_files['chip.bowtie2_idx_tar'] = '/files-reference/4DNFIMQPTYDY/' + if organism == 'mouse': org = 'mm' s2_input_files['chip.blacklist'] = '/files-reference/4DNFIZ3FBPK8/' s2_input_files['chip.chrsz'] = '/files-reference/4DNFIBP173GC/' + s2_input_files['chip.ref_fa'] = '/files-reference/4DNFIC1NWMVJ/' + s2_input_files['chip.bowtie2_idx_tar'] = '63e22058-79c6-4e24-8231-ca4afac29dda' def rename_chip(input_at_id_list): # rename bed.gz to tagAlign.gz @@ -363,11 +360,12 @@ def rename_chip(input_at_id_list): renamed.append(acc + '.tagAlign.gz') return renamed + # not used in new pipeline, but used in benchmarking + s2_input_files['chip.bam2ta_no_filt_R1.ta'] = ta + s2_input_files['additional_file_parameters'] = {} s2_input_files['chip.tas'] = ta s2_input_files['additional_file_parameters']['chip.tas'] = {"rename": rename_chip(ta)} - s2_input_files['chip.bam2ta_no_filt_R1.ta'] = taxcor - s2_input_files['additional_file_parameters']['chip.bam2ta_no_filt_R1.ta'] = {"rename": rename_chip(taxcor)} if ta_cnt: s2_input_files['chip.ctl_tas'] = ta_cnt s2_input_files['additional_file_parameters']['chip.ctl_tas'] = {"rename": rename_chip(ta_cnt)} @@ -388,8 +386,9 @@ def rename_chip(input_at_id_list): parameters = { "chip.pipeline_type": target_type, "chip.paired_end": chip_p, - "chip.choose_ctl.always_use_pooled_ctl": True, - "chip.qc_report.desc": run_ids['desc'], + "chip.always_use_pooled_ctl": True, + "chip.mito_chr_name": "chrM", + "chip.regex_bfilt_peak_chr_name": "chr[MUE]|random|alt", "chip.gensz": org } if paired == 'single': @@ -407,14 +406,14 @@ def rename_chip(input_at_id_list): keep, step2_status, step2_output = wfr_utils.stepper(library, keep, 'step2', s2_tag, ta, s2_input_files, step2_name, - ['chip.optimal_peak', 'chip.conservative_peak', 'chip.sig_fc'], + ['chip.optimal_peak', 'chip.conservative_peak', 'chip.fc_bw'], additional_input={'parameters': parameters}, organism=organism) if step2_status == 'complete': set_opt_peak = step2_output[0] set_cons_peak = step2_output[1] - set_sig_fc = step2_output[2] + set_fc_bw = step2_output[2] # accumulate files to patch on experiment - patch_data = [set_opt_peak, set_cons_peak, set_sig_fc] + patch_data = [set_opt_peak, set_cons_peak, set_fc_bw] complete['patch_opf'].append([set_acc, patch_data]) complete['add_tag'] = [set_acc, tag] all_completed = True From 7aeed9a17ad94e860d6e5c32659759c166a5d8b6 Mon Sep 17 00:00:00 2001 From: clarabakker <57916928+clarabakker@users.noreply.github.com> Date: Thu, 30 Jun 2022 14:42:05 -0400 Subject: [PATCH 09/18] add back original to accepted versions --- chalicelib/checks/helpers/wfr_utils.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/chalicelib/checks/helpers/wfr_utils.py b/chalicelib/checks/helpers/wfr_utils.py index 34995a92..b60b805e 100644 --- a/chalicelib/checks/helpers/wfr_utils.py +++ b/chalicelib/checks/helpers/wfr_utils.py @@ -74,15 +74,15 @@ }, "encode-chipseq-aln-chip": { "run_time": 200, - "accepted_versions": ["1.1.2"] + "accepted_versions": ["1.1.1", "1.1.2"] }, "encode-chipseq-aln-ctl": { "run_time": 200, - "accepted_versions": ["1.1.2"] + "accepted_versions": ["1.1.1", "1.1.2"] }, "encode-chipseq-postaln": { "run_time": 200, - "accepted_versions": ["1.1.2"] + "accepted_versions": ["1.1.1", "1.1.2"] }, "encode-atacseq-aln": { "run_time": 200, From 34838bc1f3cd066cc20d2bda3825a75c636b486f Mon Sep 17 00:00:00 2001 From: clarabakker <57916928+clarabakker@users.noreply.github.com> Date: Thu, 7 Jul 2022 14:57:41 -0400 Subject: [PATCH 10/18] change chip-seq to use wdl's control parameters (rm ctl prefix from fastqs) --- chalicelib/checks/wfr_encode_checks.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/chalicelib/checks/wfr_encode_checks.py b/chalicelib/checks/wfr_encode_checks.py index 5c8f45f8..e2cfebd9 100644 --- a/chalicelib/checks/wfr_encode_checks.py +++ b/chalicelib/checks/wfr_encode_checks.py @@ -213,9 +213,9 @@ def chipseq_status(connection, **kwargs): # exp_files is of the form [[Files]] # for v1.1.1 chip.ctl_fastqs = [exp_files] ([[[Files]]]), for v2.1.6, just [Files] - input_files['chip.ctl_fastqs'] = exp_files[0] + input_files['chip.fastqs'] = exp_files[0] control_parameters = { - "chip.pipeline_type": 'tf', + "chip.pipeline_type": 'control', "chip.always_use_pooled_ctl": True, "chip.regex_bfilt_peak_chr_name": "chr[MUE]|random|alt", "chip.mito_chr_name": "chrM", From d90b16fbea1cb1abcf333455e16dc602d902bf31 Mon Sep 17 00:00:00 2001 From: clarabakker <57916928+clarabakker@users.noreply.github.com> Date: Fri, 8 Jul 2022 10:10:27 -0400 Subject: [PATCH 11/18] override benchmarking for chip ctl wf --- chalicelib/checks/helpers/wfrset_utils.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/chalicelib/checks/helpers/wfrset_utils.py b/chalicelib/checks/helpers/wfrset_utils.py index e0ce36c6..e5b0b1c1 100644 --- a/chalicelib/checks/helpers/wfrset_utils.py +++ b/chalicelib/checks/helpers/wfrset_utils.py @@ -215,7 +215,7 @@ def step_settings(step_name, my_organism, attribution, overwrite=None): "app_name": "encode-chipseq-aln-ctl", "workflow_uuid": "4eb427f1-a7d5-4d74-8cfa-4c77f42d5b43", "parameters": {}, - "config": {}, + "config": {"instance_type": "c5.2xlarge", "ebs_size": 100}, 'custom_pf_fields': { 'chip.first_ta_ctl': { 'genome_assembly': genome, From c4f5514f7cd0c6798e210539ece701fe0e53d41f Mon Sep 17 00:00:00 2001 From: clarabakker <57916928+clarabakker@users.noreply.github.com> Date: Thu, 21 Jul 2022 12:15:59 -0400 Subject: [PATCH 12/18] added bool to get_chip_files to allow different endedness; updated usage in wfr_encode checks --- chalicelib/checks/helpers/wfr_utils.py | 18 +++++++++--------- chalicelib/checks/wfr_encode_checks.py | 14 +++++++++++--- 2 files changed, 20 insertions(+), 12 deletions(-) diff --git a/chalicelib/checks/helpers/wfr_utils.py b/chalicelib/checks/helpers/wfr_utils.py index b60b805e..3527f192 100644 --- a/chalicelib/checks/helpers/wfr_utils.py +++ b/chalicelib/checks/helpers/wfr_utils.py @@ -2102,9 +2102,9 @@ def get_chip_info(f_exp_resp, all_items): return control, control_set, target_type, organism -def get_chip_files(exp_resp, all_files): +def get_chip_files(exp_resp, all_files, isChip): files = [] - paired = "" + paired = [] exp_files = exp_resp['files'] for a_file in exp_files: f_t = [] @@ -2112,7 +2112,7 @@ def get_chip_files(exp_resp, all_files): # get pair end no pair_end = file_resp.get('paired_end') if pair_end == '2': - paired = 'paired' + paired.append('paired') continue # get paired file paired_with = "" @@ -2122,22 +2122,22 @@ def get_chip_files(exp_resp, all_files): else: for relation in relations: if relation['relationship_type'] == 'paired with': - paired = 'paired' + paired.append('paired') paired_with = relation['file']['@id'] # decide if data is not paired end reads if not paired_with: if not paired: - paired = 'single' - else: - if paired != 'single': - print('inconsistent fastq pair info') - continue + paired.append('single') f_t.append(file_resp['@id']) else: f2 = [i for i in all_files if i['@id'] == paired_with][0] f_t.append(file_resp['@id']) f_t.append(f2['@id']) files.append(f_t) + + # needs to output a string for non-ChIP-seq usage + if not isChip: + paired = paired[0] return files, paired diff --git a/chalicelib/checks/wfr_encode_checks.py b/chalicelib/checks/wfr_encode_checks.py index e2cfebd9..2d0bd49c 100644 --- a/chalicelib/checks/wfr_encode_checks.py +++ b/chalicelib/checks/wfr_encode_checks.py @@ -127,6 +127,7 @@ def chipseq_status(connection, **kwargs): # collect results from step1 runs for step2 ta = [] ta_cnt = [] + paired_ends = [] # track if all experiments completed step0 and step1 ready_for_step2 = True for an_exp in replicate_exps: @@ -136,7 +137,7 @@ def chipseq_status(connection, **kwargs): control_ready = True exp_id = an_exp['replicate_exp']['accession'] exp_resp = [i for i in all_items['experiment_seq'] if i['accession'] == exp_id][0] - exp_files, paired = wfr_utils.get_chip_files(exp_resp, all_files) + exp_files, paired = wfr_utils.get_chip_files(exp_resp, all_files, True) # if there are more then 2 files, we need to merge: print(exp_id, len(exp_files), paired) # if too many input, merge them @@ -147,8 +148,10 @@ def chipseq_status(connection, **kwargs): # first add paired end 1s input_list.append([i[0] for i in exp_files]) input_list.append([i[1] for i in exp_files]) + paired_ends.append('true') elif paired == 'single': input_list.append([i[0] for i in exp_files]) + paired_ends.append('false') # collect files for step1 and step1c merged_files = [] step0_status = 'complete' @@ -205,6 +208,8 @@ def chipseq_status(connection, **kwargs): parameters['chip.paired_end'] = False elif paired == 'paired': parameters['chip.paired_end'] = True + else: + parameters['chip.paired_ends'] = [True if pe=="paired" else False for pe in paired] # run step1 for control if control: @@ -385,12 +390,15 @@ def rename_chip(input_at_id_list): run_ids = {'desc': set_acc + a_set.get('description', '')} parameters = { "chip.pipeline_type": target_type, - "chip.paired_end": chip_p, "chip.always_use_pooled_ctl": True, "chip.mito_chr_name": "chrM", "chip.regex_bfilt_peak_chr_name": "chr[MUE]|random|alt", "chip.gensz": org } + if paired == 'paired' or paired == 'single': + parameters['chip.paired_end'] = chip_p + else: + parameters['chip.paired_ends'] = paired if paired == 'single': frag_temp = [300] fraglist = frag_temp * len(ta) @@ -596,7 +604,7 @@ def atacseq_status(connection, **kwargs): exp_id = an_exp['replicate_exp']['accession'] exp_resp = [i for i in all_items['experiment_atacseq'] if i['accession'] == exp_id][0] # exp_files [[pair1,pair2], [pair1, pair2]] - exp_files, paired = wfr_utils.get_chip_files(exp_resp, all_files) + exp_files, paired = wfr_utils.get_chip_files(exp_resp, all_files, False) # if there are more then 2 files, we need to merge: print(exp_id, len(exp_files), paired) # if too many input, merge them From 7e463657c015bafb24dd9d878374775cf421678f Mon Sep 17 00:00:00 2001 From: clarabakker <57916928+clarabakker@users.noreply.github.com> Date: Fri, 29 Jul 2022 14:08:08 -0400 Subject: [PATCH 13/18] change paired array to booleans for ChIP-seq post-align --- chalicelib/checks/wfr_encode_checks.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/chalicelib/checks/wfr_encode_checks.py b/chalicelib/checks/wfr_encode_checks.py index 2d0bd49c..c6bae51e 100644 --- a/chalicelib/checks/wfr_encode_checks.py +++ b/chalicelib/checks/wfr_encode_checks.py @@ -381,6 +381,7 @@ def rename_chip(input_at_id_list): chip_p = False elif paired == 'paired': chip_p = True + if not control_set: if target_type == 'histone': set_summary += "| skipped - histone without control needs attention, ie change to tf" @@ -398,7 +399,7 @@ def rename_chip(input_at_id_list): if paired == 'paired' or paired == 'single': parameters['chip.paired_end'] = chip_p else: - parameters['chip.paired_ends'] = paired + parameters['chip.paired_ends'] = [True if pe=="paired" else False for pe in paired] if paired == 'single': frag_temp = [300] fraglist = frag_temp * len(ta) From 4e15ca911f0aef603ac9328006a7df488862c13d Mon Sep 17 00:00:00 2001 From: Clara <57916928+clarabakker@users.noreply.github.com> Date: Mon, 8 Aug 2022 11:50:11 -0400 Subject: [PATCH 14/18] update ChIP-seq pipeline versions --- chalicelib/checks/helpers/wfr_utils.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/chalicelib/checks/helpers/wfr_utils.py b/chalicelib/checks/helpers/wfr_utils.py index 3527f192..00542843 100644 --- a/chalicelib/checks/helpers/wfr_utils.py +++ b/chalicelib/checks/helpers/wfr_utils.py @@ -74,15 +74,15 @@ }, "encode-chipseq-aln-chip": { "run_time": 200, - "accepted_versions": ["1.1.1", "1.1.2"] + "accepted_versions": ["1.1.1", "2.1.6"] }, "encode-chipseq-aln-ctl": { "run_time": 200, - "accepted_versions": ["1.1.1", "1.1.2"] + "accepted_versions": ["1.1.1", "2.1.6"] }, "encode-chipseq-postaln": { "run_time": 200, - "accepted_versions": ["1.1.1", "1.1.2"] + "accepted_versions": ["1.1.1", "2.1.6"] }, "encode-atacseq-aln": { "run_time": 200, @@ -196,7 +196,7 @@ # OFFICIAL 'ATAC-seq': ['ENCODE_ATAC_Pipeline_1.1.1'], # OFFICIAL - 'ChIP-seq': ['ENCODE_ChIP_Pipeline_1.1.1', 'ENCODE_ChIP_Pipeline_1.1.2'], + 'ChIP-seq': ['ENCODE_ChIP_Pipeline_1.1.1', 'ENCODE_ChIP_Pipeline_2.1.6'], # OFFICIAL 'RNA-seq': ['ENCODE_RNAseq_Pipeline_1.1'], 'single cell Repli-seq': [''], From 04aabe793dc16767a67d7ea8ce2ab3d486e52256 Mon Sep 17 00:00:00 2001 From: Clara <57916928+clarabakker@users.noreply.github.com> Date: Fri, 2 Dec 2022 14:12:13 -0500 Subject: [PATCH 15/18] adjustments for ChIP-seq benchmarking, new WDL parameters for paired endedness --- chalicelib/checks/wfr_encode_checks.py | 32 +++++++++---- poetry.lock | 62 ++++++++++++++------------ pyproject.toml | 1 + 3 files changed, 58 insertions(+), 37 deletions(-) diff --git a/chalicelib/checks/wfr_encode_checks.py b/chalicelib/checks/wfr_encode_checks.py index c6bae51e..3512bdbc 100644 --- a/chalicelib/checks/wfr_encode_checks.py +++ b/chalicelib/checks/wfr_encode_checks.py @@ -198,9 +198,10 @@ def chipseq_status(connection, **kwargs): input_files['chip.chrsz'] = '/files-reference/4DNFIBP173GC/' input_files['chip.ref_fa'] = '/files-reference/4DNFIC1NWMVJ/' input_files['additional_file_parameters'] = {"chip.bwa_idx_tar": {"rename": "mm10_no_alt_analysis_set_ENCODE.fasta.tar"}, "chip.bowtie2_idx_tar": {"rename": "mm10_no_alt_analysis_set_ENCODE.bowtie2Index.tar"}} - # step1 Parameters + # step1 parameters parameters = {} parameters["chip.gensz"] = org + parameters["chip.filter_chrs"] = ["chr[MUE]","random","alt"] if paired == 'single': frag_temp = [300] fraglist = frag_temp * len(exp_files) @@ -222,7 +223,7 @@ def chipseq_status(connection, **kwargs): control_parameters = { "chip.pipeline_type": 'control', "chip.always_use_pooled_ctl": True, - "chip.regex_bfilt_peak_chr_name": "chr[MUE]|random|alt", + "chip.regex_bfilt_peak_chr_name": "chr[\dXY]+", "chip.mito_chr_name": "chrM", "chip.align_only": True } @@ -232,7 +233,7 @@ def chipseq_status(connection, **kwargs): s1c_tag = exp_id keep, step1c_status, step1c_output = wfr_utils.stepper(library, keep, 'step1c', s1c_tag, exp_files, - s1c_input_files, step1c_name, 'chip.first_ta_ctl', + s1c_input_files, step1c_name, 'chip.first_ta', additional_input={'parameters': parameters}, organism=organism) if step1c_status == 'complete': # accumulate files to patch on experiment @@ -253,7 +254,7 @@ def chipseq_status(connection, **kwargs): exp_parameters = { "chip.pipeline_type": target_type, "chip.always_use_pooled_ctl": True, - "chip.regex_bfilt_peak_chr_name": "chr[MUE]|random|alt", + "chip.regex_bfilt_peak_chr_name": "chr[\dXY]+", "chip.mito_chr_name": "chrM", "chip.align_only": True } @@ -287,7 +288,7 @@ def chipseq_status(connection, **kwargs): print('Multiple controls for this exp', exp_id) continue exp_cnt_id = exp_cnt_ids[0] - print('controled by set', exp_cnt_id) + print('controlled by set', exp_cnt_id) # have to do a get for the control experiment exp_cnt_resp = [i for i in all_items['experiment_seq'] if i['@id'] == exp_cnt_id][0] cont_file = '' @@ -365,9 +366,6 @@ def rename_chip(input_at_id_list): renamed.append(acc + '.tagAlign.gz') return renamed - # not used in new pipeline, but used in benchmarking - s2_input_files['chip.bam2ta_no_filt_R1.ta'] = ta - s2_input_files['additional_file_parameters'] = {} s2_input_files['chip.tas'] = ta s2_input_files['additional_file_parameters']['chip.tas'] = {"rename": rename_chip(ta)} @@ -393,13 +391,29 @@ def rename_chip(input_at_id_list): "chip.pipeline_type": target_type, "chip.always_use_pooled_ctl": True, "chip.mito_chr_name": "chrM", - "chip.regex_bfilt_peak_chr_name": "chr[MUE]|random|alt", + "chip.regex_bfilt_peak_chr_name": "chr[\dXY]+", "chip.gensz": org } if paired == 'paired' or paired == 'single': parameters['chip.paired_end'] = chip_p + parameters['chip.ctl_paired_end'] = chip_p + + # assumes paired is instead a list + # if all strings are the same, define paired_end using the first string + elif len(set(paired)) == 1: + chip_p = (paired[0] == 'paired') + parameters['chip.paired_end'] = chip_p + parameters['chip.ctl_paired_end'] = chip_p + + # in the case of neither, define paired_ends else: + print("Mixed endedness here!") parameters['chip.paired_ends'] = [True if pe=="paired" else False for pe in paired] + parameters['chip.ctl_paired_ends'] = [True if pe=="paired" else False for pe in paired] + parameters['chip.ctl_depth_limit'] = 0 + parameters['chip.exp_ctl_depth_limit'] = 0 + parameters['ctl_subsample_reads'] = 15000000 + if paired == 'single': frag_temp = [300] fraglist = frag_temp * len(ta) diff --git a/poetry.lock b/poetry.lock index 0254fc4f..83bf5b0d 100644 --- a/poetry.lock +++ b/poetry.lock @@ -23,10 +23,10 @@ optional = false python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*, !=3.4.*" [package.extras] -dev = ["coverage[toml] (>=5.0.2)", "hypothesis", "pympler", "pytest (>=4.3.0)", "six", "mypy", "pytest-mypy-plugins", "zope.interface", "furo", "sphinx", "sphinx-notfound-page", "pre-commit", "cloudpickle"] -docs = ["furo", "sphinx", "zope.interface", "sphinx-notfound-page"] -tests = ["coverage[toml] (>=5.0.2)", "hypothesis", "pympler", "pytest (>=4.3.0)", "six", "mypy", "pytest-mypy-plugins", "zope.interface", "cloudpickle"] -tests_no_zope = ["coverage[toml] (>=5.0.2)", "hypothesis", "pympler", "pytest (>=4.3.0)", "six", "mypy", "pytest-mypy-plugins", "cloudpickle"] +dev = ["cloudpickle", "coverage[toml] (>=5.0.2)", "furo", "hypothesis", "mypy", "pre-commit", "pympler", "pytest (>=4.3.0)", "pytest-mypy-plugins", "six", "sphinx", "sphinx-notfound-page", "zope.interface"] +docs = ["furo", "sphinx", "sphinx-notfound-page", "zope.interface"] +tests = ["cloudpickle", "coverage[toml] (>=5.0.2)", "hypothesis", "mypy", "pympler", "pytest (>=4.3.0)", "pytest-mypy-plugins", "six", "zope.interface"] +tests_no_zope = ["cloudpickle", "coverage[toml] (>=5.0.2)", "hypothesis", "mypy", "pympler", "pytest (>=4.3.0)", "pytest-mypy-plugins", "six"] [[package]] name = "aws-requests-auth" @@ -56,11 +56,11 @@ lxml = ["lxml"] [[package]] name = "benchmark-4dn" -version = "0.5.15" -description = "Benchmark functions that returns total space, mem, cpu given input size and parameters for the CWL workflows" +version = "0.5.18" +description = "Benchmark functions that returns total space, mem, cpu given input size and parameters for the CWL workflows" category = "main" optional = false -python-versions = "*" +python-versions = ">=3.5,<4.0" [[package]] name = "blessed" @@ -233,7 +233,7 @@ websocket-client = ">=0.32.0" [package.extras] ssh = ["paramiko (>=2.4.2)"] -tls = ["pyOpenSSL (>=17.5.0)", "cryptography (>=1.3.4)", "idna (>=2.0.0)"] +tls = ["cryptography (>=1.3.4)", "idna (>=2.0.0)", "pyOpenSSL (>=17.5.0)"] [[package]] name = "docutils" @@ -255,7 +255,7 @@ python-versions = ">=2.6, !=3.0.*, !=3.1.*, !=3.2.*, <4" urllib3 = ">=1.21.1" [package.extras] -develop = ["requests (>=2.0.0,<3.0.0)", "nose", "coverage", "mock", "pyyaml", "nosexcover", "numpy", "pandas", "sphinx (<1.7)", "sphinx-rtd-theme"] +develop = ["coverage", "mock", "nose", "nosexcover", "numpy", "pandas", "pyyaml", "requests (>=2.0.0,<3.0.0)", "sphinx (<1.7)", "sphinx-rtd-theme"] requests = ["requests (>=2.4.0,<3.0.0)"] [[package]] @@ -272,7 +272,7 @@ python-dateutil = "*" six = "*" [package.extras] -develop = ["mock", "pytest (>=3.0.0)", "pytest-cov", "pytz", "coverage (<5.0.0)", "sphinx", "sphinx-rtd-theme"] +develop = ["coverage (<5.0.0)", "mock", "pytest (>=3.0.0)", "pytest-cov", "pytz", "sphinx", "sphinx-rtd-theme"] [[package]] name = "flaky" @@ -399,7 +399,7 @@ rsa = {version = ">=3.1.4,<5", markers = "python_version >= \"3.6\""} six = ">=1.9.0" [package.extras] -aiohttp = ["requests (>=2.20.0,<3.0.0dev)", "aiohttp (>=3.6.2,<4.0.0dev)"] +aiohttp = ["aiohttp (>=3.6.2,<4.0.0dev)", "requests (>=2.20.0,<3.0.0dev)"] pyopenssl = ["pyopenssl (>=20.0.0)"] reauth = ["pyu2f (>=0.1.5)"] @@ -489,9 +489,9 @@ typing-extensions = {version = ">=3.6.4", markers = "python_version < \"3.8\""} zipp = ">=0.5" [package.extras] -docs = ["sphinx", "jaraco.packaging (>=8.2)", "rst.linker (>=1.9)"] +docs = ["jaraco.packaging (>=8.2)", "rst.linker (>=1.9)", "sphinx"] perf = ["ipython"] -testing = ["pytest (>=6)", "pytest-checkdocs (>=2.4)", "pytest-flake8", "pytest-cov", "pytest-enabler (>=1.0.1)", "packaging", "pep517", "pyfakefs", "flufl.flake8", "pytest-perf (>=0.9.2)", "pytest-black (>=0.3.7)", "pytest-mypy", "importlib-resources (>=1.3)"] +testing = ["flufl.flake8", "importlib-resources (>=1.3)", "packaging", "pep517", "pyfakefs", "pytest (>=6)", "pytest-black (>=0.3.7)", "pytest-checkdocs (>=2.4)", "pytest-cov", "pytest-enabler (>=1.0.1)", "pytest-flake8", "pytest-mypy", "pytest-perf (>=0.9.2)"] [[package]] name = "inquirer" @@ -624,7 +624,7 @@ python-dateutil = ">=2.7.3" pytz = ">=2017.2" [package.extras] -test = ["pytest (>=4.0.2)", "pytest-xdist", "hypothesis (>=3.58)"] +test = ["hypothesis (>=3.58)", "pytest (>=4.0.2)", "pytest-xdist"] [[package]] name = "pluggy" @@ -916,10 +916,10 @@ python-versions = "*" six = "*" [package.extras] -azure-pipelines = ["coverage", "freezegun (>=0.2.8)", "pretend", "pytest (>=3.3.0)", "simplejson", "pytest-azurepipelines", "python-rapidjson"] -dev = ["coverage", "freezegun (>=0.2.8)", "pretend", "pytest (>=3.3.0)", "simplejson", "sphinx", "twisted", "pre-commit", "python-rapidjson"] +azure-pipelines = ["coverage", "freezegun (>=0.2.8)", "pretend", "pytest (>=3.3.0)", "pytest-azurepipelines", "python-rapidjson", "simplejson"] +dev = ["coverage", "freezegun (>=0.2.8)", "pre-commit", "pretend", "pytest (>=3.3.0)", "python-rapidjson", "simplejson", "sphinx", "twisted"] docs = ["sphinx", "twisted"] -tests = ["coverage", "freezegun (>=0.2.8)", "pretend", "pytest (>=3.3.0)", "simplejson", "python-rapidjson"] +tests = ["coverage", "freezegun (>=0.2.8)", "pretend", "pytest (>=3.3.0)", "python-rapidjson", "simplejson"] [[package]] name = "tibanna" @@ -990,8 +990,8 @@ optional = false python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*, !=3.4.*, <4" [package.extras] -brotli = ["brotlicffi (>=0.8.0)", "brotli (>=1.0.9)", "brotlipy (>=0.6.0)"] -secure = ["pyOpenSSL (>=0.14)", "cryptography (>=1.3.4)", "idna (>=2.0.0)", "certifi", "ipaddress"] +brotli = ["brotli (>=1.0.9)", "brotlicffi (>=0.8.0)", "brotlipy (>=0.6.0)"] +secure = ["certifi", "cryptography (>=1.3.4)", "idna (>=2.0.0)", "ipaddress", "pyOpenSSL (>=0.14)"] socks = ["PySocks (>=1.5.6,!=1.5.7,<2.0)"] [[package]] @@ -1004,7 +1004,7 @@ python-versions = ">=3.6.0" [package.extras] docs = ["Sphinx (>=1.8.1)", "docutils", "pylons-sphinx-themes (>=1.0.9)"] -testing = ["pytest", "pytest-cover", "coverage (>=5.0)"] +testing = ["coverage (>=5.0)", "pytest", "pytest-cover"] [[package]] name = "wcwidth" @@ -1024,7 +1024,7 @@ python-versions = ">=2.7,!=3.0.*,!=3.1.*,!=3.2.*" [package.extras] docs = ["Sphinx (>=1.7.5)", "pylons-sphinx-themes"] -testing = ["pytest (>=3.1.0)", "coverage", "pytest-cov", "pytest-xdist"] +testing = ["coverage", "pytest (>=3.1.0)", "pytest-cov", "pytest-xdist"] [[package]] name = "websocket-client" @@ -1055,7 +1055,7 @@ WebOb = ">=1.2" [package.extras] docs = ["Sphinx (>=1.8.1)", "docutils", "pylons-sphinx-themes (>=1.0.8)"] -tests = ["nose (<1.3.0)", "coverage", "mock", "pastedeploy", "wsgiproxy2", "pyquery"] +tests = ["coverage", "mock", "nose (<1.3.0)", "pastedeploy", "pyquery", "wsgiproxy2"] [[package]] name = "zipp" @@ -1066,13 +1066,13 @@ optional = false python-versions = ">=3.6" [package.extras] -docs = ["sphinx", "jaraco.packaging (>=8.2)", "rst.linker (>=1.9)"] -testing = ["pytest (>=4.6)", "pytest-checkdocs (>=2.4)", "pytest-flake8", "pytest-cov", "pytest-enabler (>=1.0.1)", "jaraco.itertools", "func-timeout", "pytest-black (>=0.3.7)", "pytest-mypy"] +docs = ["jaraco.packaging (>=8.2)", "rst.linker (>=1.9)", "sphinx"] +testing = ["func-timeout", "jaraco.itertools", "pytest (>=4.6)", "pytest-black (>=0.3.7)", "pytest-checkdocs (>=2.4)", "pytest-cov", "pytest-enabler (>=1.0.1)", "pytest-flake8", "pytest-mypy"] [metadata] lock-version = "1.1" python-versions = ">=3.6.1,<3.8" -content-hash = "dbd83a8278782c21e4229091dc08a61bfc350f898f85385f41655593d6e349b5" +content-hash = "ceaff566fac8dcc1ef419a2e861e2be01be83d30a22d87d43cd2b2e9f32b0b3a" [metadata.files] ansicon = [ @@ -1096,9 +1096,8 @@ beautifulsoup4 = [ {file = "beautifulsoup4-4.11.1.tar.gz", hash = "sha256:ad9aa55b65ef2808eb405f46cf74df7fcb7044d5cbc26487f96eb2ef2e436693"}, ] benchmark-4dn = [ - {file = "Benchmark-4dn-0.5.15.tar.gz", hash = "sha256:941ca3893cacf6ed96e9f10ceeec3477455acabc07c4cdcaf453b51ad9c51ca8"}, - {file = "Benchmark_4dn-0.5.15-py3-none-any.whl", hash = "sha256:bb40b54691229837cdc320ea92e554f046e96349c9d62beafd82faf73777a610"}, - {file = "Benchmark_4dn-0.5.15-py3.6.egg", hash = "sha256:3db398268fe2f4524584bbec521fe57e73eba0733de08c06a28806fea178c480"}, + {file = "Benchmark-4dn-0.5.18.tar.gz", hash = "sha256:6a2c9eba74031ce89ef9b2433e995ceccab6839e11c18875863b4f17e486cd7c"}, + {file = "benchmark_4dn-0.5.18-py3-none-any.whl", hash = "sha256:b03e31ead9c75539bca5aec769f88d27218bf2f267ad4c5d9b7424af5a1e1da4"}, ] blessed = [ {file = "blessed-1.19.0-py2.py3-none-any.whl", hash = "sha256:1f2d462631b2b6d2d4c3c65b54ef79ad87a6ca2dd55255df2f8d739fcc8a1ddb"}, @@ -1548,6 +1547,13 @@ pyyaml = [ {file = "PyYAML-6.0-cp310-cp310-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_12_x86_64.manylinux2010_x86_64.whl", hash = "sha256:f84fbc98b019fef2ee9a1cb3ce93e3187a6df0b2538a651bfb890254ba9f90b5"}, {file = "PyYAML-6.0-cp310-cp310-win32.whl", hash = "sha256:2cd5df3de48857ed0544b34e2d40e9fac445930039f3cfe4bcc592a1f836d513"}, {file = "PyYAML-6.0-cp310-cp310-win_amd64.whl", hash = "sha256:daf496c58a8c52083df09b80c860005194014c3698698d1a57cbcfa182142a3a"}, + {file = "PyYAML-6.0-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:d4b0ba9512519522b118090257be113b9468d804b19d63c71dbcf4a48fa32358"}, + {file = "PyYAML-6.0-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:81957921f441d50af23654aa6c5e5eaf9b06aba7f0a19c18a538dc7ef291c5a1"}, + {file = "PyYAML-6.0-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:afa17f5bc4d1b10afd4466fd3a44dc0e245382deca5b3c353d8b757f9e3ecb8d"}, + {file = "PyYAML-6.0-cp311-cp311-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:dbad0e9d368bb989f4515da330b88a057617d16b6a8245084f1b05400f24609f"}, + {file = "PyYAML-6.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:432557aa2c09802be39460360ddffd48156e30721f5e8d917f01d31694216782"}, + {file = "PyYAML-6.0-cp311-cp311-win32.whl", hash = "sha256:bfaef573a63ba8923503d27530362590ff4f576c626d86a9fed95822a8255fd7"}, + {file = "PyYAML-6.0-cp311-cp311-win_amd64.whl", hash = "sha256:01b45c0191e6d66c470b6cf1b9531a771a83c1c4208272ead47a3ae4f2f603bf"}, {file = "PyYAML-6.0-cp36-cp36m-macosx_10_9_x86_64.whl", hash = "sha256:897b80890765f037df3403d22bab41627ca8811ae55e9a722fd0392850ec4d86"}, {file = "PyYAML-6.0-cp36-cp36m-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:50602afada6d6cbfad699b0c7bb50d5ccffa7e46a3d738092afddc1f9758427f"}, {file = "PyYAML-6.0-cp36-cp36m-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:48c346915c114f5fdb3ead70312bd042a953a8ce5c7106d5bfb1a5254e47da92"}, diff --git a/pyproject.toml b/pyproject.toml index faf33da1..209f10d3 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -31,6 +31,7 @@ pytest = "5.1.2" gspread = ">=3.6.0" oauth2client = ">=4.1.3" pandas = ">=1.1.4" +benchmark-4dn = "^0.5.17" [tool.poetry.dev-dependencies] chalice = "^1.26.0" From 081242c446689062c5c8abd2aeecab7eecb9e0bb Mon Sep 17 00:00:00 2001 From: Clara <57916928+clarabakker@users.noreply.github.com> Date: Mon, 27 Mar 2023 18:23:53 -0400 Subject: [PATCH 16/18] qc get failure handling --- .../checks/helpers/wfr_utils.py | 17 ++++++++++++----- .../checks/helpers/wfrset_utils.py | 2 +- 2 files changed, 13 insertions(+), 6 deletions(-) diff --git a/chalicelib_fourfront/checks/helpers/wfr_utils.py b/chalicelib_fourfront/checks/helpers/wfr_utils.py index a586251b..b54cecf8 100644 --- a/chalicelib_fourfront/checks/helpers/wfr_utils.py +++ b/chalicelib_fourfront/checks/helpers/wfr_utils.py @@ -2153,15 +2153,22 @@ def select_best_2(file_list, all_files, all_qcs): f_resp = [i for i in all_files if i['@id'] == f][0] qc = f_resp['quality_metric'] qc_resp = [i for i in all_qcs if i['uuid'] == qc['uuid']][0] - try: - score = qc_resp['nodup_flagstat_qc'][0]['mapped'] - except Exception: - score = qc_resp['ctl_nodup_flagstat_qc'][0]['mapped'] + if 'nodup_flagstat_qc' in qc_resp: + try: + score = qc_resp['nodup_flagstat_qc'][0]['mapped'] + except Exception: + score = qc_resp['ctl_nodup_flagstat_qc'][0]['mapped'] + if 'align' in qc_resp: + try: + score = qc_resp['align']['nodup_samstat']['rep1']['mapped_reads'] + except Exception: + score = qc_resp['align']['ctl_nodup_samstat']['rep1']['mapped_reads'] + else: + raise Exception('no mapped qc statistics found') scores.append((score, f)) scores = sorted(scores, key=lambda x: -x[0]) return [scores[0][1], scores[1][1]] - def limit_number_of_runs(check, my_auth): """Checks the number of workflow runs started in the past 6h. Return the number of remaining runs before hitting the rate limit of pulls from Docker diff --git a/chalicelib_fourfront/checks/helpers/wfrset_utils.py b/chalicelib_fourfront/checks/helpers/wfrset_utils.py index e5b0b1c1..29c99bf4 100644 --- a/chalicelib_fourfront/checks/helpers/wfrset_utils.py +++ b/chalicelib_fourfront/checks/helpers/wfrset_utils.py @@ -228,7 +228,7 @@ def step_settings(step_name, my_organism, attribution, overwrite=None): "app_name": "encode-chipseq-postaln", "workflow_uuid": "291d4c64-75de-434a-9d98-01f40d19e15e", "parameters": {}, - "config": {"instance_type": "c5.2xlarge", "ebs_size": 100}, + "config": {"ebs_size": 100}, 'custom_pf_fields': { 'chip.optimal_peak': { 'genome_assembly': genome, From ae2a94983b5ac73755e7396f3a438851f021f553 Mon Sep 17 00:00:00 2001 From: Ubuntu Date: Tue, 29 Aug 2023 20:33:40 +0000 Subject: [PATCH 17/18] identify mixed paired/single ended, general cleanup --- .../checks/helpers/wfrset_utils.py | 7 +- .../checks/wfr_encode_checks.py | 71 ++++++++++--------- poetry.lock | 6 +- 3 files changed, 44 insertions(+), 40 deletions(-) diff --git a/chalicelib_fourfront/checks/helpers/wfrset_utils.py b/chalicelib_fourfront/checks/helpers/wfrset_utils.py index 29c99bf4..8c2f2bfd 100644 --- a/chalicelib_fourfront/checks/helpers/wfrset_utils.py +++ b/chalicelib_fourfront/checks/helpers/wfrset_utils.py @@ -197,7 +197,7 @@ def step_settings(step_name, my_organism, attribution, overwrite=None): "app_name": "encode-chipseq-aln-chip", "workflow_uuid": "212a9c91-25d6-473f-b56b-8dd93958c580", "parameters": {}, - "config": {}, + "config": {"ebs_size": 70}, 'custom_pf_fields': { 'chip.first_ta': { 'genome_assembly': genome, @@ -215,7 +215,7 @@ def step_settings(step_name, my_organism, attribution, overwrite=None): "app_name": "encode-chipseq-aln-ctl", "workflow_uuid": "4eb427f1-a7d5-4d74-8cfa-4c77f42d5b43", "parameters": {}, - "config": {"instance_type": "c5.2xlarge", "ebs_size": 100}, + "config":{"instance_type": 'c5.2xlarge', "ebs_size": 70}, 'custom_pf_fields': { 'chip.first_ta_ctl': { 'genome_assembly': genome, @@ -228,7 +228,7 @@ def step_settings(step_name, my_organism, attribution, overwrite=None): "app_name": "encode-chipseq-postaln", "workflow_uuid": "291d4c64-75de-434a-9d98-01f40d19e15e", "parameters": {}, - "config": {"ebs_size": 100}, + "config": {"instance_type": "c5.2xlarge", "ebs_size": 80}, 'custom_pf_fields': { 'chip.optimal_peak': { 'genome_assembly': genome, @@ -329,6 +329,7 @@ def step_settings(step_name, my_organism, attribution, overwrite=None): 'rna.strandedness_direction': '', 'rna.endedness': '' }, + "config": {"instance_type": ["m5a.4xlarge", "m6a.4xlarge"], "ebs_size": 90}, 'custom_pf_fields': { 'rna.outbam': { 'genome_assembly': genome, diff --git a/chalicelib_fourfront/checks/wfr_encode_checks.py b/chalicelib_fourfront/checks/wfr_encode_checks.py index 109b3a4d..50de5aa2 100644 --- a/chalicelib_fourfront/checks/wfr_encode_checks.py +++ b/chalicelib_fourfront/checks/wfr_encode_checks.py @@ -138,20 +138,29 @@ def chipseq_status(connection, **kwargs): exp_id = an_exp['replicate_exp']['accession'] exp_resp = [i for i in all_items['experiment_seq'] if i['accession'] == exp_id][0] exp_files, paired = wfr_utils.get_chip_files(exp_resp, all_files, True) - # if there are more then 2 files, we need to merge: print(exp_id, len(exp_files), paired) - # if too many input, merge them + + # note: expects all files in the same experiment to have the same endedness + paired_ends.append(list(set(paired))[0]) + + # if there are more then 2 input filesets, we need to merge them: if len(exp_files) > 2: - # exp_files format [[pair1,pair2], [pair1, pair2]] @id - input_list = [] - if paired == 'paired': - # first add paired end 1s - input_list.append([i[0] for i in exp_files]) - input_list.append([i[1] for i in exp_files]) - paired_ends.append('true') - elif paired == 'single': - input_list.append([i[0] for i in exp_files]) - paired_ends.append('false') + # exp_files format: [[pair1,pair2], [pair1,pair2]] + # There are more than 2 files, so paired is a list (not string) + # Traverse paired/exp files and assign them for merging + input_list = [[],[],[]] + i = j = 0 + while i < len(paired): + exp = exp_files[j] + if paired[i] == 'paired': + # first add paired end 1s + input_list[0].append(exp_files[j][0]) + input_list[1].append(exp_files[j][1]) + i+=2 + elif paired[i] == 'single': + input_list[2].append(exp_files[0]) + i+=1 + j+=1 # collect files for step1 and step1c merged_files = [] step0_status = 'complete' @@ -160,15 +169,16 @@ def chipseq_status(connection, **kwargs): for merge_case in input_list: merge_enum += 1 # RUN STEP 0 - s0_input_files = {'input_fastqs': merge_case} - s0_tag = exp_id + '_p' + str(merge_enum) - keep, step0_status, step0_output = wfr_utils.stepper(library, keep, + if merge_case: + s0_input_files = {'input_fastqs': merge_case} + s0_tag = exp_id + '_p' + str(merge_enum) + keep, step0_status, step0_output = wfr_utils.stepper(library, keep, 'step0', s0_tag, merge_case, s0_input_files, step0_name, 'merged_fastq', organism=organism) - if step0_status == 'complete': - merged_files.append(step0_output) - else: - ready_for_step1 = False + if step0_status == 'complete': + merged_files.append(step0_output) + else: + ready_for_step1 = False if ready_for_step1: # rewrite exp_files with merged ones @@ -259,7 +269,6 @@ def chipseq_status(connection, **kwargs): "chip.align_only": True } parameters.update(exp_parameters) - s1_input_files = input_files s1_tag = exp_id # if complete, step1_output will have a list of 2 files, first_ta, and fist_ta_xcor @@ -372,13 +381,9 @@ def rename_chip(input_at_id_list): if ta_cnt: s2_input_files['chip.ctl_tas'] = ta_cnt s2_input_files['additional_file_parameters']['chip.ctl_tas'] = {"rename": rename_chip(ta_cnt)} - + # collect parameters parameters = {} - if paired == 'single': - chip_p = False - elif paired == 'paired': - chip_p = True if not control_set: if target_type == 'histone': @@ -394,25 +399,22 @@ def rename_chip(input_at_id_list): "chip.regex_bfilt_peak_chr_name": "chr[\dXY]+", "chip.gensz": org } - if paired == 'paired' or paired == 'single': - parameters['chip.paired_end'] = chip_p - parameters['chip.ctl_paired_end'] = chip_p # assumes paired is instead a list # if all strings are the same, define paired_end using the first string - elif len(set(paired)) == 1: - chip_p = (paired[0] == 'paired') + if len(set(paired_ends)) == 1: + chip_p = (paired_ends[0] == 'paired') parameters['chip.paired_end'] = chip_p parameters['chip.ctl_paired_end'] = chip_p # in the case of neither, define paired_ends else: print("Mixed endedness here!") - parameters['chip.paired_ends'] = [True if pe=="paired" else False for pe in paired] - parameters['chip.ctl_paired_ends'] = [True if pe=="paired" else False for pe in paired] + parameters['chip.paired_ends'] = [True if pe=="paired" else False for pe in paired_ends] + parameters['chip.ctl_paired_ends'] = [True if pe=="paired" else False for pe in paired_ends] parameters['chip.ctl_depth_limit'] = 0 - parameters['chip.exp_ctl_depth_limit'] = 0 - parameters['ctl_subsample_reads'] = 15000000 + # can't automate subsampling + parameters['chip.exp_ctl_depth_ratio_limit'] = 0 if paired == 'single': frag_temp = [300] @@ -432,6 +434,7 @@ def rename_chip(input_at_id_list): ['chip.optimal_peak', 'chip.conservative_peak', 'chip.fc_bw'], additional_input={'parameters': parameters}, organism=organism) if step2_status == 'complete': + print("step2 outputs: ", step2_output) set_opt_peak = step2_output[0] set_cons_peak = step2_output[1] set_fc_bw = step2_output[2] diff --git a/poetry.lock b/poetry.lock index 0c2ab48f..e809e525 100644 --- a/poetry.lock +++ b/poetry.lock @@ -566,14 +566,14 @@ tox = ["tox"] [[package]] name = "dcicutils" -version = "7.7.1" +version = "7.8.0" description = "Utility package for interacting with the 4DN Data Portal and other 4DN resources" category = "main" optional = false python-versions = ">=3.7,<3.10" files = [ - {file = "dcicutils-7.7.1-py3-none-any.whl", hash = "sha256:f22ed1a58cd53876b682e55726f9e416ba0903e3b5d9f95f4aa1a74d2e96b830"}, - {file = "dcicutils-7.7.1.tar.gz", hash = "sha256:04cc948db45d93bfe2e0e6569502c863211f0c633b7fe8d10cc28a9f72e62397"}, + {file = "dcicutils-7.8.0-py3-none-any.whl", hash = "sha256:d20b9c1edc01230ea6b979295c368d5be8b75b96032c32ffcfd1956847411de6"}, + {file = "dcicutils-7.8.0.tar.gz", hash = "sha256:069a34e5e22ae703a1e5a7c17c67f90e75e50af23c4be79fbf43ab617a27418d"}, ] [package.dependencies] From fec266f981d236a4bbc5bd56502003aef9f96d88 Mon Sep 17 00:00:00 2001 From: Clara <57916928+clarabakker@users.noreply.github.com> Date: Tue, 29 Aug 2023 17:38:19 -0400 Subject: [PATCH 18/18] version, changelog, minor spacing fix --- CHANGELOG.rst | 8 ++++++++ chalicelib_fourfront/checks/wfr_encode_checks.py | 3 +-- pyproject.toml | 2 +- 3 files changed, 10 insertions(+), 3 deletions(-) diff --git a/CHANGELOG.rst b/CHANGELOG.rst index 998e1fa0..30d815ff 100644 --- a/CHANGELOG.rst +++ b/CHANGELOG.rst @@ -7,6 +7,14 @@ foursight Change Log ---------- +3.8.1 +===== + +`PR 528: ChIP-seq update to 2.1.6 `_ + +* Modify wfr_encode_checks to run the updated (v2.1.6) ChIP-seq pipeline +* Update helpers (utils and settings) to run the modified check + 3.8.0 ===== diff --git a/chalicelib_fourfront/checks/wfr_encode_checks.py b/chalicelib_fourfront/checks/wfr_encode_checks.py index 50de5aa2..0baf7eb3 100644 --- a/chalicelib_fourfront/checks/wfr_encode_checks.py +++ b/chalicelib_fourfront/checks/wfr_encode_checks.py @@ -381,10 +381,9 @@ def rename_chip(input_at_id_list): if ta_cnt: s2_input_files['chip.ctl_tas'] = ta_cnt s2_input_files['additional_file_parameters']['chip.ctl_tas'] = {"rename": rename_chip(ta_cnt)} - + # collect parameters parameters = {} - if not control_set: if target_type == 'histone': set_summary += "| skipped - histone without control needs attention, ie change to tf" diff --git a/pyproject.toml b/pyproject.toml index 50f5f2fc..0f6db225 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,6 +1,6 @@ [tool.poetry] name = "foursight" -version = "3.8.0" +version = "3.8.1" description = "Serverless Chalice Application for Monitoring" authors = ["4DN-DCIC Team "] license = "MIT"