Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

ChIP-seq update to 2.1.6 #528

Merged
merged 24 commits into from
Sep 5, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
24 commits
Select commit Hold shift + click to select a range
0324419
set input files for aln wf as [[File]] rather than [[[File]]] to matc…
clarabakker Jun 16, 2022
f74499d
set checks to use new workflows, testing
clarabakker Jun 22, 2022
1d8e127
restore workflow specs (changed wf instead to match existing wf names…
clarabakker Jun 22, 2022
e3c640d
added wf v1.1.2-specific parameters, specified new wf uuid in wfrset_…
clarabakker Jun 22, 2022
3697854
bracket mismatch typo
clarabakker Jun 22, 2022
8b14c2f
chip parameter step1/1c typo
clarabakker Jun 22, 2022
a1bbbc9
further simplify fastqs input array, set reference files with names w…
clarabakker Jun 24, 2022
f837a9b
ChIP check I/O and parameters modifications for aln (ctl) and post-al…
clarabakker Jun 30, 2022
7aeed9a
add back original to accepted versions
clarabakker Jun 30, 2022
34838bc
change chip-seq to use wdl's control parameters (rm ctl prefix from f…
clarabakker Jul 7, 2022
d90b16f
override benchmarking for chip ctl wf
clarabakker Jul 8, 2022
c4f5514
added bool to get_chip_files to allow different endedness; updated us…
clarabakker Jul 21, 2022
7e46365
change paired array to booleans for ChIP-seq post-align
clarabakker Jul 29, 2022
4e15ca9
update ChIP-seq pipeline versions
clarabakker Aug 8, 2022
4e37bce
Merge branch 'master' into chip-update
clarabakker Aug 18, 2022
04aabe7
adjustments for ChIP-seq benchmarking, new WDL parameters for paired …
clarabakker Dec 2, 2022
feb8d65
Merge branch 'master' into chip-update
clarabakker Dec 5, 2022
081242c
qc get failure handling
clarabakker Mar 27, 2023
6a975b0
Merge branch 'master' into chip-update
clarabakker Mar 27, 2023
7755f70
Merge branch 'master' into chip-update
clarabakker Mar 27, 2023
5d4c9ba
Merge branch 'master' into chip-update
Aug 17, 2023
ae2a949
identify mixed paired/single ended, general cleanup
Aug 29, 2023
d751f15
Merge branch 'master' into chip-update
clarabakker Aug 29, 2023
fec266f
version, changelog, minor spacing fix
clarabakker Aug 29, 2023
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
8 changes: 8 additions & 0 deletions CHANGELOG.rst
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,14 @@ foursight
Change Log
----------

3.8.1
=====

`PR 528: ChIP-seq update to 2.1.6 <https://github.com/4dn-dcic/foursight/pull/528>`_

* Modify wfr_encode_checks to run the updated (v2.1.6) ChIP-seq pipeline
* Update helpers (utils and settings) to run the modified check

3.8.0
=====

Expand Down
43 changes: 25 additions & 18 deletions chalicelib_fourfront/checks/helpers/wfr_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -75,15 +75,15 @@
},
"encode-chipseq-aln-chip": {
"run_time": 200,
"accepted_versions": ["1.1.1"]
"accepted_versions": ["1.1.1", "2.1.6"]
},
"encode-chipseq-aln-ctl": {
"run_time": 200,
"accepted_versions": ["1.1.1"]
"accepted_versions": ["1.1.1", "2.1.6"]
},
"encode-chipseq-postaln": {
"run_time": 200,
"accepted_versions": ["1.1.1"]
"accepted_versions": ["1.1.1", "2.1.6"]
},
"encode-atacseq-aln": {
"run_time": 200,
Expand Down Expand Up @@ -199,7 +199,7 @@
# OFFICIAL
'ATAC-seq': ['ENCODE_ATAC_Pipeline_1.1.1'],
# OFFICIAL
'ChIP-seq': ['ENCODE_ChIP_Pipeline_1.1.1'],
'ChIP-seq': ['ENCODE_ChIP_Pipeline_1.1.1', 'ENCODE_ChIP_Pipeline_2.1.6'],
# OFFICIAL
'RNA-seq': ['ENCODE_RNAseq_Pipeline_1.1'],
'single cell Repli-seq': [''],
Expand Down Expand Up @@ -2104,17 +2104,17 @@ def get_chip_info(f_exp_resp, all_items):
return control, control_set, target_type, organism


def get_chip_files(exp_resp, all_files):
def get_chip_files(exp_resp, all_files, isChip):
files = []
paired = ""
paired = []
exp_files = exp_resp['files']
for a_file in exp_files:
f_t = []
file_resp = [i for i in all_files if i['uuid'] == a_file['uuid']][0]
# get pair end no
pair_end = file_resp.get('paired_end')
if pair_end == '2':
paired = 'paired'
paired.append('paired')
continue
# get paired file
paired_with = ""
Expand All @@ -2124,22 +2124,22 @@ def get_chip_files(exp_resp, all_files):
else:
for relation in relations:
if relation['relationship_type'] == 'paired with':
paired = 'paired'
paired.append('paired')
paired_with = relation['file']['@id']
# decide if data is not paired end reads
if not paired_with:
if not paired:
paired = 'single'
else:
if paired != 'single':
print('inconsistent fastq pair info')
continue
paired.append('single')
f_t.append(file_resp['@id'])
else:
f2 = [i for i in all_files if i['@id'] == paired_with][0]
f_t.append(file_resp['@id'])
f_t.append(f2['@id'])
files.append(f_t)

# needs to output a string for non-ChIP-seq usage
if not isChip:
paired = paired[0]
return files, paired


Expand All @@ -2153,15 +2153,22 @@ def select_best_2(file_list, all_files, all_qcs):
f_resp = [i for i in all_files if i['@id'] == f][0]
qc = f_resp['quality_metric']
qc_resp = [i for i in all_qcs if i['uuid'] == qc['uuid']][0]
try:
score = qc_resp['nodup_flagstat_qc'][0]['mapped']
except Exception:
score = qc_resp['ctl_nodup_flagstat_qc'][0]['mapped']
if 'nodup_flagstat_qc' in qc_resp:
try:
score = qc_resp['nodup_flagstat_qc'][0]['mapped']
except Exception:
score = qc_resp['ctl_nodup_flagstat_qc'][0]['mapped']
if 'align' in qc_resp:
try:
score = qc_resp['align']['nodup_samstat']['rep1']['mapped_reads']
except Exception:
score = qc_resp['align']['ctl_nodup_samstat']['rep1']['mapped_reads']
else:
raise Exception('no mapped qc statistics found')
scores.append((score, f))
scores = sorted(scores, key=lambda x: -x[0])
return [scores[0][1], scores[1][1]]


def limit_number_of_runs(check, my_auth):
"""Checks the number of workflow runs started in the past 6h. Return the
number of remaining runs before hitting the rate limit of pulls from Docker
Expand Down
15 changes: 8 additions & 7 deletions chalicelib_fourfront/checks/helpers/wfrset_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -195,9 +195,9 @@ def step_settings(step_name, my_organism, attribution, overwrite=None):
},
{
"app_name": "encode-chipseq-aln-chip",
"workflow_uuid": "4dn-dcic-lab:wf-encode-chipseq-aln-chip",
"workflow_uuid": "212a9c91-25d6-473f-b56b-8dd93958c580",
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Despite the property name, might it be better to use the alias, as was formerly done, in case the workflow is ever added to a new environment (where a different uuid would be generated)? That is perhaps unlikely, but worth guarding against if it is at all possible.

Copy link
Member Author

@clarabakker clarabakker Aug 31, 2023

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yes, I was aiming to make it consistent with other workflows, since I made the new UUIDs consistent (for the current envs). The aliases are also the same for both the new and old versions, which may also have been a factor. It needs to know that the new and old versions are the same workflow, but only run the new one.

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Oops, I see your confusion: the aliases are definitely not the same, and the app name keeps the versions together. I do think it is more consistent with other workflows to use the uuid. It is also hard to query aliases for someone going from this workflow setting to the workflow on the portal, right?

"parameters": {},
"config": {},
"config": {"ebs_size": 70},
'custom_pf_fields': {
'chip.first_ta': {
'genome_assembly': genome,
Expand All @@ -213,9 +213,9 @@ def step_settings(step_name, my_organism, attribution, overwrite=None):
},
{
"app_name": "encode-chipseq-aln-ctl",
"workflow_uuid": "4dn-dcic-lab:wf-encode-chipseq-aln-ctl",
"workflow_uuid": "4eb427f1-a7d5-4d74-8cfa-4c77f42d5b43",
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Same as previous comment

"parameters": {},
"config": {},
"config":{"instance_type": 'c5.2xlarge', "ebs_size": 70},
'custom_pf_fields': {
'chip.first_ta_ctl': {
'genome_assembly': genome,
Expand All @@ -226,9 +226,9 @@ def step_settings(step_name, my_organism, attribution, overwrite=None):
},
{
"app_name": "encode-chipseq-postaln",
"workflow_uuid": "4dn-dcic-lab:wf-encode-chipseq-postaln",
"workflow_uuid": "291d4c64-75de-434a-9d98-01f40d19e15e",
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

see above

"parameters": {},
"config": {},
"config": {"instance_type": "c5.2xlarge", "ebs_size": 80},
'custom_pf_fields': {
'chip.optimal_peak': {
'genome_assembly': genome,
Expand All @@ -238,7 +238,7 @@ def step_settings(step_name, my_organism, attribution, overwrite=None):
'genome_assembly': genome,
'file_type': 'conservative peaks',
'description': 'Conservative peak calls from ENCODE ChIP-Seq Pipeline'},
'chip.sig_fc': {
'chip.fc_bw': {
'genome_assembly': genome,
'file_type': 'signal fold change',
'description': 'ChIP-seq signal fold change over input control'}
Expand Down Expand Up @@ -329,6 +329,7 @@ def step_settings(step_name, my_organism, attribution, overwrite=None):
'rna.strandedness_direction': '',
'rna.endedness': ''
},
"config": {"instance_type": ["m5a.4xlarge", "m6a.4xlarge"], "ebs_size": 90},
'custom_pf_fields': {
'rna.outbam': {
'genome_assembly': genome,
Expand Down
Loading
Loading