Skip to content

Commit

Permalink
Merge pull request #546 from 4dn-dcic/ajs_allow_lab_pf_qc_runs
Browse files Browse the repository at this point in the history
Allow lab supplied processed files to be QCed
  • Loading branch information
aschroed authored Sep 7, 2023
2 parents 6011a5b + 8c7c422 commit 3f82b08
Show file tree
Hide file tree
Showing 3 changed files with 21 additions and 6 deletions.
5 changes: 5 additions & 0 deletions CHANGELOG.rst
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,11 @@ foursight
Change Log
----------

3.8.2
=====

* Add a non-dcic boolean option to the BamQC and PairsQC checks so that these workflows can be run on lab-provided files

3.8.1
=====

Expand Down
20 changes: 15 additions & 5 deletions chalicelib_fourfront/checks/wfr_checks.py
Original file line number Diff line number Diff line change
Expand Up @@ -384,12 +384,15 @@ def fastqc_start(connection, **kwargs):
return action


@check_function(lab_title=None, start_date=None, action="pairsqc_start")
@check_function(lab_title=None, start_date=None, non-dcic=False, action="pairsqc_start")
def pairsqc_status(connection, **kwargs):
"""Searches for pairs files produced by 4dn pipelines that don't have pairsqc
Keyword arguments:
lab_title -- limit search with a lab i.e. Bing+Ren, UCSD
start_date -- limit search to files generated since a date formatted YYYY-MM-DD
non-dcic -- if True, do not require source_experiments, so the check runs on any pairs file
WARNING: you probably want to combine this with lab_title or start_date to avoid
running on all non-DCIC pairs files
run_time -- assume runs beyond run_time are dead (default=24 hours)
"""
start = datetime.utcnow()
Expand All @@ -406,7 +409,7 @@ def pairsqc_status(connection, **kwargs):
# Build the query (skip to be uploaded by workflow)
query = ("/search/?file_format.file_format=pairs&type=FileProcessed"
"&status=pre-release&status=released&status=released+to+project&status=uploaded"
"&quality_metric.uuid=No+value&limit=all&source_experiments!=No value")
"&quality_metric.uuid=No+value&limit=all")
# add date
s_date = kwargs.get('start_date')
if s_date:
Expand All @@ -415,6 +418,8 @@ def pairsqc_status(connection, **kwargs):
lab = kwargs.get('lab_title')
if lab:
query += '&lab.display_title=' + lab
if kwargs.get('non-dcic') is not True:
query += '&source_experiments!=No+value'
# The search
res = ff_utils.search_metadata(query, key=my_auth)
if not res:
Expand Down Expand Up @@ -1981,12 +1986,15 @@ def rna_seq_start(connection, **kwargs):
return action


@check_function(lab_title=None, start_date=None, action="bamqc_start")
@check_function(lab_title=None, start_date=None, non-dcic=False, action="bamqc_start")
def bamqc_status(connection, **kwargs):
"""Searches for annotated bam files that do not have a qc object
Keyword arguments:
lab_title -- limit search with a lab i.e. Bing+Ren, UCSD
start_date -- limit search to files generated since a date formatted YYYY-MM-DD
non-dcic -- if True, do not filter on workflow run outputs, so the check runs on any bam files
WARNING: should likely be combined with other options such as lab_title or start_date
to avoid running on all bams that lack a QC object
run_time -- assume runs beyond run_time are dead (default=24 hours)
"""
start = datetime.utcnow()
Expand All @@ -2000,13 +2008,15 @@ def bamqc_status(connection, **kwargs):
check, skip = wfr_utils.check_indexing(check, connection)
if skip:
return check
# Build the query (find bam files produced bt the Hi-C Post Alignment Processing wfr)
# Build the query
default_stati = 'released&status=uploaded&status=released+to+project&status=restricted'
# find bam files produced by the Hi-C Post Alignment Processing wfr
wfr_outputs = "&workflow_run_outputs.workflow.title=Hi-C+Post-alignment+Processing+0.2.6"
stati = 'status=' + (kwargs.get('status') or default_stati)
query = 'search/?file_type=alignments&{}'.format(stati)
query += '&type=FileProcessed'
query += wfr_outputs
if kwargs.get('non-dcic') is not True: # skip this bit if running on non-DCIC bams
query += wfr_outputs
query += '&quality_metric.display_title=No+value'
# add date
s_date = kwargs.get('start_date')
Expand Down
2 changes: 1 addition & 1 deletion pyproject.toml
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
[tool.poetry]
name = "foursight"
version = "3.8.1"
version = "3.8.2"
description = "Serverless Chalice Application for Monitoring"
authors = ["4DN-DCIC Team <[email protected]>"]
license = "MIT"
Expand Down

0 comments on commit 3f82b08

Please sign in to comment.