From f759d4bd690185f3fbab5287088bf02bd0fb1635 Mon Sep 17 00:00:00 2001 From: aschroed Date: Wed, 6 Sep 2023 14:43:54 -0400 Subject: [PATCH 1/3] added option to run QC checks on lab provided files by ignoring either the source_experiments (pairsQC) or WFR (bamQC) --- chalicelib_fourfront/checks/wfr_checks.py | 16 +++++++++++++--- 1 file changed, 13 insertions(+), 3 deletions(-) diff --git a/chalicelib_fourfront/checks/wfr_checks.py b/chalicelib_fourfront/checks/wfr_checks.py index 9c3d7c85..99ba2cd8 100644 --- a/chalicelib_fourfront/checks/wfr_checks.py +++ b/chalicelib_fourfront/checks/wfr_checks.py @@ -390,6 +390,9 @@ def pairsqc_status(connection, **kwargs): Keyword arguments: lab_title -- limit search with a lab i.e. Bing+Ren, UCSD start_date -- limit search to files generated since a date formatted YYYY-MM-DD + non-dcic -- if True does not require source_experiments so would run on any pairs file + WARNING: you probably want to run along with lab_title or start_date to prevent + running on all non-DCIC pairs files run_time -- assume runs beyond run_time are dead (default=24 hours) """ start = datetime.utcnow() @@ -406,7 +409,7 @@ def pairsqc_status(connection, **kwargs): # Build the query (skip to be uploaded by workflow) query = ("/search/?file_format.file_format=pairs&type=FileProcessed" "&status=pre-release&status=released&status=released+to+project&status=uploaded" - "&quality_metric.uuid=No+value&limit=all&source_experiments!=No value") + "&quality_metric.uuid=No+value&limit=all") # add date s_date = kwargs.get('start_date') if s_date: @@ -415,6 +418,8 @@ def pairsqc_status(connection, **kwargs): lab = kwargs.get('lab_title') if lab: query += '&lab.display_title=' + lab + if kwargs.get('non-dcic') is not True: + query += '&source_experiments!=No+value' # The search res = ff_utils.search_metadata(query, key=my_auth) if not res: @@ -1987,6 +1992,9 @@ def bamqc_status(connection, **kwargs): Keyword arguments: lab_title -- limit search with a lab i.e. 
Bing+Ren, UCSD start_date -- limit search to files generated since a date formatted YYYY-MM-DD + non-dcic -- if True does not check for wfr outputs so will run on any bams + WARNING: should likely be used with other options like lab_title or start_date + to avoid running on all non-QC'ed bams run_time -- assume runs beyond run_time are dead (default=24 hours) """ start = datetime.utcnow() @@ -2000,13 +2008,15 @@ def bamqc_status(connection, **kwargs): check, skip = wfr_utils.check_indexing(check, connection) if skip: return check - # Build the query (find bam files produced bt the Hi-C Post Alignment Processing wfr) + # Build the query default_stati = 'released&status=uploaded&status=released+to+project&status=restricted' + # find bam files produced by the Hi-C Post Alignment Processing wfr wfr_outputs = "&workflow_run_outputs.workflow.title=Hi-C+Post-alignment+Processing+0.2.6" stati = 'status=' + (kwargs.get('status') or default_stati) query = 'search/?file_type=alignments&{}'.format(stati) query += '&type=FileProcessed' - query += wfr_outputs + if kwargs.get('non-dcic') is not True: # skip this bit if running on non-DCIC bams + query += wfr_outputs query += '&quality_metric.display_title=No+value' # add date s_date = kwargs.get('start_date') From 3ffbe243d19817d239949fee778e6a71102a3be6 Mon Sep 17 00:00:00 2001 From: aschroed Date: Wed, 6 Sep 2023 16:04:22 -0400 Subject: [PATCH 2/3] version bump --- CHANGELOG.rst | 5 +++++ pyproject.toml | 2 +- 2 files changed, 6 insertions(+), 1 deletion(-) diff --git a/CHANGELOG.rst b/CHANGELOG.rst index 30d815ff..a32dd644 100644 --- a/CHANGELOG.rst +++ b/CHANGELOG.rst @@ -7,6 +7,11 @@ foursight Change Log ---------- +3.8.2 +===== + +* add a non-dcic boolean option to BamQC and PairsQC to allow these workflows to run on lab-provided files + 3.8.1 ===== diff --git a/pyproject.toml b/pyproject.toml index 0f6db225..98d6565c 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,6 +1,6 @@ [tool.poetry] name = "foursight" -version 
= "3.8.1" +version = "3.8.2" description = "Serverless Chalice Application for Monitoring" authors = ["4DN-DCIC Team "] license = "MIT" From 8c7c422bdfdacda613dd608a1e2cca6f4f69a133 Mon Sep 17 00:00:00 2001 From: aschroed Date: Thu, 7 Sep 2023 12:18:29 -0400 Subject: [PATCH 3/3] Added new non-dcic param to the check decorators so it shows up in the UI (passed via ** unpacking because non-dcic is not a valid Python keyword name) --- chalicelib_fourfront/checks/wfr_checks.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/chalicelib_fourfront/checks/wfr_checks.py b/chalicelib_fourfront/checks/wfr_checks.py index 99ba2cd8..955f5fa5 100644 --- a/chalicelib_fourfront/checks/wfr_checks.py +++ b/chalicelib_fourfront/checks/wfr_checks.py @@ -384,7 +384,7 @@ def fastqc_start(connection, **kwargs): return action -@check_function(lab_title=None, start_date=None, action="pairsqc_start") +@check_function(lab_title=None, start_date=None, **{'non-dcic': False}, action="pairsqc_start") def pairsqc_status(connection, **kwargs): """Searches for pairs files produced by 4dn pipelines that don't have pairsqc Keyword arguments: @@ -1986,7 +1986,7 @@ def rna_seq_start(connection, **kwargs): return action -@check_function(lab_title=None, start_date=None, action="bamqc_start") +@check_function(lab_title=None, start_date=None, **{'non-dcic': False}, action="bamqc_start") def bamqc_status(connection, **kwargs): """Searches for annotated bam files that do not have a qc object Keyword arguments: