From d7f4c403fbed6f16871c38ce9086210a1b1e5699 Mon Sep 17 00:00:00 2001 From: Marcel Levstek <62072754+marcellevstek@users.noreply.github.com> Date: Mon, 22 Apr 2024 13:11:49 +0200 Subject: [PATCH] Remove ``RNA-SeQC`` metrics from ``RNATables`` QC fields --- docs/CHANGELOG.rst | 5 ++++- src/resdk/tables/rna.py | 39 +-------------------------------------- 2 files changed, 5 insertions(+), 39 deletions(-) diff --git a/docs/CHANGELOG.rst b/docs/CHANGELOG.rst index 23cb8f36..199ae462 100644 --- a/docs/CHANGELOG.rst +++ b/docs/CHANGELOG.rst @@ -9,11 +9,14 @@ All notable changes to this project are documented in this file. Unreleased ========== +Changed +------- +- Remove ``RNA-SeQC`` metrics from ``RNATables`` QC fields + Added ----- - Add ``restart`` method to the ``Data`` resource - Fixed ----- - Fix fetching ``RNATables`` for collections with missing MultiQC objects by diff --git a/src/resdk/tables/rna.py b/src/resdk/tables/rna.py index c9cd9f70..e069ba1c 100644 --- a/src/resdk/tables/rna.py +++ b/src/resdk/tables/rna.py @@ -163,18 +163,6 @@ "type": "Int64", "agg_func": "mean", }, - { - "name": "RNA-SeQC_mqc-generalstats-rna_seqc-Expression_Profiling_Efficiency", - "slug": "profiling_efficiency", - "type": "float64", - "agg_func": "mean", - }, - { - "name": "RNA-SeQC_mqc-generalstats-rna_seqc-Genes_Detected", - "slug": "genes_detected", - "type": "Int64", - "agg_func": "mean", - }, { "slug": "strandedness_code", "type": "string", @@ -185,21 +173,6 @@ }, ] -MQC_COVERAGE_COLUMNS = [ - { - "name": "Genes used in 3' bias", - "slug": "num_genes_three_prime_bias", - "type": "Int64", - "agg_func": "mean", - }, - { - "name": "Mean 3' bias", - "slug": "mean_three_prime_bias", - "type": "float64", - "agg_func": "mean", - }, -] - def general_multiqc_parser(file_object, name, column_names): """General parser for MultiQC files.""" @@ -239,11 +212,6 @@ def multiqc_general_stats_parser(file_object, name): return general_multiqc_parser(file_object, name, MQC_GENERAL_COLUMNS) -def multiqc_coverage_parser(file_object, name): - """Parse "multiqc_rna-seqc_coverage_stats.txt" file.""" - return general_multiqc_parser(file_object, name, MQC_COVERAGE_COLUMNS) - - def multiqc_strand_parser(file_object, name): """Parse "multiqc_library_strandedness.txt" file.""" df = pd.read_csv(file_object, sep="\t", index_col=0) @@ -506,7 +474,6 @@ def _download_qc(self) -> pd.DataFrame: "uri_general": f"{mqc.id}/multiqc_data/multiqc_general_stats.txt", "uri_strand": f"{mqc.id}/multiqc_data/multiqc_library_strandedness.txt", "uri_build": f"{mqc.id}/multiqc_data/multiqc_sample_info.txt", - "uri_coverage": f"{mqc.id}/multiqc_data/multiqc_rna-seqc_coverage_stats.txt", } df = pd.DataFrame(index=[sample.id for sample in self._samples]) @@ -515,7 +482,6 @@ def _download_qc(self) -> pd.DataFrame: "uri_general": multiqc_general_stats_parser, "uri_strand": multiqc_strand_parser, "uri_build": multiqc_build_parser, - "uri_coverage": multiqc_coverage_parser, } for type_, parser in parsers.items(): uris = [item[type_] for item in mqc_db.values()] @@ -528,10 +494,7 @@ def _download_qc(self) -> pd.DataFrame: STRANDEDNESS_COLUMN = [{"slug": "strandedness_code", "type": "category"}] column_types = { c["slug"]: c["type"] - for c in MQC_GENERAL_COLUMNS - + MQC_COVERAGE_COLUMNS - + BUILD_COLUMN - + STRANDEDNESS_COLUMN + for c in MQC_GENERAL_COLUMNS + BUILD_COLUMN + STRANDEDNESS_COLUMN if c["slug"] in df.columns } df = df[column_types.keys()].astype(column_types)