From 55f9989ad6e13f9e2a9a6dba997bb01bae440bef Mon Sep 17 00:00:00 2001 From: zprobot <1727697083@qq.com> Date: Fri, 18 Oct 2024 14:56:44 +0800 Subject: [PATCH] update: psm --- quantmsio/core/common.py | 8 +++----- quantmsio/core/psm.py | 18 +++++++----------- 2 files changed, 10 insertions(+), 16 deletions(-) diff --git a/quantmsio/core/common.py b/quantmsio/core/common.py index 7c96900..f3510d0 100644 --- a/quantmsio/core/common.py +++ b/quantmsio/core/common.py @@ -7,16 +7,14 @@ "opt_global_q-value": "global_qvalue", "opt_global_cv_MS:1002217_decoy_peptide": "is_decoy", "calc_mass_to_charge": "calculated_mz", - "accession": "pg_accessions", + "accession": "mp_accessions", "unique": "unique", "charge": "precursor_charge", "exp_mass_to_charge": "observed_mz", - "retention_time": "rt", + "retention_time": "rt" } PSM_USECOLS = list(PSM_MAP.keys()) + [ - "spectra_ref", - "start", - "end", + "spectra_ref" ] MSSTATS_MAP = { diff --git a/quantmsio/core/psm.py b/quantmsio/core/psm.py index 20eb201..558e53d 100644 --- a/quantmsio/core/psm.py +++ b/quantmsio/core/psm.py @@ -57,10 +57,7 @@ def generate_report(self, chunksize=1000000, protein_str=None): yield df def transform_psm(self, df): - df.loc[:, "pg_positions"] = df[["start", "end"]].apply( - lambda row: self.generate_positions(row["start"], row["end"]), axis=1 - ) - df.loc[:, "scan_number"] = df["spectra_ref"].apply(generate_scan_number) + df.loc[:, "scan"] = df["spectra_ref"].apply(generate_scan_number) df.loc[:, "reference_file_name"] = df["spectra_ref"].apply(lambda x: self._ms_runs[x[: x.index(":")]]) df.loc[:, "additional_scores"] = df[list(self._score_names.values())].apply( @@ -72,7 +69,7 @@ def transform_psm(self, df): ), axis=1, ) - df.drop(["start", "end", "spectra_ref", "search_engine", "search_engine_score[1]"], inplace=True, axis=1) + df.drop(["spectra_ref", "search_engine", "search_engine_score[1]"], inplace=True, axis=1) @staticmethod def transform_parquet(df): @@ -86,18 +83,17 @@ def _genarate_additional_scores(self, cols): return struct_list def add_addition_msg(self, df): - df.loc[:, "protein_global_qvalue"] = df["pg_accessions"].map(self._protein_global_qvalue_map) + df.loc[:, "pg_global_qvalue"] = df["mp_accessions"].map(self._protein_global_qvalue_map) + df.loc[:, "best_id_score"] = None + df.loc[:, "consensus_support"] = None df.loc[:, "modification_details"] = None df.loc[:, "predicted_rt"] = None - df.loc[:, "gg_accessions"] = None - df.loc[:, "gg_names"] = None df.loc[:, "ion_mobility"] = None - df.loc[:, "num_peaks"] = None + df.loc[:, "number_peaks"] = None df.loc[:, "mz_array"] = None df.loc[:, "intensity_array"] = None df.loc[:, "rank"] = None df.loc[:, "cv_params"] = None - df.loc[:, "quantmsio_version"] = QUANTMSIO_VERSION def write_feature_to_file(self, output_path, chunksize=1000000, protein_file=None): protein_list = extract_protein_list(protein_file) if protein_file else None @@ -117,7 +113,7 @@ def convert_to_parquet_format(res, modifications): res["unique"] = res["unique"].astype("Int32") res["modifications"] = res["modifications"].apply(lambda x: generate_modification_list(x, modifications)) res["precursor_charge"] = res["precursor_charge"].map(lambda x: None if pd.isna(x) else int(x)).astype("Int32") - #res["calculated_mz"] = res["calculated_mz"].astype(float) + res["calculated_mz"] = res["calculated_mz"].astype(float) res["observed_mz"] = res["observed_mz"].astype(float) res["posterior_error_probability"] = res["posterior_error_probability"].astype(float) res["global_qvalue"] = res["global_qvalue"].astype(float)