Skip to content

Commit

Permalink
update: psm
Browse files Browse the repository at this point in the history
  • Loading branch information
zprobot committed Oct 18, 2024
1 parent 8c451e0 commit 55f9989
Show file tree
Hide file tree
Showing 2 changed files with 10 additions and 16 deletions.
8 changes: 3 additions & 5 deletions quantmsio/core/common.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,16 +7,14 @@
"opt_global_q-value": "global_qvalue",
"opt_global_cv_MS:1002217_decoy_peptide": "is_decoy",
"calc_mass_to_charge": "calculated_mz",
"accession": "pg_accessions",
"accession": "mp_accessions",
"unique": "unique",
"charge": "precursor_charge",
"exp_mass_to_charge": "observed_mz",
"retention_time": "rt",
"retention_time": "rt"
}
PSM_USECOLS = list(PSM_MAP.keys()) + [
"spectra_ref",
"start",
"end",
"spectra_ref"
]

MSSTATS_MAP = {
Expand Down
18 changes: 7 additions & 11 deletions quantmsio/core/psm.py
Original file line number Diff line number Diff line change
Expand Up @@ -57,10 +57,7 @@ def generate_report(self, chunksize=1000000, protein_str=None):
yield df

def transform_psm(self, df):
df.loc[:, "pg_positions"] = df[["start", "end"]].apply(
lambda row: self.generate_positions(row["start"], row["end"]), axis=1
)
df.loc[:, "scan_number"] = df["spectra_ref"].apply(generate_scan_number)
df.loc[:, "scan"] = df["spectra_ref"].apply(generate_scan_number)

df.loc[:, "reference_file_name"] = df["spectra_ref"].apply(lambda x: self._ms_runs[x[: x.index(":")]])
df.loc[:, "additional_scores"] = df[list(self._score_names.values())].apply(
Expand All @@ -72,7 +69,7 @@ def transform_psm(self, df):
),
axis=1,
)
df.drop(["start", "end", "spectra_ref", "search_engine", "search_engine_score[1]"], inplace=True, axis=1)
df.drop(["spectra_ref", "search_engine", "search_engine_score[1]"], inplace=True, axis=1)

@staticmethod
def transform_parquet(df):
Expand All @@ -86,18 +83,17 @@ def _genarate_additional_scores(self, cols):
return struct_list

def add_addition_msg(self, df):
df.loc[:, "protein_global_qvalue"] = df["pg_accessions"].map(self._protein_global_qvalue_map)
df.loc[:, "pg_global_qvalue"] = df["mp_accessions"].map(self._protein_global_qvalue_map)
df.loc[:, "best_id_score"] = None
df.loc[:, "consensus_support"] = None
df.loc[:, "modification_details"] = None
df.loc[:, "predicted_rt"] = None
df.loc[:, "gg_accessions"] = None
df.loc[:, "gg_names"] = None
df.loc[:, "ion_mobility"] = None
df.loc[:, "num_peaks"] = None
df.loc[:, "number_peaks"] = None
df.loc[:, "mz_array"] = None
df.loc[:, "intensity_array"] = None
df.loc[:, "rank"] = None
df.loc[:, "cv_params"] = None
df.loc[:, "quantmsio_version"] = QUANTMSIO_VERSION

def write_feature_to_file(self, output_path, chunksize=1000000, protein_file=None):
protein_list = extract_protein_list(protein_file) if protein_file else None
Expand All @@ -117,7 +113,7 @@ def convert_to_parquet_format(res, modifications):
res["unique"] = res["unique"].astype("Int32")
res["modifications"] = res["modifications"].apply(lambda x: generate_modification_list(x, modifications))
res["precursor_charge"] = res["precursor_charge"].map(lambda x: None if pd.isna(x) else int(x)).astype("Int32")
#res["calculated_mz"] = res["calculated_mz"].astype(float)
res["calculated_mz"] = res["calculated_mz"].astype(float)
res["observed_mz"] = res["observed_mz"].astype(float)
res["posterior_error_probability"] = res["posterior_error_probability"].astype(float)
res["global_qvalue"] = res["global_qvalue"].astype(float)
Expand Down

0 comments on commit 55f9989

Please sign in to comment.