Skip to content

Commit

Permalink
Merge pull request #8 from daichengxin/dev
Browse files Browse the repository at this point in the history
fixed empty spectra bugs
  • Loading branch information
ypriverol authored Jan 13, 2025
2 parents 87ad69f + 0b0258c commit bd07bc7
Show file tree
Hide file tree
Showing 3 changed files with 67 additions and 45 deletions.
2 changes: 1 addition & 1 deletion pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@ name = "quantms-rescoring"
description = "quantms-rescoring: Python scripts and helpers for the quantMS workflow"
readme = "README.md"
license = "MIT"
version = "0.0.3"
version = "0.0.4"
authors = [
"Yasset Perez-Riverol <[email protected]>",
"Dai Chengxin <[email protected]>",
Expand Down
2 changes: 1 addition & 1 deletion quantmsrescore/__init__.py
Original file line number Diff line number Diff line change
@@ -1 +1 @@
__version__ = "0.0.3"
__version__ = "0.0.4"
108 changes: 65 additions & 43 deletions quantmsrescore/ms2rescore.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@
import importlib.resources
import json
import logging

import os
import click
import pyopenms as oms
from ms2rescore import package_data, rescore
Expand Down Expand Up @@ -141,24 +141,24 @@ def _parse_psm(


def parse_cli_arguments_to_config(
config_file: str = None,
feature_generators: str = None,
ms2pip_model_dir: str = None,
ms2pip_model: str = None,
ms2_tolerance: float = None,
calibration_set_size: float = None,
rescoring_engine: str = None,
rng: int = None,
test_fdr: float = None,
processes: int = None,
spectrum_path: str = None,
fasta_file: str = None,
id_decoy_pattern: str = None,
lower_score_is_better: bool = None,
output_path: str = None,
log_level: str = None,
spectrum_id_pattern: str = None,
psm_id_pattern: str = None
config_file: str = None,
feature_generators: str = None,
ms2pip_model_dir: str = None,
ms2pip_model: str = None,
ms2_tolerance: float = None,
calibration_set_size: float = None,
rescoring_engine: str = None,
rng: int = None,
test_fdr: float = None,
processes: int = None,
spectrum_path: str = None,
fasta_file: str = None,
id_decoy_pattern: str = None,
lower_score_is_better: bool = None,
output_path: str = None,
log_level: str = None,
spectrum_id_pattern: str = None,
psm_id_pattern: str = None
) -> dict:
if config_file is None:
config = json.load(
Expand Down Expand Up @@ -254,6 +254,31 @@ def rescore_idxml(input_file, output_file, config) -> None:
peptide_ids = reader.new_peptide_ids
else:
peptide_ids = reader.peptide_ids

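# Drop MS2 spectra that contain no peaks; if any were removed, write the remaining spectra to a "<name>_clear.mzML" file and point config["ms2rescore"]["spectrum_path"] at it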
exp = oms.MSExperiment()
oms.MzMLFile().load(config["ms2rescore"]["spectrum_path"], exp)
empty_spectra = 0
spec = []
for spectrum in exp:
peaks_tuple = spectrum.get_peaks()
if len(peaks_tuple[0]) == 0 and spectrum.getMSLevel() == 2:
logging.warning(
f"{spectrum.getNativeID()} spectra don't have spectra information!"
)
empty_spectra += 1
continue
spec.append(spectrum)

if empty_spectra != 0:
logging.warning(
f"Removed {empty_spectra} spectra without spectra information!"
)
exp.setSpectra(spec)
mzml_output = os.path.splitext(os.path.basename(config["ms2rescore"]["spectrum_path"]))[0] + "_clear.mzML"
oms.MzMLFile().store(mzml_output, exp)
config["ms2rescore"]["spectrum_path"] = mzml_output

# Rescore
rescore(config, psm_list)

Expand All @@ -266,7 +291,7 @@ def rescore_idxml(input_file, output_file, config) -> None:


def filter_out_artifact_psms(
psm_list: PSMList, peptide_ids: List[oms.PeptideIdentification]
psm_list: PSMList, peptide_ids: List[oms.PeptideIdentification]
) -> List[oms.PeptideIdentification]:
"""Filter out PeptideHits that could not be processed by all feature generators"""
num_mandatory_features = max([len(psm.rescoring_features) for psm in psm_list])
Expand Down Expand Up @@ -424,26 +449,26 @@ def filter_out_artifact_psms(
)
@click.pass_context
def ms2rescore(
ctx,
psm_file: str,
spectrum_path,
output_path: str,
log_level,
processes,
fasta_file,
test_fdr,
feature_generators,
ms2pip_model_dir,
ms2pip_model,
ms2_tolerance,
calibration_set_size,
rescoring_engine,
rng,
id_decoy_pattern,
lower_score_is_better,
config_file: str,
spectrum_id_pattern: str,
psm_id_pattern: str
ctx,
psm_file: str,
spectrum_path,
output_path: str,
log_level,
processes,
fasta_file,
test_fdr,
feature_generators,
ms2pip_model_dir,
ms2pip_model,
ms2_tolerance,
calibration_set_size,
rescoring_engine,
rng,
id_decoy_pattern,
lower_score_is_better,
config_file: str,
spectrum_id_pattern: str,
psm_id_pattern: str
):
"""
Rescore PSMs in an idXML file and keep other information unchanged.
Expand Down Expand Up @@ -505,6 +530,3 @@ def ms2rescore(
logging.info("MS²Rescore config:")
logging.info(config)
rescore_idxml(psm_file, output_path, config)



0 comments on commit bd07bc7

Please sign in to comment.