cv_params added to specification

bigbio committed on Nov 10, 2024 · 293cd8b
1 parent 8cf0208
commit 293cd8b
Show file tree

Hide file tree

Showing 7 changed files with 26 additions and 17 deletions.
diff --git a/quantmsio/commands/generate_gene_message_command.py b/quantmsio/commands/generate_gene_message_command.py
@@ -44,4 +44,4 @@ def map_gene_message_to_parquet(
     """
     if partitions:
         partitions = partitions.split(",")
-    generate_feature_of_gene(parquet_path, fasta, output_folder, file_num, partitions, species)
+    generate_feature_of_gene(parquet_path, fasta, output_folder, file_num, partitions, species)
diff --git a/quantmsio/commands/maxquant_command.py b/quantmsio/commands/maxquant_command.py
@@ -105,4 +105,6 @@ def convert_maxquant_feature(
 
     MQ = MaxQuant()
     output_path = output_folder + "/" + create_uuid_filename(output_prefix_file, ".feature.parquet")
-    MQ.convert_feature_to_parquet(evidence_path=evidence_file, sdrf_path=sdrf_file, output_path=output_path, chunksize=chunksize)
+    MQ.convert_feature_to_parquet(
+        evidence_path=evidence_file, sdrf_path=sdrf_file, output_path=output_path, chunksize=chunksize
+    )
diff --git a/quantmsio/core/common.py b/quantmsio/core/common.py
@@ -76,7 +76,7 @@
     "Modified sequence": "peptidoform",
     "Raw file": "reference_file_name",
     "Score": "additional_scores",
-    "PIF": "parent_ion_score"
+    "PIF": "parent_ion_score",
 }
 
 MAXQUANT_FEATURE_MAP = {

diff --git a/quantmsio/core/maxquant.py b/quantmsio/core/maxquant.py
@@ -247,7 +247,9 @@ def main_operate(self, df: pd.DataFrame):
         df["additional_scores"] = df["additional_scores"].apply(
             lambda x: [{"score_name": "maxquant_score", "score_value": np.float32(x)}]
         )
-        df.loc[:, "cv_params"] = df["parent_ion_score"].apply(lambda socre: [{"cv_name": "parent_ion_score", "cv_value": str(socre)}])
+        df.loc[:, "cv_params"] = df["parent_ion_score"].apply(
+            lambda socre: [{"cv_name": "parent_ion_score", "cv_value": str(socre)}]
+        )
         df.loc[:, "predicted_rt"] = None
         df.loc[:, "ion_mobility"] = None
         return df

diff --git a/quantmsio/operate/plots.py b/quantmsio/operate/plots.py
@@ -7,6 +7,7 @@
 import seaborn as sns
 from quantmsio.operate.tools import transform_ibaq
 
+
 def plot_distribution_of_ibaq(ibaq_path: str, save_path: str = None, selected_column: str = None) -> None:
     """
     This function plots the distribution of the protein IBAQ values.

diff --git a/quantmsio/operate/query.py b/quantmsio/operate/query.py
@@ -186,7 +186,7 @@ def inject_gene_msg(
     def get_protein_to_gene_map(self, fasta: str, map_parameter: str = "map_protein_accession"):
         map_gene_names = generate_gene_name_map(fasta, map_parameter)
         return map_gene_names
-    
+
     def get_protein_dict(self, fasta_path):
         """
         return: protein_map {protein_accession:seq}
@@ -320,4 +320,3 @@ def get_gene_accessions(self, gene_list: list, species: str = "human"):
             if "accession" in obj and "genomic" in obj["accession"]:
                 gene_accessions_maps[obj["query"]] = ",".join(obj["accession"]["genomic"])
         return gene_accessions_maps
-
diff --git a/quantmsio/operate/tools.py b/quantmsio/operate/tools.py
@@ -18,16 +18,18 @@ def init_save_info(parquet_path: str):
     pqwriters = {}
     pqwriter_no_part = None
     filename = os.path.basename(parquet_path)
-    return pqwriters, pqwriter_no_part,filename
+    return pqwriters, pqwriter_no_part, filename
 
-def close_file(partitions:list ,pqwriters: dict, pqwriter_no_part: str):
+
+def close_file(partitions: list, pqwriters: dict, pqwriter_no_part: str):
     if not partitions or len(partitions) == 0:
         if pqwriter_no_part:
             pqwriter_no_part.close()
     else:
         for pqwriter in pqwriters.values():
             pqwriter.close()
 
+
 def generate_psms_of_spectrum(
     parquet_path: str,
     mzml_directory: str,
@@ -54,11 +56,15 @@ def generate_psms_of_spectrum(
             axis=1,
             result_type="expand",
         )
-        pqwriters, pqwriter_no_part = save_parquet_file(partitions, table, output_folder, filename, pqwriters, pqwriter_no_part, PSM_SCHEMA)
+        pqwriters, pqwriter_no_part = save_parquet_file(
+            partitions, table, output_folder, filename, pqwriters, pqwriter_no_part, PSM_SCHEMA
+        )
     close_file(partitions, pqwriters, pqwriter_no_part)
 
 
-def save_parquet_file(partitions, table, output_folder, filename, pqwriters = {}, pqwriter_no_part=None, schema=FEATURE_SCHEMA):
+def save_parquet_file(
+    partitions, table, output_folder, filename, pqwriters={}, pqwriter_no_part=None, schema=FEATURE_SCHEMA
+):
 
     if partitions and len(partitions) > 0:
         for key, df in table.groupby(partitions):
@@ -83,22 +89,21 @@ def save_parquet_file(partitions, table, output_folder, filename, pqwriters = {}
         pqwriter_no_part.write_table(parquet_table)
         return pqwriters, pqwriter_no_part
 
+
 def generate_feature_of_gene(
-    parquet_path: str,
-    fasta: str,
-    output_folder: str,
-    file_num: int,
-    partitions: list = None,
-    species: str = "human"    
+    parquet_path: str, fasta: str, output_folder: str, file_num: int, partitions: list = None, species: str = "human"
 ):
     pqwriters, pqwriter_no_part, filename = init_save_info(parquet_path)
     p = Query(parquet_path)
     map_gene_names = p.get_protein_to_gene_map(fasta)
     for _, table in p.iter_file(file_num=file_num):
         table = p.inject_gene_msg(table, map_gene_names, species)
-        pqwriters, pqwriter_no_part = save_parquet_file(partitions, table, output_folder, filename, pqwriters, pqwriter_no_part)
+        pqwriters, pqwriter_no_part = save_parquet_file(
+            partitions, table, output_folder, filename, pqwriters, pqwriter_no_part
+        )
     close_file(partitions, pqwriters, pqwriter_no_part)
 
+
 def map_protein_for_tsv(path: str, fasta: str, output_path: str, map_parameter: str):
     """
     according fasta database to map the proteins accessions to uniprot names.