Skip to content

Commit

Permalink
cv_params added to specification
Browse files Browse the repository at this point in the history
  • Loading branch information
ypriverol committed Nov 10, 2024
1 parent 8cf0208 commit 293cd8b
Show file tree
Hide file tree
Showing 7 changed files with 26 additions and 17 deletions.
2 changes: 1 addition & 1 deletion quantmsio/commands/generate_gene_message_command.py
Original file line number Diff line number Diff line change
Expand Up @@ -44,4 +44,4 @@ def map_gene_message_to_parquet(
"""
if partitions:
partitions = partitions.split(",")
generate_feature_of_gene(parquet_path, fasta, output_folder, file_num, partitions, species)
generate_feature_of_gene(parquet_path, fasta, output_folder, file_num, partitions, species)
4 changes: 3 additions & 1 deletion quantmsio/commands/maxquant_command.py
Original file line number Diff line number Diff line change
Expand Up @@ -105,4 +105,6 @@ def convert_maxquant_feature(

MQ = MaxQuant()
output_path = output_folder + "/" + create_uuid_filename(output_prefix_file, ".feature.parquet")
MQ.convert_feature_to_parquet(evidence_path=evidence_file, sdrf_path=sdrf_file, output_path=output_path, chunksize=chunksize)
MQ.convert_feature_to_parquet(
evidence_path=evidence_file, sdrf_path=sdrf_file, output_path=output_path, chunksize=chunksize
)
2 changes: 1 addition & 1 deletion quantmsio/core/common.py
Original file line number Diff line number Diff line change
Expand Up @@ -76,7 +76,7 @@
"Modified sequence": "peptidoform",
"Raw file": "reference_file_name",
"Score": "additional_scores",
"PIF": "parent_ion_score"
"PIF": "parent_ion_score",
}

MAXQUANT_FEATURE_MAP = {
Expand Down
4 changes: 3 additions & 1 deletion quantmsio/core/maxquant.py
Original file line number Diff line number Diff line change
Expand Up @@ -247,7 +247,9 @@ def main_operate(self, df: pd.DataFrame):
df["additional_scores"] = df["additional_scores"].apply(
lambda x: [{"score_name": "maxquant_score", "score_value": np.float32(x)}]
)
df.loc[:, "cv_params"] = df["parent_ion_score"].apply(lambda socre: [{"cv_name": "parent_ion_score", "cv_value": str(socre)}])
df.loc[:, "cv_params"] = df["parent_ion_score"].apply(
lambda socre: [{"cv_name": "parent_ion_score", "cv_value": str(socre)}]
)
df.loc[:, "predicted_rt"] = None
df.loc[:, "ion_mobility"] = None
return df
Expand Down
1 change: 1 addition & 0 deletions quantmsio/operate/plots.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@
import seaborn as sns
from quantmsio.operate.tools import transform_ibaq


def plot_distribution_of_ibaq(ibaq_path: str, save_path: str = None, selected_column: str = None) -> None:
"""
This function plots the distribution of the protein IBAQ values.
Expand Down
3 changes: 1 addition & 2 deletions quantmsio/operate/query.py
Original file line number Diff line number Diff line change
Expand Up @@ -186,7 +186,7 @@ def inject_gene_msg(
def get_protein_to_gene_map(self, fasta: str, map_parameter: str = "map_protein_accession"):
map_gene_names = generate_gene_name_map(fasta, map_parameter)
return map_gene_names

def get_protein_dict(self, fasta_path):
"""
return: protein_map {protein_accession:seq}
Expand Down Expand Up @@ -320,4 +320,3 @@ def get_gene_accessions(self, gene_list: list, species: str = "human"):
if "accession" in obj and "genomic" in obj["accession"]:
gene_accessions_maps[obj["query"]] = ",".join(obj["accession"]["genomic"])
return gene_accessions_maps

27 changes: 16 additions & 11 deletions quantmsio/operate/tools.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,16 +18,18 @@ def init_save_info(parquet_path: str):
pqwriters = {}
pqwriter_no_part = None
filename = os.path.basename(parquet_path)
return pqwriters, pqwriter_no_part,filename
return pqwriters, pqwriter_no_part, filename

def close_file(partitions:list ,pqwriters: dict, pqwriter_no_part: str):

def close_file(partitions: list, pqwriters: dict, pqwriter_no_part: str):
if not partitions or len(partitions) == 0:
if pqwriter_no_part:
pqwriter_no_part.close()
else:
for pqwriter in pqwriters.values():
pqwriter.close()


def generate_psms_of_spectrum(
parquet_path: str,
mzml_directory: str,
Expand All @@ -54,11 +56,15 @@ def generate_psms_of_spectrum(
axis=1,
result_type="expand",
)
pqwriters, pqwriter_no_part = save_parquet_file(partitions, table, output_folder, filename, pqwriters, pqwriter_no_part, PSM_SCHEMA)
pqwriters, pqwriter_no_part = save_parquet_file(
partitions, table, output_folder, filename, pqwriters, pqwriter_no_part, PSM_SCHEMA
)
close_file(partitions, pqwriters, pqwriter_no_part)


def save_parquet_file(partitions, table, output_folder, filename, pqwriters = {}, pqwriter_no_part=None, schema=FEATURE_SCHEMA):
def save_parquet_file(
partitions, table, output_folder, filename, pqwriters={}, pqwriter_no_part=None, schema=FEATURE_SCHEMA
):

if partitions and len(partitions) > 0:
for key, df in table.groupby(partitions):
Expand All @@ -83,22 +89,21 @@ def save_parquet_file(partitions, table, output_folder, filename, pqwriters = {}
pqwriter_no_part.write_table(parquet_table)
return pqwriters, pqwriter_no_part


def generate_feature_of_gene(
parquet_path: str,
fasta: str,
output_folder: str,
file_num: int,
partitions: list = None,
species: str = "human"
parquet_path: str, fasta: str, output_folder: str, file_num: int, partitions: list = None, species: str = "human"
):
pqwriters, pqwriter_no_part, filename = init_save_info(parquet_path)
p = Query(parquet_path)
map_gene_names = p.get_protein_to_gene_map(fasta)
for _, table in p.iter_file(file_num=file_num):
table = p.inject_gene_msg(table, map_gene_names, species)
pqwriters, pqwriter_no_part = save_parquet_file(partitions, table, output_folder, filename, pqwriters, pqwriter_no_part)
pqwriters, pqwriter_no_part = save_parquet_file(
partitions, table, output_folder, filename, pqwriters, pqwriter_no_part
)
close_file(partitions, pqwriters, pqwriter_no_part)


def map_protein_for_tsv(path: str, fasta: str, output_path: str, map_parameter: str):
"""
according fasta database to map the proteins accessions to uniprot names.
Expand Down

0 comments on commit 293cd8b

Please sign in to comment.