Skip to content

Commit

Permalink
update: query
Browse files (browse the repository at this point in the history)
  • Loading branch information
zprobot committed May 19, 2024
1 parent 3f1fcdc commit d242f70
Show file tree
Hide file tree
Showing 38 changed files with 1,227 additions and 398 deletions.
12 changes: 9 additions & 3 deletions quantmsio/commands/absolute_expression_command.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,9 +22,15 @@
help="quantms.io project file",
required=False,
)
@click.option("--output_folder", help="Folder to generate the df expression file.", required=True)
@click.option("--output_prefix_file", help="Prefix of the df expression file", required=False)
@click.option("--delete_existing", help="Delete existing files in the output folder", is_flag=True)
@click.option(
"--output_folder", help="Folder to generate the df expression file.", required=True
)
@click.option(
"--output_prefix_file", help="Prefix of the df expression file", required=False
)
@click.option(
"--delete_existing", help="Delete existing files in the output folder", is_flag=True
)
def convert_ibaq_absolute(
ibaq_file: str,
sdrf_file: str,
Expand Down
4 changes: 3 additions & 1 deletion quantmsio/commands/attach_file_command.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,9 @@
short_help="Register the file to project.json.",
)
@click.option("--project_file", help="the project.json file", required=True)
@click.option("--attach_file", help="The path of the file that will be registered", required=True)
@click.option(
"--attach_file", help="The path of the file that will be registered", required=True
)
@click.option(
"--category",
type=click.Choice(
Expand Down
18 changes: 14 additions & 4 deletions quantmsio/commands/diann_convert_command.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,9 @@
help="the design file path",
required=True,
)
@click.option("--qvalue_threshold", help="qvalue_threshold", required=True, default=0.05)
@click.option(
"--qvalue_threshold", help="qvalue_threshold", required=True, default=0.05
)
@click.option(
"--mzml_info_folder",
help="the foldef of mzml_info tsv file",
Expand All @@ -45,7 +47,9 @@
"--duckdb_max_memory",
help="The maximum amount of memory allocated by the DuckDB engine (e.g 4GB)",
)
@click.option("--duckdb_threads", help="The number of threads for the DuckDB engine (e.g 4)")
@click.option(
"--duckdb_threads", help="The number of threads for the DuckDB engine (e.g 4)"
)
@click.option(
"--file_num",
help="The number of files being processed at the same time",
Expand Down Expand Up @@ -81,8 +85,14 @@ def diann_convert_to_parquet(
if not output_prefix_file:
output_prefix_file = ""

feature_output_path = output_folder + "/" + create_uuid_filename(output_prefix_file, ".feature.parquet")
psm_output_path = output_folder + "/" + create_uuid_filename(output_prefix_file, ".psm.parquet")
feature_output_path = (
output_folder
+ "/"
+ create_uuid_filename(output_prefix_file, ".feature.parquet")
)
psm_output_path = (
output_folder + "/" + create_uuid_filename(output_prefix_file, ".psm.parquet")
)

dia_nn = DiaNNConvert()

Expand Down
12 changes: 9 additions & 3 deletions quantmsio/commands/differential_expression_command.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,9 +24,15 @@
required=False,
default="0.05",
)
@click.option("--output_folder", help="Folder to generate the df expression file.", required=True)
@click.option("--output_prefix_file", help="Prefix of the df expression file", required=False)
@click.option("--delete_existing", help="Delete existing files in the output folder", is_flag=True)
@click.option(
"--output_folder", help="Folder to generate the df expression file.", required=True
)
@click.option(
"--output_prefix_file", help="Prefix of the df expression file", required=False
)
@click.option(
"--delete_existing", help="Delete existing files in the output folder", is_flag=True
)
def convert_msstats_differential(
msstats_file: str,
sdrf_file: str,
Expand Down
13 changes: 11 additions & 2 deletions quantmsio/commands/feature_command.py
Original file line number Diff line number Diff line change
Expand Up @@ -62,15 +62,24 @@ def convert_feature_file(
:return: none
"""

if sdrf_file is None or msstats_file is None or mztab_file is None or output_folder is None:
if (
sdrf_file is None
or msstats_file is None
or mztab_file is None
or output_folder is None
):
raise click.UsageError("Please provide all the required parameters")
if use_cache is None:
use_cache = False

feature_manager = FeatureHandler()
if not output_prefix_file:
output_prefix_file = ""
feature_manager.parquet_path = output_folder + "/" + create_uuid_filename(output_prefix_file, ".feature.parquet")
feature_manager.parquet_path = (
output_folder
+ "/"
+ create_uuid_filename(output_prefix_file, ".feature.parquet")
)
if consensusxml_file is not None:
feature_manager.convert_mztab_msstats_to_feature(
mztab_file=mztab_file,
Expand Down
8 changes: 6 additions & 2 deletions quantmsio/commands/generate_gene_msg_command.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,9 @@
)
@click.option(
"--map_parameter",
type=click.Choice(["map_protein_name", "map_protein_accession"], case_sensitive=False),
type=click.Choice(
["map_protein_name", "map_protein_accession"], case_sensitive=False
),
help="map type",
)
@click.option("--species", help="species", default="human")
Expand All @@ -38,4 +40,6 @@ def map_gene_msg_to_parquet(
if not output_path.endswith("parquet"):
raise click.UsageError("Please provide file extension(.parquet)")

map_gene_msgs_to_parquet(parquet_path, fasta_path, map_parameter, output_path, label, species)
map_gene_msgs_to_parquet(
parquet_path, fasta_path, map_parameter, output_path, label, species
)
4 changes: 3 additions & 1 deletion quantmsio/commands/generate_project_report_command.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,9 @@
"generate-project-report",
short_help="generate report of project " "format",
)
@click.option("--project_folder", help="Folder to generate the df expression file.", required=True)
@click.option(
"--project_folder", help="Folder to generate the df expression file.", required=True
)
def generate_report_about_project(project_folder):
"""
project_folder: The folder path for the full project.
Expand Down
4 changes: 3 additions & 1 deletion quantmsio/commands/generate_spectra_message_command.py
Original file line number Diff line number Diff line change
Expand Up @@ -46,4 +46,6 @@ def map_spectrum_message_to_parquet(
"""
if not output_path.endswith("parquet"):
raise click.UsageError("Please provide file extension(.parquet)")
generate_features_of_spectrum(parquet_path, mzml_directory, output_path, label, file_num, partition)
generate_features_of_spectrum(
parquet_path, mzml_directory, output_path, label, file_num, partition
)
4 changes: 3 additions & 1 deletion quantmsio/commands/generate_start_and_end_command.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,9 @@
help="parquet type",
)
@click.option("--output_path", help="save path", required=True)
def inject_start_and_end_from_fasta(parquet_path: str, fasta_path: str, label: str, output_path: str):
def inject_start_and_end_from_fasta(
parquet_path: str, fasta_path: str, label: str, output_path: str
):
"""
Register the file with project.json
:param parquet_path: psm or feature parquet file path
Expand Down
8 changes: 6 additions & 2 deletions quantmsio/commands/get_unanimous_command.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,9 @@ def labels():
@click.option("--output_path", help="output file path")
@click.option(
"--map_parameter",
type=click.Choice(["map_protein_name", "map_protein_accession"], case_sensitive=False),
type=click.Choice(
["map_protein_name", "map_protein_accession"], case_sensitive=False
),
help="map type",
)
@click.option(
Expand Down Expand Up @@ -53,7 +55,9 @@ def get_unanimous_for_parquet(parquet_path, fasta, output_path, map_parameter, l
@click.option("--output_path", help="output file path")
@click.option(
"--map_parameter",
type=click.Choice(["map_protein_name", "map_protein_accession"], case_sensitive=False),
type=click.Choice(
["map_protein_name", "map_protein_accession"], case_sensitive=False
),
help="map type",
)
def get_unanimous_for_tsv(path, fasta, output_path, map_parameter):
Expand Down
8 changes: 6 additions & 2 deletions quantmsio/commands/load_best_scan_number_command.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,9 +8,13 @@
short_help="inject bset_psm_scan_number to feature",
)
@click.option("--diann_psm_path", help="diann psm parquet file path", required=True)
@click.option("--diann_feature_path", help="diann feature parquet file path", required=True)
@click.option(
"--diann_feature_path", help="diann feature parquet file path", required=True
)
@click.option("--output_path", help="save path", required=True)
def inject_bset_psm_scan_number(diann_psm_path: str, diann_feature_path: str, output_path: str):
def inject_bset_psm_scan_number(
diann_psm_path: str, diann_feature_path: str, output_path: str
):
"""
Register the file with project.json
:param diann_psm_path: diann psm parquet file path
Expand Down
20 changes: 15 additions & 5 deletions quantmsio/commands/plot.py
Original file line number Diff line number Diff line change
Expand Up @@ -32,15 +32,21 @@ def plot_peptides(ctx, psm_parquet_path: str, sdrf_path: str, save_path: str):
:param save_path: img save path [xxx.png]
:return: none
"""
plot_peptides_of_lfq_condition(psm_parquet_path=psm_parquet_path, sdrf_path=sdrf_path, save_path=save_path)
plot_peptides_of_lfq_condition(
psm_parquet_path=psm_parquet_path, sdrf_path=sdrf_path, save_path=save_path
)


@plot.command("plot-ibaq-distribution", short_help="plot ibaq distribution of expression")
@plot.command(
"plot-ibaq-distribution", short_help="plot ibaq distribution of expression"
)
@click.option("--ibaq_path", help="ibaq file path", required=True)
@click.option("--save_path", help="img save path [xxx.svg]", required=True)
@click.option("--select_column", help="Selected column in Ibaq File", required=False)
@click.pass_context
def plot_ibaq_distribution(ctx, ibaq_path: str, save_path: str, select_column: str) -> None:
def plot_ibaq_distribution(
ctx, ibaq_path: str, save_path: str, select_column: str
) -> None:
"""
plot ibaq distribution of expression
:param ibaq_path: ibaq file path
Expand All @@ -59,7 +65,9 @@ def plot_ibaq_distribution(ctx, ibaq_path: str, save_path: str, select_column: s
@click.option("--save_path", help="img save path [xxx.svg]", required=True)
@click.option("--num_samples", help="The number of samples plotted", default=10)
@click.pass_context
def plot_kde_intensity_distribution(feature_path: str, save_path: str, num_samples: int):
def plot_kde_intensity_distribution(
feature_path: str, save_path: str, num_samples: int
):
"""
plot ibaq distribution of expression
:param feature_path: feature file path
Expand Down Expand Up @@ -99,7 +107,9 @@ def plot_bar_peptide_distribution(feature_path: str, save_path: str, num_samples
@click.option("--save_path", help="img save path [xxx.svg]", required=True)
@click.option("--num_samples", help="The number of samples plotted", default=10)
@click.pass_context
def plot_box_intensity_distribution(feature_path: str, save_path: str, num_samples: int):
def plot_box_intensity_distribution(
feature_path: str, save_path: str, num_samples: int
):
"""
plot ibaq distribution of expression
:param feature_path: feature file path
Expand Down
16 changes: 12 additions & 4 deletions quantmsio/commands/psm_command.py
Original file line number Diff line number Diff line change
Expand Up @@ -31,7 +31,9 @@
help="Prefix of the parquet file needed to generate the file name",
required=False,
)
@click.option("--verbose", help="Output debug information.", default=False, is_flag=True)
@click.option(
"--verbose", help="Output debug information.", default=False, is_flag=True
)
def convert_psm_file(
mztab_file: str,
output_folder: str,
Expand All @@ -55,7 +57,9 @@ def convert_psm_file(
output_prefix_file = ""

psm_manager = PSMHandler()
psm_manager.parquet_path = output_folder + "/" + create_uuid_filename(output_prefix_file, ".psm.parquet")
psm_manager.parquet_path = (
output_folder + "/" + create_uuid_filename(output_prefix_file, ".psm.parquet")
)
psm_manager.convert_mztab_to_psm(
mztab_path=mztab_file,
parquet_path=psm_manager.parquet_path,
Expand All @@ -65,7 +69,9 @@ def convert_psm_file(


@click.command("compare-set-psms", short_help="plot venn for a set of Psms parquet")
@click.option("-p", "--parquets", type=str, help="List of psm parquet path", multiple=True)
@click.option(
"-p", "--parquets", type=str, help="List of psm parquet path", multiple=True
)
@click.option("-t", "--tags", type=str, help="List of parquet label", multiple=True)
def compare_set_of_psms(parquets, tags):
"""
Expand All @@ -74,7 +80,9 @@ def compare_set_of_psms(parquets, tags):
:param tags: a set of psm label
"""
if len(parquets) != len(tags):
raise click.UsageError("Please provide same length of parquet_list and label_list")
raise click.UsageError(
"Please provide same length of parquet_list and label_list"
)

plot_peptidoform_charge_venn(parquets, tags)
plot_sequence_venn(parquets, tags)
18 changes: 13 additions & 5 deletions quantmsio/commands/statistics.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,8 @@ def statistics():
@click.option("--parquet_path", help="psm parquet path in lfq", required=True)
@click.option(
"--save_path",
help="file with the statistics (e.g. statistics.csv), if not provided," " will print to stdout",
help="file with the statistics (e.g. statistics.csv), if not provided,"
" will print to stdout",
)
@click.pass_context
def feature_file_statistics(ctx, absolute_path: str, parquet_path: str, save_path: str):
Expand All @@ -40,11 +41,15 @@ def write_stats(file, stats: ParquetStatistics):
file.write("Number of proteins: {}\n".format(stats.get_number_of_proteins()))
file.write("Number of peptides: {}\n".format(stats.get_number_of_peptides()))
file.write("Number of samples: {}\n".format(stats.get_number_of_samples()))
file.write("Number of peptidoforms: {}\n".format(stats.get_number_of_peptidoforms()))
file.write(
"Number of peptidoforms: {}\n".format(stats.get_number_of_peptidoforms())
)
file.write("Number of msruns: {}\n".format(stats.get_number_msruns()))

def write_absolute_stats(file, stats: IbaqStatistics):
file.write("Ibaq Number of proteins: {}\n".format(stats.get_number_of_proteins()))
file.write(
"Ibaq Number of proteins: {}\n".format(stats.get_number_of_proteins())
)
file.write("Ibaq Number of samples: {}\n".format(stats.get_number_of_samples()))

if save_path:
Expand All @@ -65,7 +70,8 @@ def write_absolute_stats(file, stats: IbaqStatistics):
@click.option("--parquet_path", help="psm parquet path in lfq", required=True)
@click.option(
"--save_path",
help="file with the statistics (e.g. statistics.csv), if not provided," " will print to stdout",
help="file with the statistics (e.g. statistics.csv), if not provided,"
" will print to stdout",
)
@click.pass_context
def parquet_psm_statistics(ctx, parquet_path: str, save_path: str):
Expand All @@ -79,7 +85,9 @@ def parquet_psm_statistics(ctx, parquet_path: str, save_path: str):
def write_stats(file, stats: ParquetStatistics):
file.write("Number of proteins: {}\n".format(stats.get_number_of_proteins()))
file.write("Number of peptides: {}\n".format(stats.get_number_of_peptides()))
file.write("Number of peptidoforms: {}\n".format(stats.get_number_of_peptidoforms()))
file.write(
"Number of peptidoforms: {}\n".format(stats.get_number_of_peptidoforms())
)
file.write("Number of psms: {}\n".format(stats.get_number_of_psms()))
file.write("Number of msruns: {}\n".format(stats.get_number_msruns()))

Expand Down
28 changes: 22 additions & 6 deletions quantmsio/core/ae.py
Original file line number Diff line number Diff line change
Expand Up @@ -85,13 +85,25 @@ def convert_ibaq_to_quantms(
output_lines = ""
if self.project_manager:
output_lines += (
"#project_accession: " + self.project_manager.project.project_info["project_accession"] + "\n"
"#project_accession: "
+ self.project_manager.project.project_info["project_accession"]
+ "\n"
)
output_lines += "#project_title: " + self.project_manager.project.project_info["project_title"] + "\n"
output_lines += (
"#project_description: " + self.project_manager.project.project_info["project_description"] + "\n"
"#project_title: "
+ self.project_manager.project.project_info["project_title"]
+ "\n"
)
output_lines += (
"#project_description: "
+ self.project_manager.project.project_info["project_description"]
+ "\n"
)
output_lines += (
"#quantms_version: "
+ self.project_manager.project.project_info["quantms_version"]
+ "\n"
)
output_lines += "#quantms_version: " + self.project_manager.project.project_info["quantms_version"] + "\n"
factor_value = self.get_factor_value()
if factor_value is not None:
output_lines += "#factor_value: " + factor_value + "\n"
Expand Down Expand Up @@ -127,8 +139,12 @@ def convert_ibaq_to_quantms(
f.write(output_lines)

if self.project_manager:
self.project_manager.add_quantms_file(file_category="absolute_file", file_name=output_filename)
logger.info(f"Absolute expression file copied to {output_filename} and added to the project information")
self.project_manager.add_quantms_file(
file_category="absolute_file", file_name=output_filename
)
logger.info(
f"Absolute expression file copied to {output_filename} and added to the project information"
)

def get_factor_value(self):
"""
Expand Down
4 changes: 3 additions & 1 deletion quantmsio/core/core.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,9 @@ def __init__(self, name_prefix: str):
# Create a cache name using a hash and uuid
if name_prefix is None:
name_prefix = "generic"
self._cache_name = str("_cache_name_{}_{}".format(name_prefix, uuid.uuid4().hex))
self._cache_name = str(
"_cache_name_{}_{}".format(name_prefix, uuid.uuid4().hex)
)
self.cache = diskcache.Cache(self._cache_name, statistics=True)
self.cache.create_tag_index()

Expand Down
Loading

0 comments on commit d242f70

Please sign in to comment.