From a36dd25259241d25781e62194ed0b18c90c5cf42 Mon Sep 17 00:00:00 2001 From: zprobot <1727697083@qq.com> Date: Tue, 24 Oct 2023 18:47:36 +0800 Subject: [PATCH] generate_report --- docs/tools.rst | 17 ++++++- .../commands/generate_report_command.py | 32 ++++++++++++++ python/quantmsio/quantms_io/core/tools.py | 44 ++++++++++++++++++- python/quantmsio/quantms_io/quantmsio_cli.py | 2 + 4 files changed, 93 insertions(+), 2 deletions(-) create mode 100644 python/quantmsio/quantms_io/commands/generate_report_command.py diff --git a/docs/tools.rst b/docs/tools.rst index 6608e35..17ecd7b 100644 --- a/docs/tools.rst +++ b/docs/tools.rst @@ -179,7 +179,7 @@ The absolute expression format aims to visualize absolute expression (AE) result iBAQ values and store the AE results of each protein on each sample. - If you have generated project.json, you can use this parameter ``--project_file`` to add project information for AE files. -- IF you want to know ibaq, please read `ibaqpy `__ +- If you want to know more about ibaq, please read `ibaqpy `__ - If you want to know more, please read :doc:`ae`. Example: @@ -338,6 +338,21 @@ Example: --parquet_path_two res_lfq2_no_cache.parquet --report_path report.txt +Generate report about files +----------------------------- +This tool is used to generate a report about all feature files or psm files. +You can build ``psm parquet`` or ``feature parquet`` multiple times for the same project and use this command to verify their consistency. + +- ``--label`` accepts two options: ``psm`` and ``feature`` + +Example: + +.. code:: python + + python generate_report_command.py generate_report_about_files + --check_dir file_path + --label psm + Register file -------------------------- This tool is used to register the file to ``project.json``. 
diff --git a/python/quantmsio/quantms_io/commands/generate_report_command.py b/python/quantmsio/quantms_io/commands/generate_report_command.py new file mode 100644 index 0000000..e90aec4 --- /dev/null +++ b/python/quantmsio/quantms_io/commands/generate_report_command.py @@ -0,0 +1,32 @@
+import click
+
+from quantms_io.core.tools import generate_report_of_psms_or_features
+CONTEXT_SETTINGS = dict(help_option_names=["-h", "--help"])
+
+
+@click.group(context_settings=CONTEXT_SETTINGS)
+def cli():
+    """
+    This is the main tool that gives access to all commands.
+    """
+
+@click.command(
+    "generate_report_about_files",
+    short_help="Generate a report about the psm or feature files of a folder",
+)
+@click.option(
+    "--check_dir", help="Folder that contains the psm or feature parquet files.", required=True
+)
+@click.option('--label', type=click.Choice(['feature', 'psm'], case_sensitive=False), help='parquet type', required=True)
+@click.pass_context
+def generate_report_about_files(ctx, check_dir: str, label: str):
+    '''
+    Generate a text report about the psm or feature parquet files in a folder.
+    check_dir is that folder; label selects the file type (psm or feature).
+    '''
+    generate_report_of_psms_or_features(check_dir=check_dir,label=label)
+
+cli.add_command(generate_report_about_files)
+
+if __name__ == '__main__':
+    cli()
\ No newline at end of file diff --git a/python/quantmsio/quantms_io/core/tools.py b/python/quantmsio/quantms_io/core/tools.py index eb95784..86d0f28 100644 --- a/python/quantmsio/quantms_io/core/tools.py +++ b/python/quantmsio/quantms_io/core/tools.py @@ -23,7 +23,7 @@ from quantms_io.core.psm import PSMHandler from quantms_io.core.project import ProjectHandler from quantms_io.utils.file_utils import extract_len - +from quantms_io.core.project import create_uuid_filename @@ -323,3 +323,45 @@ def register_file_to_json(project_file,attach_file,category,replace_existing): Register= ProjectHandler(project_json_file=project_file) Register.add_quantms_file(attach_file,category,replace_existing) Register.save_updated_project_info(output_file_name=project_file) +
+def generate_report_of_psms_or_features(check_dir,label):
+    """
+    Write a text report about every psm or feature parquet file in a folder.
+
+    For each ``.psm.parquet`` / ``.feature.parquet`` file in ``check_dir`` (sorted by name) the report
+    lists the file name, the file size and the number of distinct (peptidoform, charge) groups and
+    protein accessions. The output path comes from ``create_uuid_filename(label+'s_report','.txt')``.
+
+    :param check_dir: folder that holds the parquet files
+    :param label: ``psm`` or ``feature``
+    :raises NotADirectoryError: if ``check_dir`` is not an existing folder
+    :raises ValueError: if ``label`` is not ``psm`` or ``feature`` (used to fail with UnboundLocalError)
+    """
+    if not os.path.isdir(check_dir):
+        raise NotADirectoryError("not an existing directory: {}".format(check_dir))
+    if label not in ('psm', 'feature'):
+        raise ValueError("label must be 'psm' or 'feature', got {!r}".format(label))
+    report = []
+    for file_name in sorted(f for f in os.listdir(check_dir) if f.endswith('.' + label + '.parquet')):
+        file_path = os.path.join(check_dir, file_name)
+        df = pd.read_parquet(file_path,columns=['protein_accessions','peptidoform','charge'])
+        proteins = {acc for accs in df['protein_accessions'] for acc in accs}
+        report.append('Name: ' + file_name + '\n'
+                      + 'File size: ' + getFileSize(file_path) + '\n'
+                      + 'Total number of Peptides: ' + str(df.groupby(['peptidoform','charge']).ngroups) + '\n'
+                      + 'Total number of Proteins: ' + str(len(proteins)) + '\n\n')
+    output_path = create_uuid_filename(label+'s_report','.txt')
+    with open(output_path, "w",encoding='utf8') as f:
+        f.write(''.join(report))
+
+
+def getFileSize(filePath):
+    """Return the size of ``filePath`` as text such as '512Byte', '1.5K', '2.25M' or '1.5G'."""
+    size = os.path.getsize(filePath)
+    if size < 1024:
+        return str(size) + 'Byte'
+    for unit in ('K', 'M'):
+        size /= 1024
+        if size < 1024:
+            return str(round(size, 2)) + unit
+    return str(round(size / 1024, 2)) + 'G'  # two decimals here too; this used to round to whole gigabytes
\ No newline at end of file diff --git a/python/quantmsio/quantms_io/quantmsio_cli.py b/python/quantmsio/quantms_io/quantmsio_cli.py index bf72093..6fc2620 100644 --- a/python/quantmsio/quantms_io/quantmsio_cli.py +++ b/python/quantmsio/quantms_io/quantmsio_cli.py @@ -15,6 +15,7 @@ from quantms_io.commands.get_unanimous_command import get_unanimous_for_parquet,get_unanimous_for_tsv from quantms_io.commands.feature_command import convert_feature_file from quantms_io.commands.psm_command import convert_psm_file, compare_set_of_psms +from
quantms_io.commands.generate_report_command import generate_report_about_files CONTEXT_SETTINGS = dict(help_option_names=["-h", "--help"]) @@ -39,6 +40,7 @@ def cli(): cli.add_command(attach_file_to_json) cli.add_command(get_unanimous_for_parquet) cli.add_command(get_unanimous_for_tsv) +cli.add_command(generate_report_about_files) def quantms_io_main(): """