Skip to content

Commit

Permalink
generate_report
Browse files Browse the repository at this point in the history
  • Loading branch information
zprobot committed Oct 24, 2023
1 parent 9d53855 commit a36dd25
Show file tree
Hide file tree
Showing 4 changed files with 93 additions and 2 deletions.
17 changes: 16 additions & 1 deletion docs/tools.rst
Original file line number Diff line number Diff line change
Expand Up @@ -179,7 +179,7 @@ The absolute expression format aims to visualize absolute expression (AE) result
iBAQ values and store the AE results of each protein on each sample.

- If you have generated project.json, you can use this parameter ``--project_file`` to add project information for AE files.
- IF you want to know ibaq, please read `ibaqpy <https://github.com/bigbio/ibaqpy>`__
- If you want to know ibaq, please read `ibaqpy <https://github.com/bigbio/ibaqpy>`__
- If you want to know more, please read :doc:`ae`.

Example:
Expand Down Expand Up @@ -338,6 +338,21 @@ Example:
--parquet_path_two res_lfq2_no_cache.parquet
--report_path report.txt
Generate report about files
-----------------------------
This tool generates a report about all feature files or psm files in a directory.
You can build ``psm parquet`` or ``feature parquet`` files multiple times for the same project and use this command to verify their consistency.

- ``--label`` accepts two options: ``psm`` and ``feature``
Example:
.. code:: python
python generate_report_command.py generate_report_about_files
--check_dir file_path
--label psm
Register file
--------------------------
This tool is used to register the file to ``project.json``.
Expand Down
32 changes: 32 additions & 0 deletions python/quantmsio/quantms_io/commands/generate_report_command.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,32 @@
import click

from quantms_io.core.tools import generate_report_of_psms_or_features
CONTEXT_SETTINGS = dict(help_option_names=["-h", "--help"])


# Root command group of this module. Subcommands are attached to it with
# cli.add_command(); CONTEXT_SETTINGS enables both -h and --help.
@click.group(context_settings=CONTEXT_SETTINGS)
def cli():
    """
    This is the main tool that gives access to all commands.
    """

@click.command(
    "generate_report_about_files",
    short_help="generate report of psm or feature files",
)
@click.option(
    "--check_dir",
    help="Folder containing the psm or feature parquet files to report on.",
    required=True,
)
@click.option(
    "--label",
    type=click.Choice(["feature", "psm"], case_sensitive=False),
    help="parquet type",
    # Without a label the report function cannot choose which files to scan,
    # so reject the call at the command line instead of failing later.
    required=True,
)
@click.pass_context
def generate_report_about_files(ctx, check_dir: str, label: str):
    """
    Generate a report about the psm or feature parquet files in a directory.

    check_dir: directory containing the psm or feature files
    label: psm or feature
    """
    generate_report_of_psms_or_features(check_dir=check_dir, label=label)

# Attach the report command to this module's command group.
cli.add_command(generate_report_about_files)

# Run the command group when this file is executed directly as a script.
if __name__ == '__main__':
    cli()
44 changes: 43 additions & 1 deletion python/quantmsio/quantms_io/core/tools.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,7 @@
from quantms_io.core.psm import PSMHandler
from quantms_io.core.project import ProjectHandler
from quantms_io.utils.file_utils import extract_len

from quantms_io.core.project import create_uuid_filename



Expand Down Expand Up @@ -323,3 +323,45 @@ def register_file_to_json(project_file,attach_file,category,replace_existing):
Register= ProjectHandler(project_json_file=project_file)
Register.add_quantms_file(attach_file,category,replace_existing)
Register.save_updated_project_info(output_file_name=project_file)

def generate_report_of_psms_or_features(check_dir, label):
    """
    Write a text report about every psm or feature parquet file in a directory.

    For each matching file the report lists the file name, its size on disk,
    the number of (peptidoform, charge) groups and the number of distinct
    protein accessions.

    :param check_dir: directory that contains the parquet files to inspect
    :param label: 'psm' selects files ending in ``.psm.parquet``,
        'feature' selects files ending in ``.feature.parquet``
    :return: path of the report file that was written
    :raises FileNotFoundError: if check_dir is not an existing directory
    :raises ValueError: if label is neither 'psm' nor 'feature'
    """
    if not os.path.isdir(check_dir):
        raise FileNotFoundError(f"not a directory: {check_dir}")

    suffix_by_label = {'psm': '.psm.parquet', 'feature': '.feature.parquet'}
    if label not in suffix_by_label:
        raise ValueError(f"label must be 'psm' or 'feature', got {label!r}")
    suffix = suffix_by_label[label]

    # os.listdir() returns entries in arbitrary order; sort them so that
    # reports produced by separate runs can be compared line by line.
    check_list = sorted(
        name for name in os.listdir(check_dir) if name.endswith(suffix)
    )

    report_parts = []
    for file_name in check_list:
        file_path = os.path.join(check_dir, file_name)
        df = pd.read_parquet(
            file_path, columns=['protein_accessions', 'peptidoform', 'charge']
        )
        peptide_count = len(df.groupby(['peptidoform', 'charge']))

        # Each row holds a collection of accessions; pool them across all rows.
        proteins = set()
        for accessions in df['protein_accessions']:
            proteins.update(accessions)

        report_parts.append(
            'Name: ' + file_name + '\n'
            + 'File size: ' + getFileSize(file_path) + '\n'
            + 'Total number of Peptides: ' + str(peptide_count) + '\n'
            + 'Total number of Proteins: ' + str(len(proteins)) + '\n\n'
        )

    output_path = create_uuid_filename(label + 's_report', '.txt')
    with open(output_path, "w", encoding='utf8') as f:
        f.write(''.join(report_parts))
    return output_path

def getFileSize(filePath):
    """
    Return the size of a file as a short human-readable string.

    The size is expressed in the largest of the units Byte, K, M or G (each
    step is a factor of 1024) for which the value is below 1024; anything
    larger is reported in G. Scaled values are rounded to two decimals.

    :param filePath: path of the file to measure
    :return: the size followed by its unit, e.g. '10Byte', '1.5K', '1.5G'
    :raises OSError: if the file does not exist or is inaccessible
        (propagated from os.path.getsize)
    """
    size = os.path.getsize(filePath)
    if size < 1024:
        return str(size) + 'Byte'
    for unit in ('K', 'M'):
        size /= 1024
        if size < 1024:
            return str(round(size, 2)) + unit
    # Keep two decimals here as well, so 1.5 GiB is reported as '1.5G', not '2G'.
    return str(round(size / 1024, 2)) + 'G'

2 changes: 2 additions & 0 deletions python/quantmsio/quantms_io/quantmsio_cli.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@
from quantms_io.commands.get_unanimous_command import get_unanimous_for_parquet,get_unanimous_for_tsv
from quantms_io.commands.feature_command import convert_feature_file
from quantms_io.commands.psm_command import convert_psm_file, compare_set_of_psms
from quantms_io.commands.generate_report_command import generate_report_about_files

CONTEXT_SETTINGS = dict(help_option_names=["-h", "--help"])

Expand All @@ -39,6 +40,7 @@ def cli():
cli.add_command(attach_file_to_json)
cli.add_command(get_unanimous_for_parquet)
cli.add_command(get_unanimous_for_tsv)
cli.add_command(generate_report_about_files)

def quantms_io_main():
"""
Expand Down

0 comments on commit a36dd25

Please sign in to comment.