Skip to content

Commit

Permalink
minor_change
Browse files Browse the repository at this point in the history
  • Loading branch information
zprobot committed Nov 25, 2023
1 parent dd56b5e commit 76eb43a
Show file tree
Hide file tree
Showing 3 changed files with 15 additions and 5 deletions.
1 change: 1 addition & 0 deletions docs/tools.rst
Original file line number Diff line number Diff line change
Expand Up @@ -269,6 +269,7 @@ Example:
--modifications "Carbamidomethyl (C)" "null"
--qvalue_threshold 0.05
--mzml_info_folder mzml
--sdrf_path PXD037682.sdrf.tsv
--output_folder result
--output_prefix_file PXD037682
--threads 60
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -42,6 +42,11 @@ def cli():
help="mzml info tsv file",
required=True,
)
@click.option(
"--sdrf_path",
help="the SDRF file needed to extract some of the metadata",
required=True,
)
@click.option(
"--output_folder",
help="Folder where the Json file will be generated",
Expand All @@ -50,13 +55,14 @@ def cli():
@click.option("--output_prefix_file", help="Prefix of the Json file needed to generate the file name", required=False)
@click.option("--threads",help="The number of thread", default=60)
@click.pass_context
def diann_convert_to_parquet(ctx,report_path:str,design_file:str,modifications:List,qvalue_threshold: float,mzml_info_folder:str,output_folder:str,output_prefix_file:str,threads:int):
def diann_convert_to_parquet(ctx,report_path:str,design_file:str,modifications:List,qvalue_threshold: float,mzml_info_folder:str,sdrf_path:str,output_folder:str,output_prefix_file:str,threads:int):
'''
report_path: diann report file path
design_file: the design file path
modifications: a list containing the fixed modifications and the variable modifications
qvalue_threshold: qvalue threshold
mzml_info_folder: mzml info file folder
sdrf_path: sdrf file path
output_folder: Folder where the Json file will be generated
output_prefix_file: Prefix of the Json file needed to generate the file name
threads: The number of threads
Expand All @@ -78,6 +84,7 @@ def diann_convert_to_parquet(ctx,report_path:str,design_file:str,modifications:L
folder=mzml_info_folder,
design_file=design_file,
modifications=modifications,
sdrf_path = sdrf_path,
psm_output_path=psm_output_path,
feature_output_path = feature_output_path,
thread_num = threads
Expand Down
10 changes: 6 additions & 4 deletions python/quantmsio/quantms_io/core/diann_convert.py
Original file line number Diff line number Diff line change
Expand Up @@ -283,15 +283,15 @@ def intergrate_msg(n):
report = self.add_additional_msg(report)
yield report

def generate_psm_and_feature_file(self,report_path: str, qvalue_threshold: float,folder: str,design_file:str,modifications:list,psm_output_path:str,feature_output_path:str,thread_num:int=60):
def generate_psm_and_feature_file(self,report_path: str, qvalue_threshold: float,folder: str,design_file:str,modifications:list,sdrf_path:str,psm_output_path:str,feature_output_path:str,thread_num:int=60):
psm_pqwriter = None
feature_pqwriter = None

s_data_frame, f_table = get_exp_design_dfs(design_file)
self._modifications = get_modifications(modifications[0], modifications[1])
for report in self.main_report_df(report_path, qvalue_threshold,folder,thread_num):
psm_pqwriter = self.generate_psm_file(report,psm_pqwriter,psm_output_path)
feature_pqwriter = self.generate_feature_file(report,s_data_frame,f_table,feature_pqwriter,feature_output_path)
feature_pqwriter = self.generate_feature_file(report,s_data_frame,f_table,sdrf_path,feature_pqwriter,feature_output_path)
if psm_pqwriter:
psm_pqwriter.close()
if feature_pqwriter:
Expand Down Expand Up @@ -332,8 +332,9 @@ def generate_psm_file(self,report,psm_pqwriter,psm_output_path):
psm_pqwriter.write_table(parquet_table)
return psm_pqwriter

def generate_feature_file(self,report,s_data_frame,f_table,feature_pqwriter,feature_output_path):

def generate_feature_file(self,report,s_data_frame,f_table,sdrf_path,feature_pqwriter,feature_output_path):

sample_name = pd.read_csv(sdrf_path,sep='\t',usecols=['source name'],nrows=1)['source name'].values[0].split('-')[0]
report = report[report["intensity"] != 0]
report.loc[:,"fragment_ion"] = "NA"
report.loc[:,"isotope_label_type"] = "L"
Expand All @@ -360,6 +361,7 @@ def generate_feature_file(self,report,s_data_frame,f_table,feature_pqwriter,feat
peptide_score_name + ":" + report["global_qvalue"].astype(str).values + ","
+ "Best PSM PEP:" + report["posterior_error_probability"].astype(str).values
)
report['sample_accession'] = sample_name + '-Sample-' + report['sample_accession'].astype(str).values
schema = FeatureHandler()
feature = FeatureInMemory('LFQ',schema.schema)
feature._modifications = self._modifications
Expand Down

0 comments on commit 76eb43a

Please sign in to comment.