diff --git a/.gitignore b/.gitignore index 381d127..476aec7 100644 --- a/.gitignore +++ b/.gitignore @@ -4,4 +4,5 @@ build .idea data docs/.vscode -__pycache__ \ No newline at end of file +__pycache__ +.vscode \ No newline at end of file diff --git a/docs/README.adoc b/docs/README.adoc index 3cf3f34..9df6480 100644 --- a/docs/README.adoc +++ b/docs/README.adoc @@ -549,7 +549,7 @@ The following fields are shared among Peptide-based views: <>, <>, | Modified.Sequence | Modified Peptide | Modified sequence -| - +| opt_global_cv_MS:1000889_peptidoform_sequence | `modifications` | List of modifications as a string array, easy for search and filter @@ -557,7 +557,7 @@ The following fields are shared among Peptide-based views: <>, <>, | - | - | Modifications -| - +| modifications | `modification_details` | List of alternative site probabilities for the modification format: read <> @@ -573,7 +573,7 @@ The following fields are shared among Peptide-based views: <>, <>, | PEP | x | PEP -| modifications +| opt_global_Posterior_Error_Probability_score | `global_qvalue` | Global q-value for the peptide or psm at the level of the experiment @@ -581,7 +581,7 @@ The following fields are shared among Peptide-based views: <>, <>, | Global.Q.Value | x | - -| - +| opt_global_q-value 7+^| Peptide fields shared by <> <> | `is_decoy` @@ -590,12 +590,12 @@ The following fields are shared among Peptide-based views: <>, <>, | - | - | Reverse -| - +| opt_global_cv_MS:1002217_decoy_peptide | `calculated_mz` | Theoretical peptide mass-to-charge ratio based on an identified sequence and modifications | float -| - +| Calculate.Precursor.Mz | Calculated M/Z | m/z | calc_mass_to_charge @@ -606,15 +606,15 @@ The following fields are shared among Peptide-based views: <>, <>, | DIA-NN Scores | FragPipe Scores | MaxQuant Scores -| - +| search_engine_score 7+^| Protein fields shared by <> <> <> | `pg_accessions` | Protein group accessions of all the proteins that the peptide maps to | array[string], null +| Protein.Ids | x -| x -| x +| Proteins | accession | `pg_positions` @@ -624,6 +624,7 @@ The following fields are shared among Peptide-based views: <>, <>, | x | x | Combination of start and end positions + | `unique` | Unique peptide indicator, if the peptide maps to a single protein, the value is 1, otherwise 0 | int32, null @@ -638,7 +639,7 @@ The following fields are shared among Peptide-based views: <>, <>, | Global.PG.Q.Value | x | x -| - +| best_search_engine_score | `gg_accessions` | Gene group accessions, as a string array @@ -651,9 +652,9 @@ The following fields are shared among Peptide-based views: <>, <>, | `gg_names` | Gene names, as a string array | array[string], null +| - | x -| x -| x +| - | - 7+^| Spectra fields shared by <> <> @@ -678,7 +679,7 @@ The following fields are shared among Peptide-based views: <>, <>, | float, null | RT | x -| x +| Retention time | retention_time | `predicted_rt` @@ -737,7 +738,7 @@ The fields that are unique to the psm view are: | string | <> | Spectrum -| x +| MS/MS scan number | spectra_ref | `ion_mobility` diff --git a/docs/feature.avsc b/docs/feature.avsc index 826afa3..397673b 100644 --- a/docs/feature.avsc +++ b/docs/feature.avsc @@ -19,8 +19,8 @@ {"name": "pg_positions", "type": ["null", {"type": "array","items": "string"}], "doc": "Protein start and end positions written as start_post:end_post"}, {"name": "unique", "type": ["null", "int"], "doc": "Unique peptide indicator, if the peptide maps to a single protein, the value is 1, otherwise 0"}, {"name": "protein_global_qvalue", "type": ["null", "float32"], "doc": "Global q-value of the protein group at the experiment level"}, - {"name": "gene_accessions", "type": ["null", {"type": "array", "items": "string"}], "doc": "Gene accessions, as string array"}, - {"name": "gene_names", "type": ["null", {"type": "array", "items": "string"}], "doc": "Gene names, as string array"}, + {"name": "gg_accessions", "type": ["null", {"type": "array", "items": "string"}], "doc": "Gene accessions, as string array"}, + {"name": "gg_names", "type": ["null", {"type": "array", "items": "string"}], "doc": "Gene names, as string array"}, {"name": "precursor_charge", "type": "int", "doc": "Precursor charge"}, {"name": "observed_mz", "type": "float32", "doc": "Experimental peptide mass-to-charge ratio of identified peptide (in Da)"}, diff --git a/docs/include/psm_parquet_example.csv b/docs/include/psm_parquet_example.csv new file mode 100644 index 0000000..349159a --- /dev/null +++ b/docs/include/psm_parquet_example.csv @@ -0,0 +1,107 @@ +sequence,peptidoform,modifications,modification_details,posterior_error_probability,global_qvalue,is_decoy,calculated_mz,additional_scores,pg_accessions,pg_positions,unique,protein_global_qvalue,gg_accessions,gg_names,precursor_charge,observed_mz,rt,predicted_rt,quantmsio_version,reference_file_name,scan_number,ion_mobility,num_peaks,mz_array,intensity_array,rank,cv_params +VTIAQGGVLPNIQAVLLPK,VTIAQGGVLPNIQAVLLPK,,,0.0103795,0.0,0,966.0881,"[{'name': 'OpenMS', 'value': 0.0001662399840409615}]","['sp|P04908|H2A1B_HUMAN,sp|P0C0S8|H2A1_HUMAN,sp|P20671|H2A1D_HUMAN,sp|Q16777|H2A2C_HUMAN,sp|Q6FI13|H2A2A_HUMAN,sp|Q7L7L0|H2A3_HUMAN,sp|Q93077|H2A1C_HUMAN,sp|Q96KK5|H2A1H_HUMAN,sp|Q99878|H2A1J_HUMAN,sp|Q9BTM1|H2AJ_HUMAN']","['101:119' '101:119' '101:119' '101:119' '101:119' '101:119' '101:119' + '101:119' '101:119' '101:119']",0,,,,2,966.09174,8493.755,,,Blank,16140,,,,,, +VTIAQGGVLPNIQAVLLPK,VTIAQGGVLPNIQAVLLPK,,,0.00010013,0.0,0,966.0881,"[{'name': 'OpenMS', 'value': 4.368338284116722e-05}]","['sp|P04908|H2A1B_HUMAN,sp|P0C0S8|H2A1_HUMAN,sp|P20671|H2A1D_HUMAN,sp|Q16777|H2A2C_HUMAN,sp|Q6FI13|H2A2A_HUMAN,sp|Q7L7L0|H2A3_HUMAN,sp|Q93077|H2A1C_HUMAN,sp|Q96KK5|H2A1H_HUMAN,sp|Q99878|H2A1J_HUMAN,sp|Q9BTM1|H2AJ_HUMAN']","['101:119' '101:119' '101:119' '101:119' '101:119' '101:119' '101:119' + '101:119' '101:119' '101:119']",0,,,,2,966.0917,8385.162,,,Singlecell2,18243,,,,,, +VTIAQGGVLPNIQAVLLPK,VTIAQGGVLPNIQAVLLPK,,,0.000532035,0.0,0,966.0881,"[{'name': 'OpenMS', 'value': 4.368338284116722e-05}]","['sp|P04908|H2A1B_HUMAN,sp|P0C0S8|H2A1_HUMAN,sp|P20671|H2A1D_HUMAN,sp|Q16777|H2A2C_HUMAN,sp|Q6FI13|H2A2A_HUMAN,sp|Q7L7L0|H2A3_HUMAN,sp|Q93077|H2A1C_HUMAN,sp|Q96KK5|H2A1H_HUMAN,sp|Q99878|H2A1J_HUMAN,sp|Q9BTM1|H2AJ_HUMAN']","['101:119' '101:119' '101:119' '101:119' '101:119' '101:119' '101:119' + '101:119' '101:119' '101:119']",0,,,,2,966.09235,8540.901,,,Singlecell4,17782,,,,,, +FDQLFDDESDPFEVLK,FDQLFDDESDPFEVLK,,,0.097539,0.009113187,0,972.4491,"[{'name': 'OpenMS', 'value': 0.006372143197611701}]",['sp|Q8NC51|PAIRB_HUMAN'],['17:32'],1,0.00048449612,,,2,972.4515,8715.665,,,100HeLacells,33387,,,,,, +DGNASGTTLLEALDCILPPTRPTDKPLR,DGNASGTTLLEALDC[Carbamidomethyl]ILPPTRPTDKPLR,['15-UNIMOD:4'],,7.88383e-08,0.0,0,1007.86066,"[{'name': 'OpenMS', 'value': 4.368338284116722e-05}]","['sp|P68104|EF1A1_HUMAN,sp|Q5VTE0|EF1A3_HUMAN']",['220:247' '220:247'],0,0.00048449612,,,3,1007.863,8614.799,,,Singlecell3,33651,,,,,, +DGNASGTTLLEALDCILPPTRPTDKPLR,DGNASGTTLLEALDC[Carbamidomethyl]ILPPTRPTDKPLR,['15-UNIMOD:4'],,2.3768e-05,0.0,0,1007.86066,"[{'name': 'OpenMS', 'value': 4.368338284116722e-05}]","['sp|P68104|EF1A1_HUMAN,sp|Q5VTE0|EF1A3_HUMAN']",['220:247' '220:247'],0,0.00048449612,,,3,1007.8649,8625.608,,,Singlecell4,17950,,,,,, +LFYADHPFIFLVR,LFYADHPFIFLVR,,,0.000254486,0.0,0,546.6328,"[{'name': 'OpenMS', 'value': 4.368338284116722e-05}]",['sp|P50454|SERPH_HUMAN'],['381:393'],1,0.00048449612,,,3,546.63586,8561.016,,,20HeLacells,32013,,,,,, +GVMLAVDAVIAELKK,GVMLAVDAVIAELKK,,,0.100064,0.005988024,0,778.95764,"[{'name': 'OpenMS', 'value': 0.006512210394489668}]",['sp|P10809|CH60_HUMAN'],['143:157'],1,0.00048449612,,,2,778.95917,8982.638,,,Singlecell3,20089,,,,,, +GHYTEGAELVDSVLDVVR,GHYTEGAELVDSVLDVVR,,,2.52112e-06,0.0,0,653.66547,"[{'name': 'OpenMS', 'value': 4.368338284116722e-05}]","['sp|P07437|TBB5_HUMAN,sp|P68371|TBB4B_HUMAN,sp|Q13509|TBB3_HUMAN,sp|Q13885|TBB2A_HUMAN,sp|Q9BVA1|TBB2B_HUMAN']",['104:121' '104:121' '104:121' '104:121' '104:121'],0,,,,3,653.66797,8370.111,,,Singlecell1,17545,,,,,, +GHYTEGAELVDSVLDVVR,GHYTEGAELVDSVLDVVR,,,0.00185762,0.0,0,653.66547,"[{'name': 'OpenMS', 'value': 4.368338284116722e-05}]","['sp|P07437|TBB5_HUMAN,sp|P68371|TBB4B_HUMAN,sp|Q13509|TBB3_HUMAN,sp|Q13885|TBB2A_HUMAN,sp|Q9BVA1|TBB2B_HUMAN']",['104:121' '104:121' '104:121' '104:121' '104:121'],0,,,,3,653.6672,8357.342,,,Singlecell2,18184,,,,,, +GHYTEGAELVDSVLDVVR,GHYTEGAELVDSVLDVVR,,,2.41634e-05,0.0,0,653.66547,"[{'name': 'OpenMS', 'value': 4.368338284116722e-05}]","['sp|P07437|TBB5_HUMAN,sp|P68371|TBB4B_HUMAN,sp|Q13509|TBB3_HUMAN,sp|Q13885|TBB2A_HUMAN,sp|Q9BVA1|TBB2B_HUMAN']",['104:121' '104:121' '104:121' '104:121' '104:121'],0,,,,3,653.6683,8871.854,,,Singlecell3,19822,,,,,, +GHYTEGAELVDSVLDVVR,GHYTEGAELVDSVLDVVR,,,1.33666e-06,0.0,0,653.66547,"[{'name': 'OpenMS', 'value': 4.368338284116722e-05}]","['sp|P07437|TBB5_HUMAN,sp|P68371|TBB4B_HUMAN,sp|Q13509|TBB3_HUMAN,sp|Q13885|TBB2A_HUMAN,sp|Q9BVA1|TBB2B_HUMAN']",['104:121' '104:121' '104:121' '104:121' '104:121'],0,,,,3,653.6673,8513.674,,,Singlecell4,17728,,,,,, +GHYTEGAELVDSVLDVVR,GHYTEGAELVDSVLDVVR,,,2.46994e-09,0.0,0,653.66547,"[{'name': 'OpenMS', 'value': 4.368338284116722e-05}]","['sp|P07437|TBB5_HUMAN,sp|P68371|TBB4B_HUMAN,sp|Q13509|TBB3_HUMAN,sp|Q13885|TBB2A_HUMAN,sp|Q9BVA1|TBB2B_HUMAN']",['104:121' '104:121' '104:121' '104:121' '104:121'],0,,,,3,653.66614,8570.702,,,20HeLacells,33478,,,,,, +GHYTEGAELVDSVLDVVR,GHYTEGAELVDSVLDVVR,,,2.46994e-09,0.0,0,653.66547,"[{'name': 'OpenMS', 'value': 4.368338284116722e-05}]","['sp|P07437|TBB5_HUMAN,sp|P68371|TBB4B_HUMAN,sp|Q13509|TBB3_HUMAN,sp|Q13885|TBB2A_HUMAN,sp|Q9BVA1|TBB2B_HUMAN']",['104:121' '104:121' '104:121' '104:121' '104:121'],0,,,,3,653.66614,8741.125,,,100HeLacells,33478,,,,,, +EQSILELGSLLAK,EQSILELGSLLAK,,,3.67476e-05,0.0,0,700.9034,"[{'name': 'OpenMS', 'value': 4.368338284116722e-05}]",['sp|O00231|PSD11_HUMAN'],['47:59'],1,0.00048449612,,,2,700.90485,8717.011,,,100HeLacells,33391,,,,,, +VTIAQGGVLPNIQAVLLPK,VTIAQGGVLPNIQAVLLPK,,,0.000397302,0.0,0,644.3945,"[{'name': 'OpenMS', 'value': 4.368338284116722e-05}]","['sp|P04908|H2A1B_HUMAN,sp|P0C0S8|H2A1_HUMAN,sp|P20671|H2A1D_HUMAN,sp|Q16777|H2A2C_HUMAN,sp|Q6FI13|H2A2A_HUMAN,sp|Q7L7L0|H2A3_HUMAN,sp|Q93077|H2A1C_HUMAN,sp|Q96KK5|H2A1H_HUMAN,sp|Q99878|H2A1J_HUMAN,sp|Q9BTM1|H2AJ_HUMAN']","['101:119' '101:119' '101:119' '101:119' '101:119' '101:119' '101:119' + '101:119' '101:119' '101:119']",0,,,,3,644.3975,8572.374,,,Singlecell2,17797,,,,,, +VTIAQGGVLPNIQAVLLPK,VTIAQGGVLPNIQAVLLPK,,,0.000397302,0.0,0,644.3945,"[{'name': 'OpenMS', 'value': 4.368338284116722e-05}]","['sp|P04908|H2A1B_HUMAN,sp|P0C0S8|H2A1_HUMAN,sp|P20671|H2A1D_HUMAN,sp|Q16777|H2A2C_HUMAN,sp|Q6FI13|H2A2A_HUMAN,sp|Q7L7L0|H2A3_HUMAN,sp|Q93077|H2A1C_HUMAN,sp|Q96KK5|H2A1H_HUMAN,sp|Q99878|H2A1J_HUMAN,sp|Q9BTM1|H2AJ_HUMAN']","['101:119' '101:119' '101:119' '101:119' '101:119' '101:119' '101:119' + '101:119' '101:119' '101:119']",0,,,,3,644.3975,8548.431,,,Singlecell4,17797,,,,,, +RQVLLSAAEAAEVILR,RQVLLSAAEAAEVILR,,,0.12011,0.009595613,0,580.344,"[{'name': 'OpenMS', 'value': 0.008377403252112885}]",['sp|P78371|TCPB_HUMAN'],['501:516'],1,0.00048449612,,,3,580.34717,8456.428,,,20HeLacells,31594,,,,,, +GSIFVVFDSIESAK,GSIFVVFDSIESAK,,,3.1379e-07,0.0,0,749.893,"[{'name': 'OpenMS', 'value': 4.368338284116722e-05}]",['sp|P05455|LA_HUMAN'],['152:165'],1,0.00048449612,,,2,749.89703,8555.591,,,20HeLacells,31991,,,,,, +DGNASGTTLLEALDCILPPTRPTDKPLR,DGNASGTTLLEALDC[Carbamidomethyl]ILPPTRPTDKPLR,['15-UNIMOD:4'],,2.14907e-06,0.0,0,756.1473,"[{'name': 'OpenMS', 'value': 4.368338284116722e-05}]","['sp|P68104|EF1A1_HUMAN,sp|Q5VTE0|EF1A3_HUMAN']",['220:247' '220:247'],0,0.00048449612,,,4,756.1492,8615.021,,,Singlecell1,33650,,,,,, +DGNASGTTLLEALDCILPPTRPTDKPLR,DGNASGTTLLEALDC[Carbamidomethyl]ILPPTRPTDKPLR,['15-UNIMOD:4'],,1.49355e-05,0.0,0,756.1473,"[{'name': 'OpenMS', 'value': 4.368338284116722e-05}]","['sp|P68104|EF1A1_HUMAN,sp|Q5VTE0|EF1A3_HUMAN']",['220:247' '220:247'],0,0.00048449612,,,4,756.15063,8462.265,,,Singlecell2,18402,,,,,, +DGNASGTTLLEALDCILPPTRPTDKPLR,DGNASGTTLLEALDC[Carbamidomethyl]ILPPTRPTDKPLR,['15-UNIMOD:4'],,0.00232983,0.0,0,756.1473,"[{'name': 'OpenMS', 'value': 4.368338284116722e-05}]","['sp|P68104|EF1A1_HUMAN,sp|Q5VTE0|EF1A3_HUMAN']",['220:247' '220:247'],0,0.00048449612,,,4,756.1503,8623.751,,,Singlecell4,17946,,,,,, +LPIVNEDDELVAIIAR,LPIVNEDDELVAIIAR,,,3.35226e-11,0.0,0,890.4962,"[{'name': 'OpenMS', 'value': 4.368338284116722e-05}]",['sp|P12268|IMDH2_HUMAN'],['209:224'],1,0.00048449612,,,2,890.4985,8717.378,,,100HeLacells,33392,,,,,, +IISSDRDLLAVVFYGTEK,IISSDRDLLAVVFYGTEK,,,0.000835284,0.0,0,676.0334,"[{'name': 'OpenMS', 'value': 4.368338284116722e-05}]",['sp|P12956|XRCC6_HUMAN'],['75:92'],1,0.00048449612,,,3,676.0344,8719.948,,,100HeLacells,33402,,,,,, +LQQELDDLLVDLDHQR,LQQELDDLLVDLDHQR,,,0.00178007,0.0,0,650.66907,"[{'name': 'OpenMS', 'value': 4.368338284116722e-05}]",['sp|P35579|MYH9_HUMAN'],['1418:1433'],1,0.00048449612,,,3,650.67126,8327.369,,,Singlecell4,17360,,,,,, +NFATSLYSMIK,NFATSLYSMIK,,,0.000166147,0.0,0,637.8261,"[{'name': 'OpenMS', 'value': 4.368338284116722e-05}]","['sp|P08758|ANXA5_HUMAN,sp|CONTAMINANT_P08758|CONTAMINANT_ANXA5_HUMAN']",['291:301' '290:300'],0,,,,2,637.82745,8723.008,,,100HeLacells,33415,,,,,, +GQSEDPGSLLSLFR,GQSEDPGSLLSLFR,,,1.455e-05,0.0,0,753.38336,"[{'name': 'OpenMS', 'value': 4.368338284116722e-05}]",['sp|P08195|4F2_HUMAN'],['511:524'],1,0.00048449612,,,2,753.3852,8760.964,,,100HeLacells,33549,,,,,, +SLDLDSIIAEVK,SLDLDSIIAEVK,,,0.0484114,0.0,0,651.8612,"[{'name': 'OpenMS', 'value': 0.002656434474616293}]","['sp|P04264|K2C1_HUMAN,sp|CONTAMINANT_Q5XQN5|CONTAMINANT_K2C5_BOVIN,sp|CONTAMINANT_P50446|CONTAMINANT_K2C6A_MOUSE,sp|CONTAMINANT_Q922U2|CONTAMINANT_K2C5_MOUSE,tr|CONTAMINANT_Q8BGZ7|CONTAMINANT_K2C75_MOUSE,sp|CONTAMINANT_P04264|CONTAMINANT_K2C1_HUMAN']",['344:355' '333:344' '316:327' '326:337' '314:325' '343:354'],0,,,,2,651.8637,8314.945,,,Blank,15832,,,,,, +SLDLDSIIAEVK,SLDLDSIIAEVK,,,0.00760036,0.0,0,651.8612,"[{'name': 'OpenMS', 'value': 0.0001371412898138307}]","['sp|P04264|K2C1_HUMAN,sp|CONTAMINANT_Q5XQN5|CONTAMINANT_K2C5_BOVIN,sp|CONTAMINANT_P50446|CONTAMINANT_K2C6A_MOUSE,sp|CONTAMINANT_Q922U2|CONTAMINANT_K2C5_MOUSE,tr|CONTAMINANT_Q8BGZ7|CONTAMINANT_K2C75_MOUSE,sp|CONTAMINANT_P04264|CONTAMINANT_K2C1_HUMAN']",['344:355' '333:344' '316:327' '326:337' '314:325' '343:354'],0,,,,2,651.865,8806.728,,,Singlecell3,19669,,,,,, +FLSQPFQVAEVFTGHMGK,FLSQPFQVAEVFTGHMGK,,,3.54518e-06,0.0,0,675.00836,"[{'name': 'OpenMS', 'value': 4.368338284116722e-05}]",['sp|P06576|ATPB_HUMAN'],['463:480'],1,0.00048449612,,,3,675.01196,8421.21,,,20HeLacells,31451,,,,,, +LIALLEVLSQKK,LIALLEVLSQKK,,,1.77632e-05,0.0,0,452.2938,"[{'name': 'OpenMS', 'value': 4.368338284116722e-05}]",['sp|P21333|FLNA_HUMAN'],['77:88'],1,0.00048449612,,,3,452.29593,8731.331,,,100HeLacells,33441,,,,,, +LIALLEVLSQKK,LIALLEVLSQKK,,,0.000309836,0.0,0,677.9371,"[{'name': 'OpenMS', 'value': 4.368338284116722e-05}]",['sp|P21333|FLNA_HUMAN'],['77:88'],1,0.00048449612,,,2,677.94,8731.942,,,100HeLacells,33443,,,,,, +TLAESALQLLYTAK,TLAESALQLLYTAK,,,1.12978e-07,0.0,0,761.4298,"[{'name': 'OpenMS', 'value': 4.368338284116722e-05}]",['sp|Q9Y490|TLN1_HUMAN'],['1767:1780'],1,0.00048449612,,,2,761.4341,8509.262,,,20HeLacells,31806,,,,,, +TLAESALQLLYTAK,TLAESALQLLYTAK,,,4.90547e-08,0.0,0,761.4298,"[{'name': 'OpenMS', 'value': 4.368338284116722e-05}]",['sp|Q9Y490|TLN1_HUMAN'],['1767:1780'],1,0.00048449612,,,2,761.43164,8760.229,,,100HeLacells,33547,,,,,, +LLLSTLTLLSK,LLLSTLTLLSK,,,0.00318657,0.00010459157,0,601.39197,"[{'name': 'OpenMS', 'value': 7.215267506042787e-05}]",['sp|Q96EK6|GNA1_HUMAN'],['136:146'],1,0.00048449612,,,2,601.39246,8742.839,,,100HeLacells,33486,,,,,, +ALTLGALTLPLAR,ALTLGALTLPLAR,,,0.0105694,0.0,0,655.41376,"[{'name': 'OpenMS', 'value': 0.0001983864568178812}]",['sp|Q9BSJ8|ESYT1_HUMAN'],['551:563'],1,0.00048449612,,,2,655.41705,8484.072,,,20HeLacells,31705,,,,,, +GFGFILFK,GFGFILFK,,,0.0404971,0.0010400416,0,464.7682,"[{'name': 'OpenMS', 'value': 0.00191654802321761}]",['sp|Q99729|ROAA_HUMAN'],['111:118'],1,0.00048449612,,,2,464.76956,8462.1045,,,Singlecell1,17730,,,,,, +GFGFILFK,GFGFILFK,,,0.00555459,0.0,0,464.7682,"[{'name': 'OpenMS', 'value': 7.215267506042787e-05}]",['sp|Q99729|ROAA_HUMAN'],['111:118'],1,0.00048449612,,,2,464.77048,8608.417,,,Singlecell2,32011,,,,,, +GFGFILFK,GFGFILFK,,,0.051365,0.0006841505,0,464.7682,"[{'name': 'OpenMS', 'value': 0.002873410312350343}]",['sp|Q99729|ROAA_HUMAN'],['111:118'],1,0.00048449612,,,2,464.7696,8600.016,,,Singlecell4,17899,,,,,, +GFGFILFK,GFGFILFK,,,0.00555459,0.0,0,464.7682,"[{'name': 'OpenMS', 'value': 7.215267506042787e-05}]",['sp|Q99729|ROAA_HUMAN'],['111:118'],1,0.00048449612,,,2,464.77048,8560.54,,,20HeLacells,32011,,,,,, +GFGFILFK,GFGFILFK,,,0.0240736,0.0012199372,0,464.7682,"[{'name': 'OpenMS', 'value': 0.0009923240813705747}]",['sp|Q99729|ROAA_HUMAN'],['111:118'],1,0.00048449612,,,2,464.76852,8813.414,,,100HeLacells,33704,,,,,, +LCYVALDFEQEMATAASSSSLEK,LC[Carbamidomethyl]YVALDFEQEMATAASSSSLEK,['2-UNIMOD:4'],,7.14891e-08,0.0,0,1275.5906,"[{'name': 'OpenMS', 'value': 4.368338284116722e-05}]","['sp|P60709|ACTB_HUMAN,sp|P63261|ACTG_HUMAN,sp|Q6S8J3|POTEE_HUMAN']",['216:238' '216:238' '916:938'],0,,,,2,1275.5962,8466.968,,,Singlecell4,17635,,,,,, +IEVPLYSLLEQTHLK,IEVPLYSLLEQTHLK,,,7.76019e-05,0.0,0,595.0049,"[{'name': 'OpenMS', 'value': 4.368338284116722e-05}]",['sp|Q92598|HS105_HUMAN'],['317:331'],1,0.00048449612,,,3,595.0067,8762.801,,,100HeLacells,33555,,,,,, +ELLPVLISAMK,ELLPVLISAMK,,,6.58663e-05,0.0,0,607.36487,"[{'name': 'OpenMS', 'value': 4.368338284116722e-05}]",['sp|P18206|VINC_HUMAN'],['200:210'],1,0.00048449612,,,2,607.36633,8761.943,,,100HeLacells,33552,,,,,, +AGKPVICATQMLESMIK,AGKPVIC[Carbamidomethyl]ATQMLESMIK,['7-UNIMOD:4'],,4.9688e-08,0.0,0,626.32794,"[{'name': 'OpenMS', 'value': 4.368338284116722e-05}]",['sp|P14618|KPYM_HUMAN'],['320:336'],1,0.00048449612,,,3,626.3284,8667.111,,,100HeLacells,33231,,,,,, +LTTPTYGDLNHLVSATMSGVTTCLR,LTTPTYGDLNHLVSATMSGVTTC[Carbamidomethyl]LR,['23-UNIMOD:4'],,0.000313476,0.0,0,903.4509,"[{'name': 'OpenMS', 'value': 4.368338284116722e-05}]","['sp|P04350|TBB4A_HUMAN,sp|P07437|TBB5_HUMAN,sp|P68371|TBB4B_HUMAN,sp|Q13885|TBB2A_HUMAN,sp|Q9BVA1|TBB2B_HUMAN']",['217:241' '217:241' '217:241' '217:241' '217:241'],0,,,,3,903.45465,8499.443,,,Singlecell4,17700,,,,,, +LTTPTYGDLNHLVSATMSGVTTCLR,LTTPTYGDLNHLVSATMSGVTTC[Carbamidomethyl]LR,['23-UNIMOD:4'],,2.90912e-08,0.0,0,903.4509,"[{'name': 'OpenMS', 'value': 4.368338284116722e-05}]","['sp|P04350|TBB4A_HUMAN,sp|P07437|TBB5_HUMAN,sp|P68371|TBB4B_HUMAN,sp|Q13885|TBB2A_HUMAN,sp|Q9BVA1|TBB2B_HUMAN']",['217:241' '217:241' '217:241' '217:241' '217:241'],0,,,,3,903.452,8719.581,,,100HeLacells,33400,,,,,, +DSTLIMQLLR,DSTLIMQLLR,,,0.0142214,0.0,0,595.3341,"[{'name': 'OpenMS', 'value': 0.0004017429463209617}]","['sp|P27348|1433T_HUMAN,sp|P31946|1433B_HUMAN,sp|P31947|1433S_HUMAN,sp|P61981|1433G_HUMAN,sp|P62258|1433E_HUMAN,sp|P63104|1433Z_HUMAN,sp|Q04917|1433F_HUMAN']",['213:222' '215:224' '215:224' '218:227' '216:225' '213:222' '218:227'],0,,,,2,595.3357,8382.329,,,Singlecell2,18237,,,,,, +DSTLIMQLLR,DSTLIMQLLR,,,0.00421726,0.0,0,595.3341,"[{'name': 'OpenMS', 'value': 7.215267506042787e-05}]","['sp|P27348|1433T_HUMAN,sp|P31946|1433B_HUMAN,sp|P31947|1433S_HUMAN,sp|P61981|1433G_HUMAN,sp|P62258|1433E_HUMAN,sp|P63104|1433Z_HUMAN,sp|Q04917|1433F_HUMAN']",['213:222' '215:224' '215:224' '218:227' '216:225' '213:222' '218:227'],0,,,,2,595.3364,8880.992,,,Singlecell3,19844,,,,,, +VISGVLQLGNIVFK,VISGVLQLGNIVFK,,,0.000202351,0.0,0,743.95325,"[{'name': 'OpenMS', 'value': 4.368338284116722e-05}]",['sp|P35579|MYH9_HUMAN'],['342:355'],1,0.00048449612,,,2,743.95416,8890.398,,,Singlecell3,19866,,,,,, +VISGVLQLGNIVFK,VISGVLQLGNIVFK,,,1.10201e-05,0.0,0,743.95325,"[{'name': 'OpenMS', 'value': 4.368338284116722e-05}]",['sp|P35579|MYH9_HUMAN'],['342:355'],1,0.00048449612,,,2,743.95605,8593.928,,,Singlecell4,17887,,,,,, +VNPTVFFDIAVDGEPLGR,VNPTVFFDIAVDGEPLGR,,,0.0738295,0.001875293,0,973.5045,"[{'name': 'OpenMS', 'value': 0.004837132712396439}]","['sp|P62937|PPIA_HUMAN,sp|CONTAMINANT_P62937|CONTAMINANT_PPIA_HUMAN']",['2:19' '1:18'],0,,,,2,973.50903,8501.406,,,Singlecell1,17810,,,,,, +VNPTVFFDIAVDGEPLGR,VNPTVFFDIAVDGEPLGR,,,0.000331695,0.0,0,973.5045,"[{'name': 'OpenMS', 'value': 4.368338284116722e-05}]","['sp|P62937|PPIA_HUMAN,sp|CONTAMINANT_P62937|CONTAMINANT_PPIA_HUMAN']",['2:19' '1:18'],0,,,,2,973.5087,8635.207,,,Singlecell4,17969,,,,,, +KEGGLGPLNIPLLADVTR,KEGGLGPLNIPLLADVTR,,,1.27732e-07,0.0,0,621.6948,"[{'name': 'OpenMS', 'value': 4.368338284116722e-05}]",['sp|P32119|PRDX2_HUMAN'],['92:109'],1,0.00048449612,,,3,621.69635,8761.576,,,100HeLacells,33551,,,,,, +ILGLLDAYLK,ILGLLDAYLK,,,0.00696218,0.0,0,559.8446,"[{'name': 'OpenMS', 'value': 0.0001371412898138307}]",['sp|P26641|EF1G_HUMAN'],['138:147'],1,0.00048449612,,,2,559.84796,8875.563,,,Singlecell3,19831,,,,,, +ILGLLDAYLK,ILGLLDAYLK,,,0.000336243,0.0,0,559.8446,"[{'name': 'OpenMS', 'value': 4.368338284116722e-05}]",['sp|P26641|EF1G_HUMAN'],['138:147'],1,0.00048449612,,,2,559.8461,8506.868,,,Singlecell4,17714,,,,,, +ILGLLDAYLK,ILGLLDAYLK,,,6.13506e-05,0.0,0,559.8446,"[{'name': 'OpenMS', 'value': 4.368338284116722e-05}]",['sp|P26641|EF1G_HUMAN'],['138:147'],1,0.00048449612,,,2,559.8453,8748.961,,,100HeLacells,33510,,,,,, +LCYVALDFEQEMATAASSSSLEK,LC[Carbamidomethyl]YVALDFEQEMATAASSSSLEK,['2-UNIMOD:4'],,3.88507e-06,0.0,0,850.7295,"[{'name': 'OpenMS', 'value': 4.368338284116722e-05}]","['sp|P60709|ACTB_HUMAN,sp|P63261|ACTG_HUMAN,sp|Q6S8J3|POTEE_HUMAN']",['216:238' '216:238' '916:938'],0,,,,3,850.7327,8464.905,,,Singlecell4,17631,,,,,, +DICNDVLSLLEK,DIC[Carbamidomethyl]NDVLSLLEK,['3-UNIMOD:4'],,0.00590582,0.0,0,709.8634,"[{'name': 'OpenMS', 'value': 7.215267506042787e-05}]",['sp|P63104|1433Z_HUMAN'],['92:103'],1,0.00048449612,,,2,709.86566,8493.84,,,Singlecell2,18466,,,,,, +LASDLLEWIR,LASDLLEWIR,,,8.31327e-05,0.0,0,608.3403,"[{'name': 'OpenMS', 'value': 4.368338284116722e-05}]","['sp|O43707|ACTN4_HUMAN,sp|P12814|ACTN1_HUMAN']",['301:310' '282:291'],0,,,,2,608.3437,8491.295,,,20HeLacells,31734,,,,,, +LASDLLEWIR,LASDLLEWIR,,,1.16274e-05,0.0,0,608.3403,"[{'name': 'OpenMS', 'value': 4.368338284116722e-05}]","['sp|O43707|ACTN4_HUMAN,sp|P12814|ACTN1_HUMAN']",['301:310' '282:291'],0,,,,2,608.3419,8759.861,,,100HeLacells,33546,,,,,, +GLGTDEESILTLLTSR,GLGTDEESILTLLTSR,,,4.20496e-06,0.0,0,852.95435,"[{'name': 'OpenMS', 'value': 4.368338284116722e-05}]","['sp|P08758|ANXA5_HUMAN,sp|CONTAMINANT_P08758|CONTAMINANT_ANXA5_HUMAN']",['30:45' '29:44'],0,,,,2,852.95905,8657.442,,,20HeLacells,32395,,,,,, +TVPFCSTFAAFFTR,TVPFC[Carbamidomethyl]STFAAFFTR,['5-UNIMOD:4'],,7.47187e-06,0.0,0,826.4005,"[{'name': 'OpenMS', 'value': 4.368338284116722e-05}]",['sp|P29401|TKT_HUMAN'],['382:395'],1,0.00048449612,,,2,826.40283,8732.819,,,Singlecell3,18203,,,,,, +TVPFCSTFAAFFTR,TVPFC[Carbamidomethyl]STFAAFFTR,['5-UNIMOD:4'],,7.47187e-06,0.0,0,826.4005,"[{'name': 'OpenMS', 'value': 4.368338284116722e-05}]",['sp|P29401|TKT_HUMAN'],['382:395'],1,0.00048449612,,,2,826.40283,8754.9795,,,Singlecell4,18203,,,,,, +TVPFCSTFAAFFTR,TVPFC[Carbamidomethyl]STFAAFFTR,['5-UNIMOD:4'],,1.48906e-05,0.0,0,826.4005,"[{'name': 'OpenMS', 'value': 4.368338284116722e-05}]",['sp|P29401|TKT_HUMAN'],['382:395'],1,0.00048449612,,,2,826.4051,8688.256,,,20HeLacells,32517,,,,,, +LTTDFNVIVEALSK,LTTDFNVIVEALSK,,,0.00233697,0.0,0,775.42725,"[{'name': 'OpenMS', 'value': 4.368338284116722e-05}]",['sp|P05455|LA_HUMAN'],['61:74'],1,0.00048449612,,,2,775.4303,8848.367,,,100HeLacells,33805,,,,,, +AVSSAIAQLLGEVAQGNENYAGIAAR,AVSSAIAQLLGEVAQGNENYAGIAAR,,,1.14419e-06,0.0,0,858.4488,"[{'name': 'OpenMS', 'value': 4.368338284116722e-05}]",['sp|Q9Y490|TLN1_HUMAN'],['1097:1122'],1,0.00048449612,,,3,858.45245,8847.14,,,100HeLacells,33801,,,,,, +SALSGHLETVILGLLK,SALSGHLETVILGLLK,,,1.0203e-08,0.0,0,825.9931,"[{'name': 'OpenMS', 'value': 4.368338284116722e-05}]","['sp|A6NMY6|AXA2L_HUMAN,sp|P07355|ANXA2_HUMAN']",['89:104' '89:104'],0,,,,2,825.9957,8807.471,,,20HeLacells,32993,,,,,, +VGWEQLLTTIAR,VGWEQLLTTIAR,,,0.00810558,0.00029205607,0,693.8906,"[{'name': 'OpenMS', 'value': 0.0001371412898138307}]","['sp|O43707|ACTN4_HUMAN,sp|P12814|ACTN1_HUMAN']",['734:745' '715:726'],0,,,,2,693.89215,8692.6045,,,Singlecell4,18081,,,,,, +VGWEQLLTTIAR,VGWEQLLTTIAR,,,2.24723e-06,0.0,0,693.8906,"[{'name': 'OpenMS', 'value': 4.368338284116722e-05}]","['sp|O43707|ACTN4_HUMAN,sp|P12814|ACTN1_HUMAN']",['734:745' '715:726'],0,,,,2,693.8939,8845.791,,,100HeLacells,33797,,,,,, +VIHDNFGIVEGLMTTVHAITATQK,VIHDNFGIVEGLMTTVHAITATQK,,,0.116785,0.0027560864,0,865.7915,"[{'name': 'OpenMS', 'value': 0.008123235425231561}]",['sp|P04406|G3P_HUMAN'],['163:186'],1,0.00048449612,,,3,865.79034,8699.153,,,Singlecell1,18210,,,,,, +VIHDNFGIVEGLMTTVHAITATQK,VIHDNFGIVEGLMTTVHAITATQK,,,2.40883e-14,0.0,0,865.7915,"[{'name': 'OpenMS', 'value': 4.368338284116722e-05}]",['sp|P04406|G3P_HUMAN'],['163:186'],1,0.00048449612,,,3,865.79535,8808.569,,,Singlecell3,32912,,,,,, +VIHDNFGIVEGLMTTVHAITATQK,VIHDNFGIVEGLMTTVHAITATQK,,,4.85351e-05,0.0,0,865.7915,"[{'name': 'OpenMS', 'value': 4.368338284116722e-05}]",['sp|P04406|G3P_HUMAN'],['163:186'],1,0.00048449612,,,3,865.793,8848.273,,,Singlecell4,18379,,,,,, +VIHDNFGIVEGLMTTVHAITATQK,VIHDNFGIVEGLMTTVHAITATQK,,,2.40883e-14,0.0,0,865.7915,"[{'name': 'OpenMS', 'value': 4.368338284116722e-05}]",['sp|P04406|G3P_HUMAN'],['163:186'],1,0.00048449612,,,3,865.79535,8787.107,,,20HeLacells,32912,,,,,, +SALSGHLETVILGLLK,SALSGHLETVILGLLK,,,1.25548e-05,0.0,0,550.9978,"[{'name': 'OpenMS', 'value': 4.368338284116722e-05}]","['sp|A6NMY6|AXA2L_HUMAN,sp|P07355|ANXA2_HUMAN']",['89:104' '89:104'],0,,,,3,550.9997,8711.759,,,Singlecell1,18236,,,,,, +SALSGHLETVILGLLK,SALSGHLETVILGLLK,,,1.15426e-05,0.0,0,550.9978,"[{'name': 'OpenMS', 'value': 4.368338284116722e-05}]","['sp|A6NMY6|AXA2L_HUMAN,sp|P07355|ANXA2_HUMAN']",['89:104' '89:104'],0,,,,3,550.9996,8825.009,,,Singlecell2,32991,,,,,, +SALSGHLETVILGLLK,SALSGHLETVILGLLK,,,0.000969606,0.0,0,550.9978,"[{'name': 'OpenMS', 'value': 4.368338284116722e-05}]","['sp|A6NMY6|AXA2L_HUMAN,sp|P07355|ANXA2_HUMAN']",['89:104' '89:104'],0,,,,3,550.9983,8866.462,,,Singlecell4,18411,,,,,, +SALSGHLETVILGLLK,SALSGHLETVILGLLK,,,1.15426e-05,0.0,0,550.9978,"[{'name': 'OpenMS', 'value': 4.368338284116722e-05}]","['sp|A6NMY6|AXA2L_HUMAN,sp|P07355|ANXA2_HUMAN']",['89:104' '89:104'],0,,,,3,550.9996,8806.896,,,20HeLacells,32991,,,,,, +LIALLEVLSQK,LIALLEVLSQK,,,0.00267132,0.0,0,613.8896,"[{'name': 'OpenMS', 'value': 7.215267506042787e-05}]","['sp|O75369|FLNB_HUMAN,sp|P21333|FLNA_HUMAN,sp|Q14315|FLNC_HUMAN']",['50:60' '77:87' '70:80'],0,,,,2,613.8912,8964.3125,,,Singlecell3,20045,,,,,, +LIALLEVLSQK,LIALLEVLSQK,,,0.00120387,0.0,0,613.8896,"[{'name': 'OpenMS', 'value': 4.368338284116722e-05}]","['sp|O75369|FLNB_HUMAN,sp|P21333|FLNA_HUMAN,sp|Q14315|FLNC_HUMAN']",['50:60' '77:87' '70:80'],0,,,,2,613.8907,8758.802,,,Singlecell4,18208,,,,,, +LIALLEVLSQK,LIALLEVLSQK,,,7.77869e-05,0.0,0,613.8896,"[{'name': 'OpenMS', 'value': 4.368338284116722e-05}]","['sp|O75369|FLNB_HUMAN,sp|P21333|FLNA_HUMAN,sp|Q14315|FLNC_HUMAN']",['50:60' '77:87' '70:80'],0,,,,2,613.8924,8693.505,,,20HeLacells,32538,,,,,, +SGETEDTFIADLVVGLCTGQIK,SGETEDTFIADLVVGLC[Carbamidomethyl]TGQIK,['17-UNIMOD:4'],,2.66264e-09,0.0,0,1177.0833,"[{'name': 'OpenMS', 'value': 4.368338284116722e-05}]","['sp|P06733|ENOA_HUMAN,sp|P09104|ENOG_HUMAN,sp|P13929|ENOB_HUMAN']",['373:394' '373:394' '373:394'],0,,,,2,1177.0891,8787.681,,,20HeLacells,32914,,,,,, +TSFFQALGITTK,TSFFQALGITTK,,,2.40908e-06,0.0,0,657.35864,"[{'name': 'OpenMS', 'value': 4.368338284116722e-05}]","['sp|P05388|RLA0_HUMAN,sp|Q8NHW5|RLA0L_HUMAN']",['135:146' '135:146'],0,,,,2,657.35846,8669.557,,,100HeLacells,33239,,,,,, +VIHDNFGIVEGLMTTVHAITATQK,VIHDNFGIVEGLMTTVHAITATQK,,,2.00447e-06,0.0,0,649.59546,"[{'name': 'OpenMS', 'value': 4.368338284116722e-05}]",['sp|P04406|G3P_HUMAN'],['163:186'],1,0.00048449612,,,4,649.59845,8787.395,,,20HeLacells,32913,,,,,, +NMAEQIIQEIYSQIQSK,NMAEQIIQEIYSQIQSK,,,6.51906e-05,0.0,0,675.0103,"[{'name': 'OpenMS', 'value': 4.368338284116722e-05}]",['sp|P29401|TKT_HUMAN'],['265:281'],1,0.00048449612,,,3,675.01154,8895.207,,,20HeLacells,33319,,,,,, +LISQIVSSITASLR,LISQIVSSITASLR,,,2.75381e-06,0.0,0,744.44324,"[{'name': 'OpenMS', 'value': 4.368338284116722e-05}]","['sp|P68363|TBA1B_HUMAN,sp|P68366|TBA4A_HUMAN,sp|Q9BQE3|TBA1C_HUMAN,sp|Q9NY65|TBA8_HUMAN,sp|Q9H853|TBA4B_HUMAN']",['230:243' '230:243' '230:243' '230:243' '169:182'],0,,,,2,744.44586,8748.024,,,20HeLacells,32755,,,,,, +ALLFVPR,ALLFVPR,,,0.0298831,0.00035880876,0,408.26053,"[{'name': 'OpenMS', 'value': 0.001335265206397909}]",['sp|P07900|HS90A_HUMAN'],['339:345'],1,0.00048449612,,,2,408.2627,8342.743,,,Singlecell3,18601,,,,,, +MELITILEK,[Acetyl]-MELITILEK,['0-UNIMOD:1'],,0.0288495,0.0014612343,0,566.3201,"[{'name': 'OpenMS', 'value': 0.001257142857142857}]",['sp|Q14974|IMB1_HUMAN'],['1:9'],1,0.00048449612,,,2,566.3198,8957.259,,,100HeLacells,34122,,,,,, +IIVLGLLPR,IIVLGLLPR,,,0.000312087,0.0,0,497.3446,"[{'name': 'OpenMS', 'value': 4.368338284116722e-05}]",['sp|P68402|PA1B2_HUMAN'],['134:142'],1,0.00048449612,,,2,497.34558,8655.624,,,100HeLacells,33192,,,,,, +DLVVLLFETALLSSGFSLEDPQTHSNR,DLVVLLFETALLSSGFSLEDPQTHSNR,,,0.0490109,0.0028342574,0,996.84863,"[{'name': 'OpenMS', 'value': 0.002704439565147539}]",['sp|P08238|HS90B_HUMAN'],['653:679'],1,0.00048449612,,,3,996.8527,8998.67,,,20HeLacells,33698,,,,,, +MFLVNSFLK,[Acetyl]-MFLVNSFLK,['0-UNIMOD:1'],,0.026455,0.0012114686,0,570.80975,"[{'name': 'OpenMS', 'value': 0.001040191857609292}]",['sp|P04632|CPNS1_HUMAN'],['1:9'],1,0.00048449612,,,2,570.8129,8984.484,,,20HeLacells,33641,,,,,, +LVLNPSAININFNLILLLLLELLMAATVIIAAR,LVLNPSAININFNLILLLLLELLMAATVIIAAR,,,3.73661e-06,0.0,0,1787.0826,"[{'name': 'OpenMS', 'value': 4.368338284116722e-05}]",['sp|Q15049|MLC1_HUMAN'],['133:165'],1,0.00048449612,,,2,1787.0801,9075.135,,,Singlecell2,19594,,,,,, +TVLDLAVVLFETATLR,TVLDLAVVLFETATLR,,,0.0120299,0.00015069319,0,587.67676,"[{'name': 'OpenMS', 'value': 0.0002867246487623052}]",['sp|P14625|ENPL_HUMAN'],['709:724'],1,0.00048449612,,,3,587.67847,9008.352,,,20HeLacells,33737,,,,,, +TVLDLAVVLFETATLR,TVLDLAVVLFETATLR,,,6.51866e-10,0.0,0,881.0115,"[{'name': 'OpenMS', 'value': 4.368338284116722e-05}]",['sp|P14625|ENPL_HUMAN'],['709:724'],1,0.00048449612,,,2,881.01154,8987.359,,,100HeLacells,34210,,,,,, +TALLDAAGVASLLTTAEVVVTEIPK,TALLDAAGVASLLTTAEVVVTEIPK,,,4.68241e-10,0.0,0,828.1387,"[{'name': 'OpenMS', 'value': 4.368338284116722e-05}]",['sp|P10809|CH60_HUMAN'],['527:551'],1,0.00048449612,,,3,828.1409,9001.367,,,100HeLacells,34247,,,,,, +TALLDAAGVASLLTTAEVVVTEIPK,TALLDAAGVASLLTTAEVVVTEIPK,,,5.38407e-06,0.0,0,1241.7043,"[{'name': 'OpenMS', 'value': 4.368338284116722e-05}]",['sp|P10809|CH60_HUMAN'],['527:551'],1,0.00048449612,,,2,1241.7075,9000.999,,,100HeLacells,34246,,,,,, +VGAGAPVYLAAVLEYLTAEILELAGNAAR,VGAGAPVYLAAVLEYLTAEILELAGNAAR,,,2.47781e-08,0.0,0,972.53406,"[{'name': 'OpenMS', 'value': 4.368338284116722e-05}]","['sp|P04908|H2A1B_HUMAN,sp|P0C0S8|H2A1_HUMAN,sp|P16104|H2AX_HUMAN,sp|P20671|H2A1D_HUMAN,sp|Q7L7L0|H2A3_HUMAN,sp|Q8IUE6|H2A2B_HUMAN,sp|Q93077|H2A1C_HUMAN,sp|Q96KK5|H2A1H_HUMAN,sp|Q99878|H2A1J_HUMAN,sp|Q9BTM1|H2AJ_HUMAN']","['44:72' '44:72' '44:72' '44:72' '44:72' '44:72' '44:72' '44:72' '44:72' + '44:72']",0,,,,3,972.53815,9107.571,,,100HeLacells,34576,,,,,, +FEVGDIMLIR,FEVGDIMLIR,,,0.000299463,0.0,0,596.82336,"[{'name': 'OpenMS', 'value': 4.368338284116722e-05}]",['sp|P00491|PNPH_HUMAN'],['124:133'],1,0.00048449612,,,2,596.8236,8666.256,,,100HeLacells,33228,,,,,, +ACPLDQAIGLLVAIFHK,[Acetyl]-AC[Carbamidomethyl]PLDQAIGLLVAIFHK,['0-UNIMOD:1' '2-UNIMOD:4'],,0.000590219,0.0,0,954.52423,"[{'name': 'OpenMS', 'value': 4.368338284116722e-05}]",['sp|P06703|S10A6_HUMAN'],['2:18'],1,0.00048449612,,,2,954.52924,9321.862,,,20HeLacells,34975,,,,,, +ACPLDQAIGLLVAIFHK,[Acetyl]-AC[Carbamidomethyl]PLDQAIGLLVAIFHK,['0-UNIMOD:1' '2-UNIMOD:4'],,0.00172152,0.0,0,954.52423,"[{'name': 'OpenMS', 'value': 4.368338284116722e-05}]",['sp|P06703|S10A6_HUMAN'],['2:18'],1,0.00048449612,,,2,955.0301,9128.595,,,100HeLacells,34638,,,,,, diff --git a/docs/peptide.avsc b/docs/peptide.avsc index 983156b..7e0a0c6 100644 --- a/docs/peptide.avsc +++ b/docs/peptide.avsc @@ -19,8 +19,8 @@ {"name": "pg_positions", "type": ["null",{"type": "array","items": "string"}], "doc": "Protein start and end positions written as start_post:end_post"}, {"name": "unique", "type": ["null", "int"], "doc": "Unique peptide indicator, if the peptide maps to a single protein, the value is 1, otherwise 0"}, {"name": "protein_global_qvalue", "type": ["null", "float32"], "doc": "Global q-value of the protein group at the experiment level"}, - {"name": "gene_accessions", "type": ["null", {"type": "array", "items": "string"}], "doc": "Gene accessions, as string array"}, - {"name": "gene_names", "type": ["null", {"type": "array", "items": "string"}], "doc": "Gene names, as string array"}, + {"name": "gg_accessions", "type": ["null", {"type": "array", "items": "string"}], "doc": "Gene accessions, as string array"}, + {"name": "gg_names", "type": ["null", {"type": "array", "items": "string"}], "doc": "Gene names, as string array"}, {"name": "precursor_charge", "type": "int", "doc": "Precursor charge"}, {"name": "observed_mz", "type": "float32", "doc": "Experimental peptide mass-to-charge ratio of identified peptide (in Da)"}, diff --git a/docs/protein.avsc b/docs/protein.avsc index aedbace..6f4220c 100644 --- a/docs/protein.avsc +++ b/docs/protein.avsc @@ -9,8 +9,8 @@ {"name": "global_qvalue","type": ["null", "float32"], "doc": "The global qvalue for a given protein or protein groups"}, {"name": "is_decoy","type": ["null", "int"], "doc": "If the protein is decoy"}, {"name": "best_id_score", "type": "string", "doc": "The best search engine score for the identification"}, - {"name": "gene_accessions", "type": ["null", {"type": "array","items": "string"}], "doc": "The gene accessions corresponding to every protein"}, - {"name": "gene_names", "type": ["null", {"type": "array","items": "string"}], "doc": "The gene names corresponding to every protein"}, + {"name": "gg_accessions", "type": ["null", {"type": "array","items": "string"}], "doc": "The gene accessions corresponding to every protein"}, + {"name": "gg_names", "type": ["null", {"type": "array","items": "string"}], "doc": "The gene names corresponding to every protein"}, {"name": "number_peptides","type": ["null", "int"], "doc": "The total number of peptides for a give protein"}, {"name": "number_psms","type": ["null", "int"], "doc": "The total number of peptide spectrum matches"}, {"name": "number_unique_peptides","type": ["null", "int"], "doc": "The total number of unique peptides"}, diff --git a/docs/psm.avsc b/docs/psm.avsc index 25279ad..508ba6a 100644 --- a/docs/psm.avsc +++ b/docs/psm.avsc @@ -28,8 +28,8 @@ {"name": "pg_positions", "type": ["null",{"type": "array","items": "string"}], "doc": "Protein start and end positions written as start_post:end_post"}, {"name": "unique", "type": ["null", "int"], "doc": "Unique peptide indicator, if the peptide maps to a single protein, the value is 1, otherwise 0"}, {"name": "protein_global_qvalue", "type": ["null", "float32"], "doc": "Global q-value of the protein group at the experiment level"}, - {"name": "gene_accessions", "type": ["null", {"type": "array", "items": "string"}], "doc": "Gene accessions, as string array"}, - {"name": "gene_names", "type": ["null", {"type": "array", "items": "string"}], "doc": "Gene names, as string array"}, + {"name": "gg_accessions", "type": ["null", {"type": "array", "items": "string"}], "doc": "Gene accessions, as string array"}, + {"name": "gg_names", "type": ["null", {"type": "array", "items": "string"}], "doc": "Gene names, as string array"}, {"name": "precursor_charge", "type": "int", "doc": "Precursor charge"}, {"name": "observed_mz", "type": "float32", "doc": "Experimental peptide mass-to-charge ratio of identified peptide (in Da)"}, diff --git a/quantmsio/temp_core/common.py b/quantmsio/temp_core/common.py new file mode 100644 index 0000000..b8ed4f7 --- /dev/null +++ b/quantmsio/temp_core/common.py @@ -0,0 +1,39 @@ +PSM_MAP = { + "sequence": "sequence", + "modifications": "modifications", + + "opt_global_Posterior_Error_Probability_score": "posterior_error_probability", + "opt_global_q-value": "global_qvalue", + "opt_global_cv_MS:1002217_decoy_peptide": "is_decoy", + "calc_mass_to_charge": "calculated_mz", + "accession": "pg_accessions", + "unique": "unique", + "charge": "precursor_charge", + "exp_mass_to_charge": "observed_mz", + "retention_time": "rt", +} + +PSM_USECOLS = list(PSM_MAP.keys()) + [ + "spectra_ref", + "start", + "end", +] + +ADDITIONS = [ + "peptidoform", + "modification_details", + "additional_scores", + "pg_positions", + "protein_global_qvalue", + "gg_accessions", + "gg_names", + "predicted_rt" + "reference_file_name" + "scan_number" + "ion_mobility" + "num_peaks" + "mz_array" + "intensity_array" + "rank" + "cv_params" + ] \ No newline at end of file diff --git a/quantmsio/temp_core/format.py b/quantmsio/temp_core/format.py new file mode 100644 index 0000000..623f76c --- /dev/null +++ b/quantmsio/temp_core/format.py @@ -0,0 +1,165 @@ +import pyarrow as pa +PEPTIDE_FIELDS = [ + pa.field( + "sequence", + pa.string(), + metadata={"description": "The peptide’s sequence corresponding to the PSM"}, + ), + pa.field( + "peptidoform", + pa.string(), + metadata={"description": "Peptide sequence with modifications: Read the specification for more details"}, + ), + pa.field( + "modifications", + pa.list_(pa.string()), + metadata={"description": "List of modifications as string array, easy for search and filter"}, + ), + pa.field( + "modification_details", + pa.list_(pa.string()), + metadata={"description": "List of alternative site probabilities for the modification format: read the specification for more details"}, + ), + pa.field( + "posterior_error_probability", + pa.float32(), + metadata={"description": "Posterior error probability for the given peptide spectrum match"}, + ), + pa.field("global_qvalue", pa.float32(), metadata={"description": "Global q-value of the peptide or psm at the level of the experiment"}), + + pa.field( + "is_decoy", + pa.int32(), + metadata={"description": "Decoy indicator, 1 if the PSM is a decoy, 0 target"}, + ), + pa.field( + "calculated_mz", + pa.float32(), + metadata={"description": "Theoretical peptide mass-to-charge ratio based on identified sequence and modifications"}, + ), + pa.field( + "additional_scores", + pa.list_( + pa.struct([ + ("name", pa.string()), + ("value", pa.float32()) + ]) + ), + metadata={"description": "List of structures, each structure contains two fields: name and value"}, + ), + + pa.field( + "pg_accessions", + pa.list_(pa.string()), + metadata={"description": "Protein group accessions of all the proteins that the peptide maps to"}, + ), + pa.field( + "pg_positions", + pa.list_(pa.string()), + metadata={"description": "Protein start and end positions written as start_post:end_post"}, + ), + pa.field( + "unique", + pa.int32(), + metadata={"description": "Unique peptide indicator, if the peptide maps to a single protein, the value is 1, otherwise 0"}, + ), + pa.field( + "protein_global_qvalue", + pa.float32(), + metadata={"description": "Global q-value of the protein group at the experiment level"}, + ), + pa.field( + "gg_accessions", + pa.list_(pa.string()), + metadata={"description": "Gene accessions, as string array"}, + ), + pa.field( + "gg_names", + pa.list_(pa.string()), + metadata={"description": "Gene names, as string array"}, + ), + + pa.field( + "precursor_charge", + pa.int32(), + metadata={"description": "Precursor charge"}, + ), + pa.field( + "observed_mz", + pa.float32(), + metadata={"description": "Experimental peptide mass-to-charge ratio of identified peptide (in Da)"}, + ), + pa.field( + "rt", + pa.float32(), + metadata={"description": "MS2 scan’s precursor retention time (in seconds)"}, + ), + pa.field( + "predicted_rt", + pa.float32(), + metadata={"description": "Predicted retention time of the peptide (in seconds)"}, + ), + pa.field( + "quantmsio_version", + pa.string(), + metadata={"description": "The version of quantms.io"}, + ) +] + +PSM_UNIQUE_FIELDS = [ + pa.field( + "reference_file_name", + pa.string(), + metadata={"description": "Spectrum file name with no path information and not including the file extension"}, + ), + pa.field( + "scan_number", + pa.string(), + metadata={"description": "Scan number of the spectrum"}, + ), + pa.field( + "ion_mobility", + pa.float32(), + metadata={"description": "Ion mobility value for the precursor ion"}, + ), + pa.field("num_peaks", pa.int32(), metadata={"description": "Number of peaks in the spectrum used for the peptide spectrum match"}), + pa.field( + "mz_array", + pa.list_(pa.float32()), + metadata={"description": "Array of m/z values for the spectrum used for the peptide spectrum match"}, + ), + pa.field( + "intensity_array", + pa.list_(pa.float32()), + metadata={"description": "Array of intensity values for the spectrum used for the peptide spectrum match"}, + ), + pa.field("rank", pa.int32(), metadata={"description": "Rank of the peptide spectrum match in the search engine output"}), + + pa.field( + "cv_params", + pa.list_( + pa.struct([ + ("name", pa.string()), + ("value", pa.string()) + ]) + ), + metadata={"description": "Optional list of CV parameters for additional metadata"}, + ), +] + + + + + + + + + + + + + + + + +PSM_FIELDS = PEPTIDE_FIELDS + PSM_UNIQUE_FIELDS \ No newline at end of file diff --git a/quantmsio/temp_core/mzTab.py b/quantmsio/temp_core/mzTab.py new file mode 100644 index 0000000..c2d8551 --- /dev/null +++ b/quantmsio/temp_core/mzTab.py @@ -0,0 +1,224 @@ +import codecs +import os +import re +import numpy as np +import pandas as pd +import pyarrow as pa +import pyarrow.parquet as pq +from quantmsio.utils.pride_utils import get_quantmsio_modifications + +def fetch_modifications_from_mztab_line(line: str, _modifications: dict) -> dict: + """ + get the modifications from a mztab line. An mzTab modification could be a fixed or variable modification. + The structure of a fixed is the following: + MTD fixed_mod[1] [UNIMOD, UNIMOD:4, Carbamidomethyl, ] + MTD fixed_mod[1]-site C + MTD fixed_mod[1]-position Anywhere + while the structure of a variable modification is the following: + MTD var_mod[1] [UNIMOD, UNIMOD:21, Phospho, ] + MTD var_mod[1]-site S + MTD var_mod[1]-position Anywhere + + :param line: mztab line + :param _modifications: modifications dictionary + :return: modification dictionary + """ + line = line.strip() + line_parts = line.split("\t") + if line_parts[0] == "MTD" and "_mod[" in line_parts[1]: + if "site" not in line_parts[1] and "position" not in line_parts[1]: + values = line_parts[2].replace("[", "").replace("]", "").split(",") + accession = values[1].strip() + name = values[2].strip() + index = line_parts[1].split("[")[1].split("]")[0] + _modifications[accession] = [name, index, None, None] + elif "site" in line_parts[1]: + index = line_parts[1].split("[")[1].split("]")[0] + accession = None + for ( + key, + value, + ) in _modifications.items(): # for name, age in dictionary.iteritems(): (for Python 2.x) + if value[1] == index: + accession = key + if accession is None: + raise Exception("The accession for the modification is None") + _modifications[accession][2] = line_parts[2] + elif "position" in line_parts[1]: + index = line_parts[1].split("[")[1].split("]")[0] + accession = None + for key, value in _modifications.items(): + if value[1] == index: + accession = key + if accession is None: + raise Exception("The accession for the modification is None") + _modifications[accession][3] = line_parts[2] + return _modifications + +class MzTab: + def __init__(self,mzTab_path: str) -> None: + self.mztab_path = mzTab_path + # psm pos + self._psm_pos = None + # psm len + self._psm_len = None + # pep pos + self._pep_pos = None + # pep len + self._pep_len = None + # prt pos + self._prt_pos = None + # prt len + self._prt_len = None + # load psms columns + self._psms_columns = None + # load pep columns + self._pep_columns = None + + def __get_pos(self, header): + if header == "PSH" and self._pep_pos is not None: + return self._pep_pos + self._pep_len - 1 + elif header == "PEH" and self._prt_pos is not None: + return self._prt_pos + self._prt_len - 1 + else: + return 0 + + def __extract_len(self, header): + map_tag = {"PSH": "PSM", "PEH": "PEP", "PRH": "PRT"} + if os.stat(self.mztab_path).st_size == 0: + raise ValueError("File is empty") + f = open(self.mztab_path) + pos = self.__get_pos(header) + f.seek(pos) + line = f.readline() + while not line.startswith(header): + pos = f.tell() + line = f.readline() + + if header == "PSH": + self._psms_columns = line.split("\n")[0].split("\t") + if header == 'PEH': + self._pep_columns = line.split('\n')[0].split('\t') + + line = f.readline() + fle_len = 0 + while line.startswith(map_tag[header]): + fle_len += 1 + line = f.readline() + f.close() + return fle_len, pos + + def __load_second(self, header, **kwargs): + f = open(self.mztab_path) + if header == "PSH": + f.seek(self._psm_pos) + return pd.read_csv(f, nrows=self._psm_len, **kwargs) + elif header == "PEH": + f.seek(self._pep_pos) + return pd.read_csv(f, nrows=self._pep_len, **kwargs) + else: + f.seek(self._prt_pos) + return pd.read_csv(f, nrows=self._prt_len, **kwargs) + + def __set_table_config(self, header, length, pos): + if header == "PSH": + self._psm_pos = pos + self._psm_len = length + elif header == "PEH": + self._pep_pos = pos + self._pep_len = length + else: + self._prt_pos = pos + self._prt_len = length + + def skip_and_load_csv(self, header, **kwargs): + if self._psm_pos is not None and header == "PSH": + return self.__load_second(header, **kwargs) + if self._pep_pos is not None and header == "PEH": + return self.__load_second(header, **kwargs) + if self._prt_pos is not None and header == "PRH": + return self.__load_second(header, **kwargs) + fle_len, pos = self.__extract_len(header) + if os.stat(self.mztab_path).st_size == 0: + raise ValueError("File is empty") + f = open(self.mztab_path) + f.seek(pos) + self.__set_table_config(header, fle_len, pos) + return pd.read_csv(f, nrows=fle_len, sep='\t', **kwargs) + + def extract_ms_runs(self): + if os.stat(self.mztab_path).st_size == 0: + raise ValueError("File is empty") + f = codecs.open(self.mztab_path, "r", "utf-8") + line = f.readline() + ms_runs = {} + while line.split("\t")[0] == "MTD": + if line.split("\t")[1].split("-")[-1] == "location": + ms_runs[line.split("\t")[1].split("-")[0]] = line.split("\t")[2].split("//")[-1].split(".")[0] + line = f.readline() + f.close() + return ms_runs + + def get_protein_map(self, protein_str=None): + """ + return: a dict about protein score + """ + prt = self.skip_and_load_csv( + "PRH", + usecols=["ambiguity_members", "best_search_engine_score[1]"], + ) + if protein_str: + prt = prt[prt["ambiguity_members"].str.contains(f"{protein_str}", na=False)] + prt_score = prt.groupby("ambiguity_members").min() + protein_map = prt_score.to_dict()["best_search_engine_score[1]"] + return protein_map + + def get_score_names(self): + if os.stat(self.mztab_path).st_size == 0: + raise ValueError("File is empty") + f = codecs.open(self.mztab_path, "r", "utf-8") + line = f.readline() + score_names = {} + while line.split("\t")[0] == "MTD": + if "psm_search_engine_score" in line: + msgs = line.split('\t') + score_values = msgs[2].replace("[", "").replace("]", "").split(",") + score_name = score_values[2].strip() + if ":" in score_name: + score_name = score_name.split(':')[0] + score_names[score_name] = msgs[1].replace('psm_','') + line = f.readline() + f.close() + return score_names + + def generate_positions(self,start,end) -> list: + start = start.split(',') + end = end.split(',') + return [start + ':' + end for start,end in zip(start,end)] + + def get_modifications(self): + if os.stat(self.mztab_path).st_size == 0: + raise ValueError("File is empty") + f = codecs.open(self.mztab_path, "r", "utf-8") + line = f.readline() + mod_dict = {} + while line.split("\t")[0] == "MTD": + if "_mod[" in line: + mod_dict = fetch_modifications_from_mztab_line(line, mod_dict) + line = f.readline() + f.close() + return mod_dict + + def _generate_modification_list(self, modification_str: str): + + if pd.isna(modification_str): + return None + modifications = get_quantmsio_modifications(modification_str, self._modifications) + modifications_string = "" + for key, value in modifications.items(): + modifications_string += "|".join(map(str, value["position"])) + modifications_string = modifications_string + "-" + value["unimod_accession"] + "," + modifications_string = modifications_string[:-1] # Remove last comma + modification_list = modifications_string.split(",") + + return modification_list \ No newline at end of file diff --git a/quantmsio/temp_core/psm.py b/quantmsio/temp_core/psm.py new file mode 100644 index 0000000..9705cf5 --- /dev/null +++ b/quantmsio/temp_core/psm.py @@ -0,0 +1,105 @@ +import pyarrow as pa +import pyarrow.parquet as pq +from quantmsio.utils.file_utils import extract_protein_list +from quantmsio.utils.pride_utils import generate_scan_number +from quantmsio.utils.pride_utils import get_peptidoform_proforma_version_in_mztab +from quantmsio.temp_core.common import PSM_USECOLS,PSM_MAP +from quantmsio.temp_core.mzTab import MzTab +from quantmsio.temp_core.format import PSM_FIELDS +import pandas as pd + +PSM_SCHEMA = pa.schema( + PSM_FIELDS, + metadata={"description": "psm file in quantms.io format"}, +) + +class PsmInMemory(MzTab): + def __init__(self,mzTab_path): + super(PsmInMemory,self).__init__(mzTab_path) + self._ms_runs = self.extract_ms_runs() + self._protein_global_qvalue_map = self.get_protein_map() + self._modifications = self.get_modifications() + self._score_names = self.get_score_names() + + def generate_report(self,chunksize=1000000,protein_str=None): + for df in self.skip_and_load_csv('PSH',chunksize=chunksize): + if protein_str: + df = df[df["accession"].str.contains(f"{protein_str}", na=False)] + no_cols = set(PSM_USECOLS) - set(df.columns) + for col in no_cols: + if col=='unique': + df.loc[:,col] = df['accession'].apply(lambda x: 0 if ';' in x else 1) + else: + df.loc[:,col] = None + df.rename(columns=PSM_MAP,inplace=True) + self.transform_psm(df) + self.add_addition_msg(df) + df = self.convert_to_parquet(df) + yield df + + def transform_psm(self,df): + df.loc[:, 'pg_positions'] = df[['start','end']].apply( + lambda row: self.generate_positions(row["start"], row["end"]), + axis=1 + ) + df.loc[:, "scan_number"] = df["spectra_ref"].apply(lambda x: generate_scan_number(x)) + + df.loc[:, "reference_file_name"] = df["spectra_ref"].apply(lambda x: self._ms_runs[x[:x.index(':')]]) + df.loc[:, "additional_scores"] = df[list(self._score_names.values())].apply(self._genarate_additional_scores,axis=1) + df.loc[:,'peptidoform'] = df[["modifications", "sequence"]].apply( + lambda row: get_peptidoform_proforma_version_in_mztab(row["sequence"], row["modifications"], self._modifications), + axis=1 + ) + df.drop(['start','end',"spectra_ref","search_engine","search_engine_score[1]"], inplace=True, axis=1) + + def _genarate_additional_scores(self,cols): + struct_list = [] + for software,score in self._score_names.items(): + struct = {'name':software,'value':cols[score]} + struct_list.append(struct) + return struct_list + + def add_addition_msg(self,df): + df.loc[:,"protein_global_qvalue"] = df['pg_accessions'].map(self._protein_global_qvalue_map) + df.loc[:,"modification_details"] = None + df.loc[:,"predicted_rt"] = None + df.loc[:,"gg_accessions"] = None + df.loc[:,"gg_names"] = None + df.loc[:,"ion_mobility"] = None + df.loc[:,"num_peaks"] = None + df.loc[:,"mz_array"] = None + df.loc[:,"intensity_array"] = None + df.loc[:,"rank"] = None + df.loc[:,"cv_params"] = None + df.loc[:,"quantmsio_version"] = None + + def write_feature_to_file(self,output_path, chunksize=1000000, protein_file=None): + protein_list = extract_protein_list(protein_file) if protein_file else None + protein_str = "|".join(protein_list) if protein_list else None + pqwriter = None + for p in self.generate_report(chunksize=chunksize,protein_str=protein_str): + if not pqwriter: + pqwriter = pq.ParquetWriter(output_path, p.schema) + pqwriter.write_table(p) + if pqwriter: + pqwriter.close() + + def convert_to_parquet(self, res): + res["pg_accessions"] = res["pg_accessions"].str.split(";") + res["protein_global_qvalue"] = res["protein_global_qvalue"].astype(float) + res["unique"] = res["unique"].astype("Int32") + res["modifications"] = res["modifications"].apply(lambda x: self._generate_modification_list(x)) + res["precursor_charge"] = res["precursor_charge"].map(lambda x: None if pd.isna(x) else int(x)).astype("Int32") + res["calculated_mz"] = res["calculated_mz"].astype(float) + res["observed_mz"] = res["observed_mz"].astype(float) + res["posterior_error_probability"] = res["posterior_error_probability"].astype(float) + res["global_qvalue"] = res["global_qvalue"].astype(float) + res["is_decoy"] = res["is_decoy"].map(lambda x: None if pd.isna(x) else int(x)).astype("Int32") + + res["scan_number"] = res["scan_number"].astype(str) + + if "rt" in res.columns: + res["rt"] = res["rt"].astype(float) + else: + res.loc[:, "rt"] = None + return pa.Table.from_pandas(res, schema=PSM_SCHEMA) \ No newline at end of file diff --git a/tests/test_new_psm.py b/tests/test_new_psm.py new file mode 100644 index 0000000..7274e06 --- /dev/null +++ b/tests/test_new_psm.py @@ -0,0 +1,10 @@ +from .common import datafile +from unittest import TestCase +from quantmsio.temp_core.psm import PsmInMemory +class TestPSMHandler(TestCase): + + def test_convert_mztab_to_feature(self): + mztab_path = datafile("DDA-plex/MSV000079033.mzTab") + psm = PsmInMemory(mztab_path) + for _ in psm.generate_report(): + print("ok") \ No newline at end of file