Skip to content

Commit

Permalink
Several fixes on MP workflow
Browse files Browse the repository at this point in the history
Fix on inputting mods to generate_parameters_file
  • Loading branch information
iquasere committed Dec 30, 2023
1 parent 4338d6d commit 8860a1c
Show file tree
Hide file tree
Showing 5 changed files with 31 additions and 160 deletions.
152 changes: 11 additions & 141 deletions workflow/envs/metaproteomics.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -5,146 +5,16 @@ channels:
- anaconda
- defaults
dependencies:
- _libgcc_mutex=0.1=conda_forge
- _openmp_mutex=4.5=2_gnu
- alsa-lib=1.2.8=h166bdaf_0
- blast=2.2.31=pl526he19e7b1_5
- bzip2=1.0.8=h7f98852_4
- c-ares=1.18.1=h7f98852_0
- ca-certificates=2022.12.7=ha878542_0
- cairo=1.16.0=ha61ee94_1014
- certifi=2022.12.7=pyhd8ed1ab_0
- charset-normalizer=2.1.1=pyhd8ed1ab_0
- colorama=0.4.6=pyhd8ed1ab_0
- curl=7.87.0=hdc1c0ab_0
- dotnet-runtime=3.1.18=h73ebe80_0
- entrez-direct=16.2=he881be0_1
- expat=2.5.0=h27087fc_0
- font-ttf-dejavu-sans-mono=2.37=hab24e00_0
- font-ttf-inconsolata=3.000=h77eed37_0
- font-ttf-source-code-pro=2.038=h77eed37_0
- font-ttf-ubuntu=0.83=hab24e00_0
- fontconfig=2.14.1=hc2a2eb6_0
- fonts-conda-ecosystem=1=0
- fonts-conda-forge=1=0
- freetype=2.12.1=hca18f0e_1
- gettext=0.21.1=h27087fc_0
- giflib=5.2.1=h36c2ea0_2
- graphite2=1.3.13=h58526e2_1001
- harfbuzz=6.0.0=h8e241bc_0
- icu=70.1=h27087fc_0
- idna=3.4=pyhd8ed1ab_0
- jpeg=9e=h166bdaf_2
- keyutils=1.6.1=h166bdaf_0
- krb5=1.20.1=h81ceb04_0
- lcms2=2.14=hfd0df8a_1
- ld_impl_linux-64=2.39=hcc3a1bd_1
- lerc=4.0.0=h27087fc_0
- libblas=3.9.0=16_linux64_openblas
- libcblas=3.9.0=16_linux64_openblas
- libcups=2.3.3=h36d4200_3
- libcurl=7.87.0=hdc1c0ab_0
- libdeflate=1.14=h166bdaf_0
- libedit=3.1.20191231=he28a2e2_2
- libev=4.33=h516909a_1
- libffi=3.4.2=h7f98852_5
- libgcc-ng=12.2.0=h65d4601_19
- libgfortran-ng=12.2.0=h69a702a_19
- libgfortran5=12.2.0=h337968e_19
- libglib=2.74.1=h606061b_1
- libgomp=12.2.0=h65d4601_19
- libiconv=1.17=h166bdaf_0
- libidn2=2.3.4=h166bdaf_0
- liblapack=3.9.0=16_linux64_openblas
- libnghttp2=1.47.0=hff17c54_1
- libnsl=2.0.0=h7f98852_0
- libopenblas=0.3.21=pthreads_h78a6416_3
- libpng=1.6.39=h753d276_0
- libsqlite=3.40.0=h753d276_0
- libssh2=1.10.0=hf14f497_3
- libstdcxx-ng=12.2.0=h46fd767_19
- libtiff=4.5.0=h82bc61c_0
- libunistring=0.9.10=h7f98852_0
- liburcu=0.13.2=h166bdaf_0
- libuuid=2.32.1=h7f98852_1000
- libwebp-base=1.2.4=h166bdaf_0
- libxcb=1.13=h7f98852_1004
- libzlib=1.2.13=h166bdaf_4
- lttng-ust=2.13.4=hfdfcbd3_0
- metamorpheus=0.0.317=h7f98852_2
- mono=6.12.0.90=h780b84a_0
- ncurses=6.3=h27087fc_1
- openjdk=17.0.3=h58dac75_5
- openssl=3.0.7=h0b41bf4_1
- pcre=8.45=h9c3ff4c_0
- pcre2=10.40=hc3806b6_0
- peptide-shaker=2.2.6=hec16e2b_1
- curl
- peptide-shaker=2
- perl=5.26.2=h36c2ea0_1008
- perl-archive-tar=2.32=pl526_0
- perl-carp=1.38=pl526_3
- perl-common-sense=3.74=pl526_2
- perl-compress-raw-bzip2=2.087=pl526he1b5a44_0
- perl-compress-raw-zlib=2.087=pl526hc9558a2_0
- perl-exporter=5.72=pl526_1
- perl-exporter-tiny=1.002001=pl526_0
- perl-extutils-makemaker=7.36=pl526_1
- perl-io-compress=2.087=pl526he1b5a44_0
- perl-io-zlib=1.10=pl526_2
- perl-json=4.02=pl526_0
- perl-json-xs=2.34=pl526h6bb024c_3
- perl-list-moreutils=0.428=pl526_1
- perl-list-moreutils-xs=0.428=pl526_0
- perl-pathtools=3.75=pl526h14c3975_1
- perl-scalar-list-utils=1.52=pl526h516909a_0
- perl-types-serialiser=1.0=pl526_2
- perl-xsloader=0.24=pl526_0
- pip=22.3.1=pyhd8ed1ab_0
- pixman=0.40.0=h36c2ea0_0
- pthread-stubs=0.4=h36c2ea0_1001
- pycparser=2.21=pyhd8ed1ab_0
- pyopenssl=22.1.0=pyhd8ed1ab_0
- pysocks=1.7.1=pyha2e5f31_6
- python=3.11.0=ha86cf86_0_cpython
- python-dateutil=2.8.2=pyhd8ed1ab_0
- python_abi=3.11=3_cp311
- pytz=2022.7=pyhd8ed1ab_0
- readline=8.1.2=h0f457ee_0
- requests=2.28.1=pyhd8ed1ab_1
- searchgui=4.0.41=h779adbc_1
- seqkit=2.3.1=h9ee0642_0
- setuptools=65.6.3=pyhd8ed1ab_0
- six=1.16.0=pyh6c4a22f_0
- tk=8.6.12=h27826a3_0
- tqdm=4.64.1=pyhd8ed1ab_0
- tzdata=2022g=h191b570_0
- urllib3=1.26.13=pyhd8ed1ab_0
- wget=1.20.3=ha35d2d1_1
- wheel=0.38.4=pyhd8ed1ab_0
- xorg-fixesproto=5.0=h7f98852_1002
- xorg-inputproto=2.3.2=h7f98852_1002
- xorg-kbproto=1.0.7=h7f98852_1002
- xorg-libice=1.0.10=h7f98852_0
- xorg-libsm=1.2.3=hd9c2040_1000
- xorg-libx11=1.7.2=h7f98852_0
- xorg-libxau=1.0.9=h7f98852_0
- xorg-libxdmcp=1.1.3=h7f98852_0
- xorg-libxext=1.3.4=h7f98852_1
- xorg-libxfixes=5.0.3=h7f98852_1004
- xorg-libxi=1.7.10=h7f98852_0
- xorg-libxrender=0.9.10=h7f98852_1003
- xorg-libxtst=1.2.3=h7f98852_1002
- xorg-recordproto=1.14.2=h7f98852_1002
- xorg-renderproto=0.11.1=h7f98852_1002
- xorg-xextproto=7.3.0=h7f98852_1002
- xorg-xproto=7.0.31=h7f98852_1007
- xtandem=15.12.15.2=h072c6ed_6
- xz=5.2.6=h166bdaf_0
- zlib=1.2.13=h166bdaf_4
- zstd=1.5.2=h6239696_4
- pip:
- brotlipy==0.7.0
- cffi==1.15.1
- cryptography==38.0.4
- numpy==1.24.0
- pandas==1.5.2
- readline
- requests
- searchgui=4
- seqkit=2
- tqdm
- urllib3
- wget
- numpy
- pandas
prefix: /opt/conda/envs/my_env
13 changes: 7 additions & 6 deletions workflow/rules/general_report.smk
Original file line number Diff line number Diff line change
Expand Up @@ -2,17 +2,18 @@ rule protein_report:
input:
expand("{output}/Annotation/{sample}/UPIMAPI_results.tsv", output=OUTPUT, sample=set(EXPS['Sample'])),
expand("{output}/Annotation/{sample}/reCOGnizer_results.xlsx", output=OUTPUT, sample=set(EXPS["Sample"])),
expand("{output}/Quantification/{sample}_mg.readcounts", output=OUTPUT, sample=set(mg_exps['Sample'])),
expand("{output}/Quantification/{sample}_mg_norm.tsv", output=OUTPUT, sample=set(mg_exps['Sample'])),
expand("{output}/Quantification/{sample}_mt.readcounts", output=OUTPUT, sample=set(mt_exps['Sample'])),
expand("{output}/Quantification/{sample}_mt_norm.tsv", output=OUTPUT, sample=set(mg_exps['Sample'])),
expand("{output}/Quantification/{sample}_mg.readcounts", output=OUTPUT, sample=set(mg_exps['Sample'])) if len(mg_exps) > 0 else [],
expand("{output}/Quantification/{sample}_mg_norm.tsv", output=OUTPUT, sample=set(mg_exps['Sample'])) if len(mg_exps) > 0 else [],
expand("{output}/Quantification/{sample}_mt.readcounts", output=OUTPUT, sample=set(mt_exps['Sample'])) if len(mt_exps) > 0 else [],
expand("{output}/Quantification/{sample}_mt_norm.tsv", output=OUTPUT, sample=set(mg_exps['Sample'])) if len(mt_exps) > 0 else [],
expand("{output}/Metaproteomics/{sample}_mp.spectracounts", output=OUTPUT, sample=set(mp_exps['Sample']))
output:
expand("{output}/MOSCA_{sample}_General_Report.tsv", output=OUTPUT, sample=set(mg_exps['Sample'])),
f"{OUTPUT}/MOSCA_General_Report.xlsx",
f"{OUTPUT}/Quantification/dea_input.tsv",
f"{OUTPUT}/Quantification/mg_entry_quant.tsv",
f"{OUTPUT}/Quantification/mt_entry_quant.tsv" if len(mt_exps) > 0 else f"{OUTPUT}/Metaproteomics/mp_entry_quant.tsv"
f"{OUTPUT}/Quantification/mg_entry_quant.tsv" if len(mg_exps) > 0 else [],
f"{OUTPUT}/Quantification/mt_entry_quant.tsv" if len(mt_exps) > 0 else [],
f"{OUTPUT}/Metaproteomics/mp_entry_quant.tsv" if len(mp_exps) > 0 else []
threads:
1
params:
Expand Down
4 changes: 2 additions & 2 deletions workflow/rules/metaproteomics.smk
Original file line number Diff line number Diff line change
@@ -1,9 +1,9 @@
rule metaproteomics:
input:
[directory(folder) for folder in mp_exps[mp_exps['Sample'] == (lambda wildcards: wildcards.sample)]['Files']],
"{output}/Annotation/{sample}/UPIMAPI_results.tsv"
"{out_dir}/Annotation/{sample}/UPIMAPI_results.tsv"
output:
"{output}/Metaproteomics/{sample}_mp_spectracounts.tsv"
"{out_dir}/Metaproteomics/{sample}_mp.spectracounts"
threads:
config["threads"]
params:
Expand Down
8 changes: 4 additions & 4 deletions workflow/rules/quantification.smk
Original file line number Diff line number Diff line change
Expand Up @@ -6,10 +6,10 @@ rule quantification:
expand("{output}/Annotation/{sample}/fgs.ffn", output=OUTPUT, sample=set(EXPS["Sample"]))
output:
expand("{output}/Quantification/{name}.readcounts", output=OUTPUT, name=set(not_mp_exps['Name'])),
expand("{output}/Quantification/{sample}_mg.readcounts", output=OUTPUT, sample=set(mg_exps['Sample'])),
expand("{output}/Quantification/{sample}_mg_norm.tsv", output=OUTPUT, sample=set(mg_exps['Sample'])),
expand("{output}/Quantification/{sample}_mt.readcounts", output=OUTPUT, sample=set(mt_exps['Sample'])),
expand("{output}/Quantification/{sample}_mt_norm.tsv", output=OUTPUT, sample=set(mg_exps['Sample']))
expand("{output}/Quantification/{sample}_mg.readcounts", output=OUTPUT, sample=set(mg_exps['Sample'])) if len(mg_exps) > 0 else [],
expand("{output}/Quantification/{sample}_mg_norm.tsv", output=OUTPUT, sample=set(mg_exps['Sample'])) if len(mg_exps) > 0 else [],
expand("{output}/Quantification/{sample}_mt.readcounts", output=OUTPUT, sample=set(mt_exps['Sample'])) if len(mt_exps) > 0 else [],
expand("{output}/Quantification/{sample}_mt_norm.tsv", output=OUTPUT, sample=set(mg_exps['Sample'])) if len(mt_exps) > 0 else []
threads:
config["threads"]
params:
Expand Down
14 changes: 7 additions & 7 deletions workflow/scripts/metaproteomics.py
Original file line number Diff line number Diff line change
Expand Up @@ -160,23 +160,23 @@ def create_decoy_database(self, database):

def generate_parameters_file(
        self, output, protein_fdr=1, frag_tol=0.5, prec_tol=10, enzyme='Trypsin', mc=2,
        fixed_mods='Carbamidomethylation of C', variable_mods='Oxidation of M,Acetylation of protein N-term'):
    """
    Write an identification parameters file by invoking SearchGUI's IdentificationParametersCLI.
    param: output: name of parameters file
    param: protein_fdr: float - FDR at the protein level in percent
    param: frag_tol: float - fragment ion mass tolerance (passed as -frag_tol)
    param: prec_tol: float - precursor ion mass tolerance (passed as -prec_tol)
    param: enzyme: str - enzyme used for digestion
    param: mc: int - maximum number of missed cleavages
    param: fixed_mods: str - fixed modifications comma-separated (a tuple/list of names is also accepted)
    param: variable_mods: str - variable modifications comma-separated (a tuple/list of names is also accepted)
    returns: a parameters file will be produced for SearchCLI and/or PeptideShakerCLI
    """
    # NOTE(review): earlier docs stated both tolerances are in ppm, but no unit flag is passed here,
    # so the effective unit is whatever SearchGUI defaults to - confirm against the SearchGUI CLI docs.

    def join_mods(mods):
        # Accept the comma-separated string form as well as the older tuple/list form, and trim
        # whitespace around each name so 'A, B' and 'A,B' yield the same command line.
        if mods is None:
            return ''
        names = mods.split(',') if isinstance(mods, str) else mods
        return ', '.join(name.strip() for name in names if name.strip())

    run_pipe_command(
        f'searchgui eu.isas.searchgui.cmd.IdentificationParametersCLI -out {output} -prec_tol {prec_tol} '
        f'-frag_tol {frag_tol} -enzyme {enzyme} -fixed_mods "{join_mods(fixed_mods)}" '
        f'-variable_mods "{join_mods(variable_mods)}" -mc {mc} -protein_fdr {protein_fdr}')

def split_database(self, database, n_proteins=5000000):
"""
Expand Down

0 comments on commit 8860a1c

Please sign in to comment.