diff --git a/workflow/envs/metaproteomics.yaml b/workflow/envs/metaproteomics.yaml index 085af64..a631f66 100644 --- a/workflow/envs/metaproteomics.yaml +++ b/workflow/envs/metaproteomics.yaml @@ -5,146 +5,16 @@ channels: - anaconda - defaults dependencies: - - _libgcc_mutex=0.1=conda_forge - - _openmp_mutex=4.5=2_gnu - - alsa-lib=1.2.8=h166bdaf_0 - - blast=2.2.31=pl526he19e7b1_5 - - bzip2=1.0.8=h7f98852_4 - - c-ares=1.18.1=h7f98852_0 - - ca-certificates=2022.12.7=ha878542_0 - - cairo=1.16.0=ha61ee94_1014 - - certifi=2022.12.7=pyhd8ed1ab_0 - - charset-normalizer=2.1.1=pyhd8ed1ab_0 - - colorama=0.4.6=pyhd8ed1ab_0 - - curl=7.87.0=hdc1c0ab_0 - - dotnet-runtime=3.1.18=h73ebe80_0 - - entrez-direct=16.2=he881be0_1 - - expat=2.5.0=h27087fc_0 - - font-ttf-dejavu-sans-mono=2.37=hab24e00_0 - - font-ttf-inconsolata=3.000=h77eed37_0 - - font-ttf-source-code-pro=2.038=h77eed37_0 - - font-ttf-ubuntu=0.83=hab24e00_0 - - fontconfig=2.14.1=hc2a2eb6_0 - - fonts-conda-ecosystem=1=0 - - fonts-conda-forge=1=0 - - freetype=2.12.1=hca18f0e_1 - - gettext=0.21.1=h27087fc_0 - - giflib=5.2.1=h36c2ea0_2 - - graphite2=1.3.13=h58526e2_1001 - - harfbuzz=6.0.0=h8e241bc_0 - - icu=70.1=h27087fc_0 - - idna=3.4=pyhd8ed1ab_0 - - jpeg=9e=h166bdaf_2 - - keyutils=1.6.1=h166bdaf_0 - - krb5=1.20.1=h81ceb04_0 - - lcms2=2.14=hfd0df8a_1 - - ld_impl_linux-64=2.39=hcc3a1bd_1 - - lerc=4.0.0=h27087fc_0 - - libblas=3.9.0=16_linux64_openblas - - libcblas=3.9.0=16_linux64_openblas - - libcups=2.3.3=h36d4200_3 - - libcurl=7.87.0=hdc1c0ab_0 - - libdeflate=1.14=h166bdaf_0 - - libedit=3.1.20191231=he28a2e2_2 - - libev=4.33=h516909a_1 - - libffi=3.4.2=h7f98852_5 - - libgcc-ng=12.2.0=h65d4601_19 - - libgfortran-ng=12.2.0=h69a702a_19 - - libgfortran5=12.2.0=h337968e_19 - - libglib=2.74.1=h606061b_1 - - libgomp=12.2.0=h65d4601_19 - - libiconv=1.17=h166bdaf_0 - - libidn2=2.3.4=h166bdaf_0 - - liblapack=3.9.0=16_linux64_openblas - - libnghttp2=1.47.0=hff17c54_1 - - libnsl=2.0.0=h7f98852_0 - - libopenblas=0.3.21=pthreads_h78a6416_3 - - libpng=1.6.39=h753d276_0 - - libsqlite=3.40.0=h753d276_0 - - libssh2=1.10.0=hf14f497_3 - - libstdcxx-ng=12.2.0=h46fd767_19 - - libtiff=4.5.0=h82bc61c_0 - - libunistring=0.9.10=h7f98852_0 - - liburcu=0.13.2=h166bdaf_0 - - libuuid=2.32.1=h7f98852_1000 - - libwebp-base=1.2.4=h166bdaf_0 - - libxcb=1.13=h7f98852_1004 - - libzlib=1.2.13=h166bdaf_4 - - lttng-ust=2.13.4=hfdfcbd3_0 - - metamorpheus=0.0.317=h7f98852_2 - - mono=6.12.0.90=h780b84a_0 - - ncurses=6.3=h27087fc_1 - - openjdk=17.0.3=h58dac75_5 - - openssl=3.0.7=h0b41bf4_1 - - pcre=8.45=h9c3ff4c_0 - - pcre2=10.40=hc3806b6_0 - - peptide-shaker=2.2.6=hec16e2b_1 + - curl + - peptide-shaker=2 - perl=5.26.2=h36c2ea0_1008 - - perl-archive-tar=2.32=pl526_0 - - perl-carp=1.38=pl526_3 - - perl-common-sense=3.74=pl526_2 - - perl-compress-raw-bzip2=2.087=pl526he1b5a44_0 - - perl-compress-raw-zlib=2.087=pl526hc9558a2_0 - - perl-exporter=5.72=pl526_1 - - perl-exporter-tiny=1.002001=pl526_0 - - perl-extutils-makemaker=7.36=pl526_1 - - perl-io-compress=2.087=pl526he1b5a44_0 - - perl-io-zlib=1.10=pl526_2 - - perl-json=4.02=pl526_0 - - perl-json-xs=2.34=pl526h6bb024c_3 - - perl-list-moreutils=0.428=pl526_1 - - perl-list-moreutils-xs=0.428=pl526_0 - - perl-pathtools=3.75=pl526h14c3975_1 - - perl-scalar-list-utils=1.52=pl526h516909a_0 - - perl-types-serialiser=1.0=pl526_2 - - perl-xsloader=0.24=pl526_0 - - pip=22.3.1=pyhd8ed1ab_0 - - pixman=0.40.0=h36c2ea0_0 - - pthread-stubs=0.4=h36c2ea0_1001 - - pycparser=2.21=pyhd8ed1ab_0 - - pyopenssl=22.1.0=pyhd8ed1ab_0 - - pysocks=1.7.1=pyha2e5f31_6 - - python=3.11.0=ha86cf86_0_cpython - - python-dateutil=2.8.2=pyhd8ed1ab_0 - - python_abi=3.11=3_cp311 - - pytz=2022.7=pyhd8ed1ab_0 - - readline=8.1.2=h0f457ee_0 - - requests=2.28.1=pyhd8ed1ab_1 - - searchgui=4.0.41=h779adbc_1 - - seqkit=2.3.1=h9ee0642_0 - - setuptools=65.6.3=pyhd8ed1ab_0 - - six=1.16.0=pyh6c4a22f_0 - - tk=8.6.12=h27826a3_0 - - tqdm=4.64.1=pyhd8ed1ab_0 - - tzdata=2022g=h191b570_0 - - urllib3=1.26.13=pyhd8ed1ab_0 - - wget=1.20.3=ha35d2d1_1 - - wheel=0.38.4=pyhd8ed1ab_0 - - xorg-fixesproto=5.0=h7f98852_1002 - - xorg-inputproto=2.3.2=h7f98852_1002 - - xorg-kbproto=1.0.7=h7f98852_1002 - - xorg-libice=1.0.10=h7f98852_0 - - xorg-libsm=1.2.3=hd9c2040_1000 - - xorg-libx11=1.7.2=h7f98852_0 - - xorg-libxau=1.0.9=h7f98852_0 - - xorg-libxdmcp=1.1.3=h7f98852_0 - - xorg-libxext=1.3.4=h7f98852_1 - - xorg-libxfixes=5.0.3=h7f98852_1004 - - xorg-libxi=1.7.10=h7f98852_0 - - xorg-libxrender=0.9.10=h7f98852_1003 - - xorg-libxtst=1.2.3=h7f98852_1002 - - xorg-recordproto=1.14.2=h7f98852_1002 - - xorg-renderproto=0.11.1=h7f98852_1002 - - xorg-xextproto=7.3.0=h7f98852_1002 - - xorg-xproto=7.0.31=h7f98852_1007 - - xtandem=15.12.15.2=h072c6ed_6 - - xz=5.2.6=h166bdaf_0 - - zlib=1.2.13=h166bdaf_4 - - zstd=1.5.2=h6239696_4 - - pip: - - brotlipy==0.7.0 - - cffi==1.15.1 - - cryptography==38.0.4 - - numpy==1.24.0 - - pandas==1.5.2 + - readline + - requests + - searchgui=4 + - seqkit=2 + - tqdm + - urllib3 + - wget + - numpy + - pandas prefix: /opt/conda/envs/my_env diff --git a/workflow/rules/general_report.smk b/workflow/rules/general_report.smk index c78029b..424ebae 100644 --- a/workflow/rules/general_report.smk +++ b/workflow/rules/general_report.smk @@ -2,17 +2,18 @@ rule protein_report: input: expand("{output}/Annotation/{sample}/UPIMAPI_results.tsv", output=OUTPUT, sample=set(EXPS['Sample'])), expand("{output}/Annotation/{sample}/reCOGnizer_results.xlsx", output=OUTPUT, sample=set(EXPS["Sample"])), - expand("{output}/Quantification/{sample}_mg.readcounts", output=OUTPUT, sample=set(mg_exps['Sample'])), - expand("{output}/Quantification/{sample}_mg_norm.tsv", output=OUTPUT, sample=set(mg_exps['Sample'])), - expand("{output}/Quantification/{sample}_mt.readcounts", output=OUTPUT, sample=set(mt_exps['Sample'])), - expand("{output}/Quantification/{sample}_mt_norm.tsv", output=OUTPUT, sample=set(mg_exps['Sample'])), + expand("{output}/Quantification/{sample}_mg.readcounts", output=OUTPUT, sample=set(mg_exps['Sample'])) if len(mg_exps) > 0 else [], + expand("{output}/Quantification/{sample}_mg_norm.tsv", output=OUTPUT, sample=set(mg_exps['Sample'])) if len(mg_exps) > 0 else [], + expand("{output}/Quantification/{sample}_mt.readcounts", output=OUTPUT, sample=set(mt_exps['Sample'])) if len(mt_exps) > 0 else [], + expand("{output}/Quantification/{sample}_mt_norm.tsv", output=OUTPUT, sample=set(mg_exps['Sample'])) if len(mt_exps) > 0 else [], expand("{output}/Metaproteomics/{sample}_mp.spectracounts", output=OUTPUT, sample=set(mp_exps['Sample'])) output: expand("{output}/MOSCA_{sample}_General_Report.tsv", output=OUTPUT, sample=set(mg_exps['Sample'])), f"{OUTPUT}/MOSCA_General_Report.xlsx", f"{OUTPUT}/Quantification/dea_input.tsv", - f"{OUTPUT}/Quantification/mg_entry_quant.tsv", - f"{OUTPUT}/Quantification/mt_entry_quant.tsv" if len(mt_exps) > 0 else f"{OUTPUT}/Metaproteomics/mp_entry_quant.tsv" + f"{OUTPUT}/Quantification/mg_entry_quant.tsv" if len(mg_exps) > 0 else [], + f"{OUTPUT}/Quantification/mt_entry_quant.tsv" if len(mt_exps) > 0 else [], + f"{OUTPUT}/Metaproteomics/mp_entry_quant.tsv" if len(mp_exps) > 0 else [] threads: 1 params: diff --git a/workflow/rules/metaproteomics.smk b/workflow/rules/metaproteomics.smk index e3f9df2..eba29f5 100644 --- a/workflow/rules/metaproteomics.smk +++ b/workflow/rules/metaproteomics.smk @@ -1,9 +1,9 @@ rule metaproteomics: input: [directory(folder) for folder in mp_exps[mp_exps['Sample'] == (lambda wildcards: wildcards.sample)]['Files']], - "{output}/Annotation/{sample}/UPIMAPI_results.tsv" + "{out_dir}/Annotation/{sample}/UPIMAPI_results.tsv" output: - "{output}/Metaproteomics/{sample}_mp_spectracounts.tsv" + "{out_dir}/Metaproteomics/{sample}_mp.spectracounts" threads: config["threads"] params: diff --git a/workflow/rules/quantification.smk b/workflow/rules/quantification.smk index b2ff10e..561930c 100644 --- a/workflow/rules/quantification.smk +++ b/workflow/rules/quantification.smk @@ -6,10 +6,10 @@ rule quantification: expand("{output}/Annotation/{sample}/fgs.ffn", output=OUTPUT, sample=set(EXPS["Sample"])) output: expand("{output}/Quantification/{name}.readcounts", output=OUTPUT, name=set(not_mp_exps['Name'])), - expand("{output}/Quantification/{sample}_mg.readcounts", output=OUTPUT, sample=set(mg_exps['Sample'])), - expand("{output}/Quantification/{sample}_mg_norm.tsv", output=OUTPUT, sample=set(mg_exps['Sample'])), - expand("{output}/Quantification/{sample}_mt.readcounts", output=OUTPUT, sample=set(mt_exps['Sample'])), - expand("{output}/Quantification/{sample}_mt_norm.tsv", output=OUTPUT, sample=set(mg_exps['Sample'])) + expand("{output}/Quantification/{sample}_mg.readcounts", output=OUTPUT, sample=set(mg_exps['Sample'])) if len(mg_exps) > 0 else [], + expand("{output}/Quantification/{sample}_mg_norm.tsv", output=OUTPUT, sample=set(mg_exps['Sample'])) if len(mg_exps) > 0 else [], + expand("{output}/Quantification/{sample}_mt.readcounts", output=OUTPUT, sample=set(mt_exps['Sample'])) if len(mt_exps) > 0 else [], + expand("{output}/Quantification/{sample}_mt_norm.tsv", output=OUTPUT, sample=set(mg_exps['Sample'])) if len(mt_exps) > 0 else [] threads: config["threads"] params: diff --git a/workflow/scripts/metaproteomics.py b/workflow/scripts/metaproteomics.py index 397748e..862d8c3 100644 --- a/workflow/scripts/metaproteomics.py +++ b/workflow/scripts/metaproteomics.py @@ -160,8 +160,7 @@ def create_decoy_database(self, database): def generate_parameters_file( self, output, protein_fdr=1, frag_tol=0.5, prec_tol=10, enzyme='Trypsin', mc=2, - fixed_mods=('Carbamidomethylation of C'), - variable_mods=('Oxidation of M', 'Acetylation of protein N-term')): + fixed_mods='Carbamidomethylation of C', variable_mods='Oxidation of M,Acetylation of protein N-term'): """ param: output: name of parameters file param: protein_fdr: float - FDR at the protein level in percent @@ -169,14 +168,15 @@ def generate_parameters_file( param: prec_tol: float - precursor ion mass tolerance in ppm param: enzyme: str - enzyme used for digestion param: mc: int - maximum number of missed cleavages - param: fixed_mods: tuple - fixed modifications - param: variable_mods: tuple - variable modifications + param: fixed_mods: str - fixed modifications comma-separated + param: variable_mods: str - variable modifications comma-separated returns: a parameters file will be produced for SearchCLI and/or PeptideShakerCLI """ + fixed_mods, variable_mods = fixed_mods.split(','), variable_mods.split(',') run_pipe_command( - f'''searchgui eu.isas.searchgui.cmd.IdentificationParametersCLI -out {output} -prec_tol {prec_tol} ' - f'-frag_tol {frag_tol} -enzyme {enzyme} -fixed_mods "{', '.join(fixed_mods)}" -variable_mods ' - f'"{', '.join(variable_mods)}" -mc {mc} -protein_fdr {protein_fdr}''') + f'''searchgui eu.isas.searchgui.cmd.IdentificationParametersCLI -out {output} -prec_tol {prec_tol} ''' + + f'''-frag_tol {frag_tol} -enzyme {enzyme} -fixed_mods "{', '.join(fixed_mods)}" ''' + + f'''-variable_mods "{', '.join(variable_mods)}" -mc {mc} -protein_fdr {protein_fdr}''') def split_database(self, database, n_proteins=5000000): """