From a68f506854a79b88f96de92077b8e0a552b9a775 Mon Sep 17 00:00:00 2001 From: subwaystation Date: Wed, 6 Mar 2024 11:22:50 +0100 Subject: [PATCH 1/3] fix wfmash v0.12.6 parameters --- conf/modules.config | 12 ++++++++---- nextflow.config | 1 + nextflow_schema.json | 5 +++++ workflows/pangenome.nf | 2 +- 4 files changed, 15 insertions(+), 5 deletions(-) diff --git a/conf/modules.config b/conf/modules.config index a69540b..c5dc2c4 100644 --- a/conf/modules.config +++ b/conf/modules.config @@ -102,7 +102,7 @@ process { withName: WFMASH_MAP_ALIGN { ext.args = { [ - "-n ${params.n_haplotypes - 1}", + "-n ${params.wfmash_n_mappings}", "-s ${parse_int(params.wfmash_segment_length)}", "-p ${params.wfmash_map_pct_id}", params.wfmash_merge_segments ? "-M" : "", @@ -114,6 +114,7 @@ process { "${wfmash_sparse_map_cmd}", params.wfmash_temp_dir ? "-B ${wfmash_temp_dir}" : "", "-2 ${params.wfmash_hg_filter_ani_diff}", + "--lower-triangular", ].join(" ").trim() } publishDir = [ @@ -126,7 +127,7 @@ process { withName: WFMASH_MAP { ext.args = { [ - "-n ${params.n_haplotypes - 1}", + "-n ${params.wfmash_n_mappings}", "-s ${parse_int(params.wfmash_segment_length)}", "-p ${params.wfmash_map_pct_id}", params.wfmash_merge_segments ? "-M" : "", @@ -139,6 +140,7 @@ process { params.wfmash_temp_dir ? "-B ${wfmash_temp_dir}" : "", "-m", "-2 ${params.wfmash_hg_filter_ani_diff}", + "--lower-triangular", ].join(" ").trim() } publishDir = [ @@ -151,7 +153,7 @@ process { withName: WFMASH_MAP_COMMUNITY { ext.args = { [ - "-n ${params.n_haplotypes - 1}", + "-n ${params.wfmash_n_mappings}", "-s ${parse_int(params.wfmash_segment_length)}", "-p ${params.wfmash_map_pct_id}", params.wfmash_merge_segments ? "-M" : "", @@ -164,6 +166,7 @@ process { params.wfmash_temp_dir ? "-B ${wfmash_temp_dir}" : "", "-m", "-2 ${params.wfmash_hg_filter_ani_diff}", + "--lower-triangular", ].join(" ").trim() } publishDir = [ @@ -185,7 +188,7 @@ process { withName: WFMASH_ALIGN { ext.args = { [ - "-n ${params.n_haplotypes - 1}", + "-n ${params.wfmash_n_mappings}", "-s ${parse_int(params.wfmash_segment_length)}", "-p ${params.wfmash_map_pct_id}", params.wfmash_merge_segments ? "-M" : "", @@ -198,6 +201,7 @@ process { params.wfmash_temp_dir ? "-B ${wfmash_temp_dir}" : "", "--invert-filtering", "-2 ${params.wfmash_hg_filter_ani_diff}", + "--lower-triangular", ].join(" ").trim() } publishDir = [ diff --git a/nextflow.config b/nextflow.config index 96de208..d0d726c 100644 --- a/nextflow.config +++ b/nextflow.config @@ -27,6 +27,7 @@ params { wfmash_only = false wfmash_temp_dir = null wfmash_hg_filter_ani_diff = 30 + wfmash_n_mappings = 1 // Seqwish options seqwish_paf = null diff --git a/nextflow_schema.json b/nextflow_schema.json index 2b6a4a0..884f43f 100644 --- a/nextflow_schema.json +++ b/nextflow_schema.json @@ -119,6 +119,11 @@ "type": "integer", "default": 30, "description": "Filter out mappings unlikely to be this Average Nucleotide Identity (ANI) less than the best mapping." + }, + "wfmash_n_mappings": { + "type": "integer", + "default": 1, + "description": "Number of mappings for each segment." } } }, diff --git a/workflows/pangenome.nf b/workflows/pangenome.nf index c75684f..dc4d7c8 100644 --- a/workflows/pangenome.nf +++ b/workflows/pangenome.nf @@ -132,7 +132,7 @@ workflow PANGENOME { ch_multiqc_files = ch_multiqc_files.mix(ch_collated_versions) if (!params.communities) { if (!params.wfmash_only) { - ch_multiqc_files = ch_multiqc_files.mix(PGGB.out.qc.map{return it[1..8]}) + ch_multiqc_files = ch_multiqc_files.mix(PGGB.out.qc.map{return it[1..9]}) } } else { ch_multiqc_files = ch_multiqc_files.mix(ODGI_QC.out.qc.map{return it[1..8]}) From 355329f5f375ee1dd88363405b750d92dfa6827b Mon Sep 17 00:00:00 2001 From: subwaystation Date: Wed, 6 Mar 2024 15:21:23 +0100 Subject: [PATCH 2/3] give smoothxg more threads in hla.config --- conf/hla.config | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/conf/hla.config b/conf/hla.config index 9e3c657..d7c8f8a 100644 --- a/conf/hla.config +++ b/conf/hla.config @@ -15,7 +15,7 @@ process { } withName:'SMOOTHXG' { - cpus = 4 + cpus = 8 memory = 8.GB // container = "ghcr.io/pangenome/pggb:20230331171956507fc0" } From b9ffbeb0915dc3e77f1252ce065c3d5368982dfa Mon Sep 17 00:00:00 2001 From: subwaystation Date: Wed, 6 Mar 2024 15:22:02 +0100 Subject: [PATCH 3/3] revert vg version back to 1.40 and update vg deconstruct code --- .github/workflows/ci.yml | 4 ++-- modules/local/vg_deconstruct/main.nf | 14 +++++++++----- 2 files changed, 11 insertions(+), 7 deletions(-) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 68cba8e..77c0ee1 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -50,7 +50,7 @@ jobs: strategy: matrix: parameters: - - '--vcf_spec "gi|568815561:#,gi|568815567:#"' + - '--vcf_spec "gi|568815561:5,gi|568815567:25"' steps: - name: Check out pipeline code uses: actions/checkout@v3 @@ -164,7 +164,7 @@ jobs: parameters: - "--communities" - "--communities --wfmash_chunks 2" - - '--communities --vcf_spec "gi|568815561:#,gi|568815567:#"' + - '--communities --vcf_spec "gi|568815561:5,gi|568815567:25"' steps: - name: Check out pipeline code uses: actions/checkout@v3 diff --git a/modules/local/vg_deconstruct/main.nf b/modules/local/vg_deconstruct/main.nf index ae6092f..c3557ba 100644 --- a/modules/local/vg_deconstruct/main.nf +++ b/modules/local/vg_deconstruct/main.nf @@ -29,7 +29,7 @@ process VG_DECONSTRUCT { 'https://depot.galaxyproject.org/singularity/pggb:0.5.3--hdfd78af_2': 'quay.io/biocontainers/pggb:0.5.3--hdfd78af_2' }" */ - container "ghcr.io/pangenome/pggb:20230819064109936a2c" + container "ghcr.io/pangenome/pggb:202402032147026ffe7f" input: tuple val(meta), path(graph), val(vcf_spec) @@ -47,16 +47,20 @@ process VG_DECONSTRUCT { def prefix = task.ext.prefix ?: "${meta.id}" """ ref=\$(echo "$vcf_spec" | cut -f 1 -d:) - delim=\$(echo "$vcf_spec" | cut -f 2 -d:) - pop_length=\$(echo "$vcf_spec" | cut -f 3 -d:) + if [[ "$vcf_spec" == *":"* ]]; then + pop_length=\$(echo "$vcf_spec" | cut -f 2 -d:) + else + pop_length="" + fi if [[ -z \$pop_length ]]; then pop_length=0 fi vcf="${graph}".\$(echo \$ref | tr '/|' '_').vcf - vg deconstruct -P \$ref -H \$delim -e -a -t "${task.cpus}" "${graph}" > \$vcf + vg deconstruct -P \$ref -H "#" -e -a -t "${task.cpus}" "${graph}" > \$vcf bcftools stats \$vcf > \$vcf.stats + if [[ \$pop_length -gt 0 ]]; then vcf_decomposed=${graph}.final.\$(echo \$ref | tr '/|' '_').decomposed.vcf vcf_decomposed_tmp=\$vcf_decomposed.tmp.vcf @@ -65,7 +69,7 @@ process VG_DECONSTRUCT { #TODO: to remove when vcfwave will be bug-free # The TYPE info sometimes is wrong/missing # There are variants without the ALT allele - bcftools sort \$vcf_decomposed_tmp | bcftools annotate -x INFO/TYPE \$vcf_decomposed_tmp | awk '\$5 != "."' > \$vcf_decomposed + bcftools sort \$vcf_decomposed_tmp | bcftools annotate -x INFO/TYPE | awk '\$5 != "."' > \$vcf_decomposed rm \$vcf_decomposed_tmp \$vcf.gz bcftools stats \$vcf_decomposed > \$vcf_decomposed.stats fi