Merge pull request #175 from nf-core/final_prerelease_1.1.0_updates

Final pre-release 1.1.0 updates
nf-core · Feb 23, 2024 · ac2ebd7 · ac2ebd7
2 parents 400ed3e + d4168fb
commit ac2ebd7
Show file tree

Hide file tree

Showing 4 changed files with 16 additions and 7 deletions.
diff --git a/conf/modules.config b/conf/modules.config
@@ -113,6 +113,7 @@ process {
                 "-H ${params.wfmash_mash_kmer_thres}",
                 "${wfmash_sparse_map_cmd}",
                 params.wfmash_temp_dir         ? "-B ${wfmash_temp_dir}"             : "",
+                "-2 ${params.wfmash_hg_filter_ani_diff}",
             ].join(" ").trim()
         }
         publishDir = [
@@ -137,6 +138,7 @@ process {
                 "${wfmash_sparse_map_cmd}",
                 params.wfmash_temp_dir         ? "-B ${wfmash_temp_dir}"             : "",
                 "-m",
+                "-2 ${params.wfmash_hg_filter_ani_diff}",
             ].join(" ").trim()
         }
         publishDir = [
@@ -161,6 +163,7 @@ process {
                 "${wfmash_sparse_map_cmd}",
                 params.wfmash_temp_dir         ? "-B ${wfmash_temp_dir}"             : "",
                 "-m",
+                "-2 ${params.wfmash_hg_filter_ani_diff}",
             ].join(" ").trim()
         }
         publishDir = [
@@ -194,6 +197,7 @@ process {
                 "${wfmash_sparse_map_cmd}",
                 params.wfmash_temp_dir         ? "-B ${wfmash_temp_dir}"             : "",
                 "--invert-filtering",
+                "-2 ${params.wfmash_hg_filter_ani_diff}",
             ].join(" ").trim()
         }
         publishDir = [

diff --git a/modules/local/vg_deconstruct/main.nf b/modules/local/vg_deconstruct/main.nf
@@ -65,7 +65,7 @@ process VG_DECONSTRUCT {
         #TODO: to remove when vcfwave will be bug-free
         # The TYPE info sometimes is wrong/missing
         # There are variants without the ALT allele
-        bcftools annotate -x INFO/TYPE \$vcf_decomposed_tmp  | awk '\$5 != "."' > \$vcf_decomposed
+        bcftools sort \$vcf_decomposed_tmp | bcftools annotate -x INFO/TYPE - | awk '\$5 != "."' > \$vcf_decomposed  # '-' makes annotate read the sorted stream from stdin
         rm \$vcf_decomposed_tmp \$vcf.gz
         bcftools stats \$vcf_decomposed > \$vcf_decomposed.stats
     fi

diff --git a/nextflow.config b/nextflow.config
@@ -26,10 +26,11 @@ params {
     wfmash_chunks               = 1
     wfmash_only                 = false
     wfmash_temp_dir             = null
+    wfmash_hg_filter_ani_diff   = 30
 
     // Seqwish options
     seqwish_paf = null
-    seqwish_min_match_length = 19
+    seqwish_min_match_length = 23
     seqwish_transclose_batch = 10000000
     seqwish_sparse_factor = 0.0
     seqwish_temp_dir = null

diff --git a/nextflow_schema.json b/nextflow_schema.json
@@ -98,8 +98,7 @@
                 },
                 "wfmash_exclude_delim": {
                     "type": "string",
-                    "description": "Skip mappings between sequences with the same name prefix before the given delimiter character. [default: all-vs-all and !self].",
-                    "hidden": true
+                    "description": "Skip mappings between sequences with the same name prefix before the given delimiter character. [default: all-vs-all and !self]."
                 },
                 "wfmash_temp_dir": {
                     "type": "string",
@@ -115,6 +114,10 @@
                 "wfmash_only": {
                     "type": "boolean",
                     "description": "If this parameter is set, only the wfmash alignment step of the pipeline is executed. This option is offered for users who want to run wfmash on a cluster."
+                },
+                "wfmash_hg_filter_ani_diff": {
+                    "type": "integer",
+                    "default": 30
                 }
             }
         },
@@ -126,7 +129,7 @@
             "properties": {
                 "seqwish_min_match_length": {
                     "type": "integer",
-                    "default": 19,
+                    "default": 23,
                     "description": "Ignores exact matches below this length.",
                     "help_text": "Graph induction with seqwish often works better when we filter very short matches out of the input alignments. In practice, these often occur in regions of low alignment quality, which are typical of areas with large INDELs and structural variations in the wfmash alignments. This underalignment is then resolved in the smoothxg step. Removing short matches can simplify the graph and remove spurious relationships caused by short repeated homologies.\nA setting of --seqwish_min_match_length 47 is optimal for around 5% divergence, and we suggest lowering it for higher divergence and increasing it for lower divergence. Values up to --seqwish_min_match_length 311 work well for human haplotypes. In effect, setting --seqwish_min_match_length to N means that we can tolerate a local pairwise difference rate of no more than 1/N. Thus, INDELs which may be represented by complex series of edit operations will be opened into bubbles in the induced graph, and alignment regions with very low identity will be ignored. Using affine-gapped alignment (such as with minimap2) may reduce the impact of this step by representing large indels more precisely in the input alignments. However, it remains important due to local inconsistency in alignments in low-complexity sequence."
                 },
@@ -139,7 +142,7 @@
                 },
                 "seqwish_sparse_factor": {
                     "type": "number",
-                    "default": 0.0,
+                    "default": 0,
                     "description": "Keep this randomly selected fraction of input matches."
                 },
                 "seqwish_temp_dir": {
@@ -459,7 +462,8 @@
                     "fa_icon": "far fa-check-circle",
                     "description": "Validation of parameters in lenient mode.",
                     "hidden": true,
-                    "help_text": "Allows string values that are parseable as numbers or booleans. For further information see [JSONSchema docs](https://github.com/everit-org/json-schema#lenient-mode)."
+                    "help_text": "Allows string values that are parseable as numbers or booleans. For further information see [JSONSchema docs](https://github.com/everit-org/json-schema#lenient-mode).",
+                    "default": true
                 },
                 "show_hidden_params": {
                     "type": "boolean",