Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Update filter_query module to accommodate multiple queries #14

Merged
merged 6 commits into from
Jun 12, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 2 additions & 2 deletions modules/local/cluster_file/main.nf
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@ process CLUSTER_FILE {
val meta

output:
path("expected_clusters.txt"), emit: text
path("reference_clusters.txt"), emit: text

exec:
def outputLines = []
Expand Down Expand Up @@ -37,7 +37,7 @@ process CLUSTER_FILE {
}

// Write the text file, iterating over each sample
task.workDir.resolve("expected_clusters.txt").withWriter { writer ->
task.workDir.resolve("reference_clusters.txt").withWriter { writer ->
outputLines.each { line ->
writer.writeLine(line)
}
Expand Down
12 changes: 7 additions & 5 deletions modules/local/filter_query/main.nf
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@ process FILTER_QUERY {
'biocontainers/csvtk:0.22.0--h9ee0642_1' }"

input:
val input_query
val query_ids
path addresses
val in_format
val out_format
Expand All @@ -17,19 +17,19 @@ process FILTER_QUERY {
path("versions.yml"), emit: versions

script:

def queryID = input_query[0].id
def outputFile = "new_addresses"

def delimiter = in_format == "tsv" ? "\t" : (in_format == "csv" ? "," : in_format)
def out_delimiter = out_format == "tsv" ? "\t" : (out_format == "csv" ? "," : out_format)
def out_extension = out_format == "tsv" ? 'tsv' : 'csv'

// Join the query IDs into the filter expression format required by csvtk filter2, e.g. $id == "A" || $id == "B"
def queryID = query_ids.collect { id -> "\$id == \"${id}\"" }.join(" || ")

"""
# Filter the query samples only; keep only the 'id' and 'address' columns
csvtk filter2 \\
${addresses} \\
--filter '\$id == \"$queryID\"' \\
--filter '$queryID' \\
--delimiter "${delimiter}" \\
--out-delimiter "${out_delimiter}" | \\
csvtk cut -f id,address > ${outputFile}.${out_extension}
Expand All @@ -39,5 +39,7 @@ process FILTER_QUERY {
csvtk: \$(echo \$( csvtk version | sed -e "s/csvtk v//g" ))
END_VERSIONS
"""


}

1 change: 1 addition & 0 deletions modules/local/input_check/main.nf
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
process INPUT_CHECK{
tag "Check Sample Inputs and Generate Error Report"
label 'process_single'
fair true

container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
'https://depot.galaxyproject.org/singularity/python:3.8.3' :
Expand Down
6 changes: 6 additions & 0 deletions tests/data/called/expected_results_queries.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
id address level_1 level_2 level_3
sample1 1.1.1 1 1 1
sample2 1.1.1 1 1 1
sample3 1.1.2 1 1 2
sampleQ 2.2.3 2 2 3
sampleN 2.2.3 2 2 3
11 changes: 11 additions & 0 deletions tests/data/distances/expected_pairwise_queries_dists.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,11 @@
query_id ref_id dist
sampleQ sampleQ 0
sampleQ sampleN 0
sampleQ sample1 1
sampleQ sample2 1
sampleQ sample3 2
sampleN sampleQ 0
sampleN sampleN 0
sampleN sample1 1
sampleN sample2 1
sampleN sample3 2
16 changes: 16 additions & 0 deletions tests/data/irida/queries_iridanext.output.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,16 @@
{
"files": {
"global": [],
"samples": {}
},
"metadata": {
"samples": {
"sampleQ": {
"address": "2.2.3"
},
"sampleN": {
"address": "2.2.3"
}
}
}
}
6 changes: 6 additions & 0 deletions tests/data/profiles/expected-profile_queries1.tsv
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
sample_id l1 l2 l3
sampleQ 1 2 1
sampleN 1 2 1
sample1 1 1 1
sample2 1 1 1
sample3 1 1 2
3 changes: 3 additions & 0 deletions tests/data/profiles/expected-profile_queries2.tsv
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
sample_id l1 l2 l3
sampleQ 1 2 1
sampleN 1 2 1
7 changes: 7 additions & 0 deletions tests/data/reports/sampleN.mlst.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
{
"sampleN": {
"l1": "1",
"l2": "2",
"l3": "1"
}
}
6 changes: 6 additions & 0 deletions tests/data/samplesheets/samplesheet-multiple_queries.csv
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
sample,mlst_alleles,address
sampleQ,https://raw.githubusercontent.com/phac-nml/gasnomenclature/dev/tests/data/reports/sampleQ.mlst.json,
sampleN,https://raw.githubusercontent.com/phac-nml/gasnomenclature/dev/tests/data/reports/sampleN.mlst.json,
sample1,https://raw.githubusercontent.com/phac-nml/gasnomenclature/dev/tests/data/reports/sample1.mlst.json,1.1.1
sample2,https://raw.githubusercontent.com/phac-nml/gasnomenclature/dev/tests/data/reports/sample2.mlst.json,1.1.1
sample3,https://raw.githubusercontent.com/phac-nml/gasnomenclature/dev/tests/data/reports/sample3.mlst.json,1.1.2
4 changes: 2 additions & 2 deletions tests/modules/cluster_file/main.nf.test
Original file line number Diff line number Diff line change
Expand Up @@ -25,8 +25,8 @@ nextflow_process {
assert process.success
assert path("$launchDir/cluster_results").exists()

// Check expected_clusters
def actual_clusters = path("$launchDir/cluster_results/cluster/expected_clusters.txt")
// Check reference_clusters file
def actual_clusters = path("$launchDir/cluster_results/cluster/reference_clusters.txt")
def expected_clusters = path("$baseDir/tests/data/clusters/expected_clusters.txt")
assert actual_clusters.text == expected_clusters.text
}
Expand Down
60 changes: 60 additions & 0 deletions tests/pipelines/main.nf.test
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,11 @@ nextflow_pipeline {
def expected_distances = path("$baseDir/tests/data/distances/expected_pairwise_dists.txt")
assert actual_distances.text == expected_distances.text

// Verify cluster file
def actual_cluster = path("$launchDir/results/cluster/reference_clusters.txt")
def expected_cluster = path("$baseDir/tests/data/clusters/expected_clusters.txt")
assert actual_cluster.text == expected_cluster.text

// Check called clusters
def actual_calls = path("$launchDir/results/call/Called/results.text")
def expected_calls = path("$baseDir/tests/data/called/expected_results.txt")
Expand All @@ -49,6 +54,61 @@ nextflow_pipeline {
}
}

// Full-pipeline run driven by a samplesheet with TWO query samples
// (sampleQ, sampleN) plus three reference samples; verifies every
// major published output of the workflow.
test("Small-scale test of full pipeline with multiple queries"){
    tag "pipeline_success_multiple_queries"

    when{
        params {
            input = "$baseDir/tests/data/samplesheets/samplesheet-multiple_queries.csv"
            outdir = "results"
        }
    }

    then {
        assert workflow.success
        assert path("$launchDir/results").exists()

        // Check merged reference profiles produced by locidex
        def actual_profile_ref = path("$launchDir/results/locidex/merge/reference/merged_ref/merged_profiles_ref.tsv")
        def expected_profile_tsv = path("$baseDir/tests/data/profiles/expected-profile_queries1.tsv")
        assert actual_profile_ref.text == expected_profile_tsv.text

        // Check merged query profiles (both sampleQ and sampleN expected)
        def actual_profile_query = path("$launchDir/results/locidex/merge/query/merged_value/merged_profiles_value.tsv")
        def expected_profile_query_tsv = path("$baseDir/tests/data/profiles/expected-profile_queries2.tsv")
        assert actual_profile_query.text == expected_profile_query_tsv.text

        // Check computed pairwise distances
        def actual_distances = path("$launchDir/results/distances/results.text")
        def expected_distances = path("$baseDir/tests/data/distances/expected_pairwise_queries_dists.txt")
        assert actual_distances.text == expected_distances.text

        // Verify cluster file
        def actual_cluster = path("$launchDir/results/cluster/reference_clusters.txt")
        def expected_cluster = path("$baseDir/tests/data/clusters/expected_clusters.txt")
        assert actual_cluster.text == expected_cluster.text

        // Check called clusters
        def actual_calls = path("$launchDir/results/call/Called/results.text")
        def expected_calls = path("$baseDir/tests/data/called/expected_results_queries.txt")
        assert actual_calls.text == expected_calls.text

        // Check IRIDA Next JSON output: full-document comparison first,
        // then targeted metadata checks so a failure pinpoints the field.
        def iridanext_json = path("$launchDir/results/iridanext.output.json").json
        assert iridanext_json == path("$baseDir/tests/data/irida/queries_iridanext.output.json").json

        def iridanext_metadata = iridanext_json.metadata.samples

        // Only the two query samples receive metadata entries
        assert iridanext_metadata.size() == 2
        assert iridanext_metadata.containsKey("sampleQ")
        assert iridanext_metadata.containsKey("sampleN")

        // Both queries are assigned the same new address
        assert iridanext_metadata.sampleQ.address == "2.2.3"
        assert iridanext_metadata.sampleN.address == "2.2.3"
    }
}

test("Integration test where input contains reference sample with mismatched MLST JSON file"){
tag "pipeline_failure"

Expand Down
4 changes: 3 additions & 1 deletion workflows/gas_nomenclature.nf
Original file line number Diff line number Diff line change
Expand Up @@ -142,7 +142,9 @@ workflow GAS_NOMENCLATURE {
ch_versions = ch_versions.mix(called_data.versions)

// Filter the new queried samples and addresses into a CSV/JSON file for the IRIDANext plug in
new_addresses = FILTER_QUERY(profiles.query, called_data.distances, "tsv", "csv")
query_ids = profiles.query.collect { it[0].id }

new_addresses = FILTER_QUERY(query_ids, called_data.distances, "tsv", "csv")
ch_versions = ch_versions.mix(new_addresses.versions)

CUSTOM_DUMPSOFTWAREVERSIONS (
Expand Down
Loading