From 6d76d501cc0f8a5031c5ce134f9e82b6f040cb81 Mon Sep 17 00:00:00 2001 From: kylacochrane Date: Tue, 11 Jun 2024 11:55:15 -0400 Subject: [PATCH 1/6] Update filter_query to accommodate multiple queries --- modules/local/filter_query/main.nf | 12 +++++++----- workflows/gas_nomenclature.nf | 4 +++- 2 files changed, 10 insertions(+), 6 deletions(-) diff --git a/modules/local/filter_query/main.nf b/modules/local/filter_query/main.nf index 5bb4b17..9912ee5 100644 --- a/modules/local/filter_query/main.nf +++ b/modules/local/filter_query/main.nf @@ -7,7 +7,7 @@ process FILTER_QUERY { 'biocontainers/csvtk:0.22.0--h9ee0642_1' }" input: - val input_query + val query_ids path addresses val in_format val out_format @@ -17,19 +17,19 @@ process FILTER_QUERY { path("versions.yml"), emit: versions script: - - def queryID = input_query[0].id def outputFile = "new_addresses" - def delimiter = in_format == "tsv" ? "\t" : (in_format == "csv" ? "," : in_format) def out_delimiter = out_format == "tsv" ? "\t" : (out_format == "csv" ? "," : out_format) def out_extension = out_format == "tsv" ? 
'tsv' : 'csv' + // Join the query IDs in the correct csvtk filter2 required format + def queryID = query_ids.collect { id -> "\$id == \"${id}\"" }.join(" || ") + """ # Filter the query samples only; keep only the 'id' and 'address' columns csvtk filter2 \\ ${addresses} \\ - --filter '\$id == \"$queryID\"' \\ + --filter '$queryID' \\ --delimiter "${delimiter}" \\ --out-delimiter "${out_delimiter}" | \\ csvtk cut -f id,address > ${outputFile}.${out_extension} @@ -39,5 +39,7 @@ process FILTER_QUERY { csvtk: \$(echo \$( csvtk version | sed -e "s/csvtk v//g" )) END_VERSIONS """ + + } diff --git a/workflows/gas_nomenclature.nf b/workflows/gas_nomenclature.nf index 813de21..9a8913f 100644 --- a/workflows/gas_nomenclature.nf +++ b/workflows/gas_nomenclature.nf @@ -142,7 +142,9 @@ workflow GAS_NOMENCLATURE { ch_versions = ch_versions.mix(called_data.versions) // Filter the new queried samples and addresses into a CSV/JSON file for the IRIDANext plug in - new_addresses = FILTER_QUERY(profiles.query, called_data.distances, "tsv", "csv") + query_ids = profiles.query.collect { it[0].id } + + new_addresses = FILTER_QUERY(query_ids, called_data.distances, "tsv", "csv") ch_versions = ch_versions.mix(new_addresses.versions) CUSTOM_DUMPSOFTWAREVERSIONS ( From 59f4d176dedfdad7e144b48e30cc5a4249159a5a Mon Sep 17 00:00:00 2001 From: kylacochrane Date: Tue, 11 Jun 2024 11:57:59 -0400 Subject: [PATCH 2/6] Add test for multiple_queries and supporting files --- .../data/called/expected_results_queries.txt | 6 ++ .../data/irida/queries_iridanext.output.json | 16 +++++ .../profiles/expected-profile_queries1.tsv | 6 ++ .../profiles/expected-profile_queries2.tsv | 3 + tests/data/reports/sampleN.mlst.json | 7 +++ .../samplesheet-multiple_queries.csv | 6 ++ tests/pipelines/main.nf.test | 60 +++++++++++++++++++ 7 files changed, 104 insertions(+) create mode 100644 tests/data/called/expected_results_queries.txt create mode 100644 tests/data/irida/queries_iridanext.output.json create mode 100644 
tests/data/profiles/expected-profile_queries1.tsv create mode 100644 tests/data/profiles/expected-profile_queries2.tsv create mode 100644 tests/data/reports/sampleN.mlst.json create mode 100644 tests/data/samplesheets/samplesheet-multiple_queries.csv diff --git a/tests/data/called/expected_results_queries.txt b/tests/data/called/expected_results_queries.txt new file mode 100644 index 0000000..f5e5ae4 --- /dev/null +++ b/tests/data/called/expected_results_queries.txt @@ -0,0 +1,6 @@ +id address level_1 level_2 level_3 +sample1 1.1.1 1 1 1 +sample2 1.1.1 1 1 1 +sample3 1.1.2 1 1 2 +sampleQ 2.2.3 2 2 3 +sampleN 2.2.3 2 2 3 diff --git a/tests/data/irida/queries_iridanext.output.json b/tests/data/irida/queries_iridanext.output.json new file mode 100644 index 0000000..7063e8e --- /dev/null +++ b/tests/data/irida/queries_iridanext.output.json @@ -0,0 +1,16 @@ +{ + "files": { + "global": [], + "samples": {} + }, + "metadata": { + "samples": { + "sampleQ": { + "address": "2.2.3" + }, + "sampleN": { + "address": "2.2.3" + } + } + } +} diff --git a/tests/data/profiles/expected-profile_queries1.tsv b/tests/data/profiles/expected-profile_queries1.tsv new file mode 100644 index 0000000..b2f8100 --- /dev/null +++ b/tests/data/profiles/expected-profile_queries1.tsv @@ -0,0 +1,6 @@ +sample_id l1 l2 l3 +sampleQ 1 2 1 +sampleN 1 2 1 +sample1 1 1 1 +sample2 1 1 1 +sample3 1 1 2 diff --git a/tests/data/profiles/expected-profile_queries2.tsv b/tests/data/profiles/expected-profile_queries2.tsv new file mode 100644 index 0000000..4b4d059 --- /dev/null +++ b/tests/data/profiles/expected-profile_queries2.tsv @@ -0,0 +1,3 @@ +sample_id l1 l2 l3 +sampleQ 1 2 1 +sampleN 1 2 1 diff --git a/tests/data/reports/sampleN.mlst.json b/tests/data/reports/sampleN.mlst.json new file mode 100644 index 0000000..178b6db --- /dev/null +++ b/tests/data/reports/sampleN.mlst.json @@ -0,0 +1,7 @@ +{ + "sampleN": { + "l1": "1", + "l2": "2", + "l3": "1" + } +} diff --git 
a/tests/data/samplesheets/samplesheet-multiple_queries.csv b/tests/data/samplesheets/samplesheet-multiple_queries.csv new file mode 100644 index 0000000..eb661ca --- /dev/null +++ b/tests/data/samplesheets/samplesheet-multiple_queries.csv @@ -0,0 +1,6 @@ +sample,mlst_alleles,address +sampleQ,https://raw.githubusercontent.com/phac-nml/gasnomenclature/dev/tests/data/reports/sampleQ.mlst.json, +sampleN,/root/working_directory/nml-phac/gasnomenclature/tests/data/reports/sampleN.mlst.json, +sample1,https://raw.githubusercontent.com/phac-nml/gasnomenclature/dev/tests/data/reports/sample1.mlst.json,1.1.1 +sample2,https://raw.githubusercontent.com/phac-nml/gasnomenclature/dev/tests/data/reports/sample2.mlst.json,1.1.1 +sample3,https://raw.githubusercontent.com/phac-nml/gasnomenclature/dev/tests/data/reports/sample3.mlst.json,1.1.2 diff --git a/tests/pipelines/main.nf.test b/tests/pipelines/main.nf.test index e35a9c3..e3a467a 100644 --- a/tests/pipelines/main.nf.test +++ b/tests/pipelines/main.nf.test @@ -32,6 +32,11 @@ nextflow_pipeline { def expected_distances = path("$baseDir/tests/data/distances/expected_pairwise_dists.txt") assert actual_distances.text == expected_distances.text + // Verify cluster file + def actual_cluster = path("$launchDir/results/cluster/expected_clusters.txt") + def expected_cluster = path("$baseDir/tests/data/clusters/expected_clusters.txt") + assert actual_cluster.text == expected_cluster.text + // Check called clusters def actual_calls = path("$launchDir/results/call/Called/results.text") def expected_calls = path("$baseDir/tests/data/called/expected_results.txt") @@ -49,6 +54,61 @@ nextflow_pipeline { } } + test("Small-scale test of full pipeline with multiple queries"){ + tag "pipeline_success_multiple_queries" + + when{ + params { + input = "$baseDir/tests/data/samplesheets/samplesheet-multiple_queries.csv" + outdir = "results" + } + } + + then { + assert workflow.success + assert path("$launchDir/results").exists() + + // Check merged 
profiles + def actual_profile_ref = path("$launchDir/results/locidex/merge/reference/merged_ref/merged_profiles_ref.tsv") + def expected_profile_tsv = path("$baseDir/tests/data/profiles/expected-profile_queries1.tsv") + assert actual_profile_ref.text == expected_profile_tsv.text + + // Check query profiles + def actual_profile_query = path("$launchDir/results/locidex/merge/query/merged_value/merged_profiles_value.tsv") + def expected_profile_query_tsv = path("$baseDir/tests/data/profiles/expected-profile_queries2.tsv") + assert actual_profile_query.text == expected_profile_query_tsv.text + + // Check computed pairwise distances + def actual_distances = path("$launchDir/results/distances/results.text") + def expected_distances = path("$baseDir/tests/data/distances/expected_pairwise_queries_dists.txt") + assert actual_distances.text == expected_distances.text + + // Verify cluster file + def actual_cluster = path("$launchDir/results/cluster/expected_clusters.txt") + def expected_cluster = path("$baseDir/tests/data/clusters/expected_clusters.txt") + assert actual_cluster.text == expected_cluster.text + + // Check called clusters + def actual_calls = path("$launchDir/results/call/Called/results.text") + def expected_calls = path("$baseDir/tests/data/called/expected_results_queries.txt") + assert actual_calls.text == expected_calls.text + + // Check IRIDA Next JSON output + assert path("$launchDir/results/iridanext.output.json").json == path("$baseDir/tests/data/irida/queries_iridanext.output.json").json + + def iridanext_json = path("$launchDir/results/iridanext.output.json").json + def iridanext_samples = iridanext_json.files.samples + def iridanext_metadata = iridanext_json.metadata.samples + + assert iridanext_metadata.size() == 2 + assert iridanext_metadata.containsKey("sampleQ") + assert iridanext_metadata.containsKey("sampleN") + + assert iridanext_metadata.sampleQ."address" == "2.2.3" + assert iridanext_metadata.sampleN.address == "2.2.3" + } + } + 
test("Integration test where input contains reference sample with mismatched MLST JSON file"){ tag "pipeline_failure" From 0742c50e81c6296209215ea8044061d65101f802 Mon Sep 17 00:00:00 2001 From: kylacochrane Date: Tue, 11 Jun 2024 11:58:43 -0400 Subject: [PATCH 3/6] Implement 'fair true' in input_check to ensure consistent ordering of samples --- modules/local/input_check/main.nf | 1 + 1 file changed, 1 insertion(+) diff --git a/modules/local/input_check/main.nf b/modules/local/input_check/main.nf index 79a2242..762aeae 100644 --- a/modules/local/input_check/main.nf +++ b/modules/local/input_check/main.nf @@ -1,6 +1,7 @@ process INPUT_CHECK{ tag "Check Sample Inputs and Generate Error Report" label 'process_single' + fair true container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 'https://depot.galaxyproject.org/singularity/python:3.8.3' : From d1c9809e48e9e61431ce379e4f64c951737d8bc4 Mon Sep 17 00:00:00 2001 From: kylacochrane Date: Tue, 11 Jun 2024 13:09:56 -0400 Subject: [PATCH 4/6] Update file path in samplesheet --- tests/data/samplesheets/samplesheet-multiple_queries.csv | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/data/samplesheets/samplesheet-multiple_queries.csv b/tests/data/samplesheets/samplesheet-multiple_queries.csv index eb661ca..e429a1c 100644 --- a/tests/data/samplesheets/samplesheet-multiple_queries.csv +++ b/tests/data/samplesheets/samplesheet-multiple_queries.csv @@ -1,6 +1,6 @@ sample,mlst_alleles,address sampleQ,https://raw.githubusercontent.com/phac-nml/gasnomenclature/dev/tests/data/reports/sampleQ.mlst.json, -sampleN,/root/working_directory/nml-phac/gasnomenclature/tests/data/reports/sampleN.mlst.json, +sampleN,https://raw.githubusercontent.com/phac-nml/gasnomenclature/update-filter_query/tests/data/reports/sampleN.mlst.json, sample1,https://raw.githubusercontent.com/phac-nml/gasnomenclature/dev/tests/data/reports/sample1.mlst.json,1.1.1 
sample2,https://raw.githubusercontent.com/phac-nml/gasnomenclature/dev/tests/data/reports/sample2.mlst.json,1.1.1 sample3,https://raw.githubusercontent.com/phac-nml/gasnomenclature/dev/tests/data/reports/sample3.mlst.json,1.1.2 From 07b6c3c87f8f5465a98b8a35aa05a7e534e5a8b2 Mon Sep 17 00:00:00 2001 From: kylacochrane Date: Tue, 11 Jun 2024 14:02:40 -0400 Subject: [PATCH 5/6] Added missing expected test file --- .../distances/expected_pairwise_queries_dists.txt | 11 +++++++++++ 1 file changed, 11 insertions(+) create mode 100644 tests/data/distances/expected_pairwise_queries_dists.txt diff --git a/tests/data/distances/expected_pairwise_queries_dists.txt b/tests/data/distances/expected_pairwise_queries_dists.txt new file mode 100644 index 0000000..44aa848 --- /dev/null +++ b/tests/data/distances/expected_pairwise_queries_dists.txt @@ -0,0 +1,11 @@ +query_id ref_id dist +sampleQ sampleQ 0 +sampleQ sampleN 0 +sampleQ sample1 1 +sampleQ sample2 1 +sampleQ sample3 2 +sampleN sampleQ 0 +sampleN sampleN 0 +sampleN sample1 1 +sampleN sample2 1 +sampleN sample3 2 From 15b7090691f90dbb969be9db01218715673201ca Mon Sep 17 00:00:00 2001 From: kylacochrane Date: Wed, 12 Jun 2024 14:24:10 -0400 Subject: [PATCH 6/6] Changed cluster_file output filename for clarity --- modules/local/cluster_file/main.nf | 4 ++-- tests/modules/cluster_file/main.nf.test | 4 ++-- tests/pipelines/main.nf.test | 4 ++-- 3 files changed, 6 insertions(+), 6 deletions(-) diff --git a/modules/local/cluster_file/main.nf b/modules/local/cluster_file/main.nf index dfb8004..0a97545 100644 --- a/modules/local/cluster_file/main.nf +++ b/modules/local/cluster_file/main.nf @@ -6,7 +6,7 @@ process CLUSTER_FILE { val meta output: - path("expected_clusters.txt"), emit: text + path("reference_clusters.txt"), emit: text exec: def outputLines = [] @@ -37,7 +37,7 @@ process CLUSTER_FILE { } // Write the text file, iterating over each sample - task.workDir.resolve("expected_clusters.txt").withWriter { writer -> + 
task.workDir.resolve("reference_clusters.txt").withWriter { writer -> outputLines.each { line -> writer.writeLine(line) } diff --git a/tests/modules/cluster_file/main.nf.test b/tests/modules/cluster_file/main.nf.test index 43fd71c..3f13833 100644 --- a/tests/modules/cluster_file/main.nf.test +++ b/tests/modules/cluster_file/main.nf.test @@ -25,8 +25,8 @@ nextflow_process { assert process.success assert path("$launchDir/cluster_results").exists() - // Check expected_clusters - def actual_clusters = path("$launchDir/cluster_results/cluster/expected_clusters.txt") + // Check reference_clusters file + def actual_clusters = path("$launchDir/cluster_results/cluster/reference_clusters.txt") def expected_clusters = path("$baseDir/tests/data/clusters/expected_clusters.txt") assert actual_clusters.text == expected_clusters.text } diff --git a/tests/pipelines/main.nf.test b/tests/pipelines/main.nf.test index e3a467a..53ad3d1 100644 --- a/tests/pipelines/main.nf.test +++ b/tests/pipelines/main.nf.test @@ -33,7 +33,7 @@ nextflow_pipeline { assert actual_distances.text == expected_distances.text // Verify cluster file - def actual_cluster = path("$launchDir/results/cluster/expected_clusters.txt") + def actual_cluster = path("$launchDir/results/cluster/reference_clusters.txt") def expected_cluster = path("$baseDir/tests/data/clusters/expected_clusters.txt") assert actual_cluster.text == expected_cluster.text @@ -84,7 +84,7 @@ nextflow_pipeline { assert actual_distances.text == expected_distances.text // Verify cluster file - def actual_cluster = path("$launchDir/results/cluster/expected_clusters.txt") + def actual_cluster = path("$launchDir/results/cluster/reference_clusters.txt") def expected_cluster = path("$baseDir/tests/data/clusters/expected_clusters.txt") assert actual_cluster.text == expected_cluster.text