Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Update filter_query module to accommodate multiple queries #14

Merged
merged 6 commits into from
Jun 12, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 2 additions & 2 deletions modules/local/cluster_file/main.nf
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@ process CLUSTER_FILE {
val meta

output:
path("expected_clusters.txt"), emit: text
path("reference_clusters.txt"), emit: text

exec:
def outputLines = []
Expand Down Expand Up @@ -37,7 +37,7 @@ process CLUSTER_FILE {
}

// Write the text file, iterating over each sample
task.workDir.resolve("expected_clusters.txt").withWriter { writer ->
task.workDir.resolve("reference_clusters.txt").withWriter { writer ->
outputLines.each { line ->
writer.writeLine(line)
}
Expand Down
12 changes: 7 additions & 5 deletions modules/local/filter_query/main.nf
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@ process FILTER_QUERY {
'biocontainers/csvtk:0.22.0--h9ee0642_1' }"

input:
val input_query
val query_ids
path addresses
val in_format
val out_format
Expand All @@ -17,19 +17,19 @@ process FILTER_QUERY {
path("versions.yml"), emit: versions

script:

def queryID = input_query[0].id
def outputFile = "new_addresses"

def delimiter = in_format == "tsv" ? "\t" : (in_format == "csv" ? "," : in_format)
def out_delimiter = out_format == "tsv" ? "\t" : (out_format == "csv" ? "," : out_format)
def out_extension = out_format == "tsv" ? 'tsv' : 'csv'

// Join the query IDs into the filter expression format required by csvtk filter2, e.g. $id == "A" || $id == "B"
def queryID = query_ids.collect { id -> "\$id == \"${id}\"" }.join(" || ")

"""
# Filter the query samples only; keep only the 'id' and 'address' columns
csvtk filter2 \\
${addresses} \\
--filter '\$id == \"$queryID\"' \\
--filter '$queryID' \\
--delimiter "${delimiter}" \\
--out-delimiter "${out_delimiter}" | \\
csvtk cut -f id,address > ${outputFile}.${out_extension}
Expand All @@ -39,5 +39,7 @@ process FILTER_QUERY {
csvtk: \$(echo \$( csvtk version | sed -e "s/csvtk v//g" ))
END_VERSIONS
"""


}

1 change: 1 addition & 0 deletions modules/local/input_check/main.nf
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
process INPUT_CHECK{
tag "Check Sample Inputs and Generate Error Report"
label 'process_single'
fair true

container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
'https://depot.galaxyproject.org/singularity/python:3.8.3' :
Expand Down
6 changes: 6 additions & 0 deletions tests/data/called/expected_results_queries.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
id address level_1 level_2 level_3
sample1 1.1.1 1 1 1
sample2 1.1.1 1 1 1
sample3 1.1.2 1 1 2
sampleQ 2.2.3 2 2 3
sampleN 2.2.3 2 2 3
11 changes: 11 additions & 0 deletions tests/data/distances/expected_pairwise_queries_dists.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,11 @@
query_id ref_id dist
sampleQ sampleQ 0
sampleQ sampleN 0
sampleQ sample1 1
sampleQ sample2 1
sampleQ sample3 2
sampleN sampleQ 0
sampleN sampleN 0
sampleN sample1 1
sampleN sample2 1
sampleN sample3 2
16 changes: 16 additions & 0 deletions tests/data/irida/queries_iridanext.output.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,16 @@
{
"files": {
"global": [],
"samples": {}
},
"metadata": {
"samples": {
"sampleQ": {
"address": "2.2.3"
},
"sampleN": {
"address": "2.2.3"
}
}
}
}
6 changes: 6 additions & 0 deletions tests/data/profiles/expected-profile_queries1.tsv
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
sample_id l1 l2 l3
sampleQ 1 2 1
sampleN 1 2 1
sample1 1 1 1
sample2 1 1 1
sample3 1 1 2
3 changes: 3 additions & 0 deletions tests/data/profiles/expected-profile_queries2.tsv
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
sample_id l1 l2 l3
sampleQ 1 2 1
sampleN 1 2 1
7 changes: 7 additions & 0 deletions tests/data/reports/sampleN.mlst.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
{
"sampleN": {
"l1": "1",
"l2": "2",
"l3": "1"
}
}
6 changes: 6 additions & 0 deletions tests/data/samplesheets/samplesheet-multiple_queries.csv
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
sample,mlst_alleles,address
sampleQ,https://raw.githubusercontent.com/phac-nml/gasnomenclature/dev/tests/data/reports/sampleQ.mlst.json,
sampleN,https://raw.githubusercontent.com/phac-nml/gasnomenclature/dev/tests/data/reports/sampleN.mlst.json,
sample1,https://raw.githubusercontent.com/phac-nml/gasnomenclature/dev/tests/data/reports/sample1.mlst.json,1.1.1
sample2,https://raw.githubusercontent.com/phac-nml/gasnomenclature/dev/tests/data/reports/sample2.mlst.json,1.1.1
sample3,https://raw.githubusercontent.com/phac-nml/gasnomenclature/dev/tests/data/reports/sample3.mlst.json,1.1.2
4 changes: 2 additions & 2 deletions tests/modules/cluster_file/main.nf.test
Original file line number Diff line number Diff line change
Expand Up @@ -25,8 +25,8 @@ nextflow_process {
assert process.success
assert path("$launchDir/cluster_results").exists()

// Check expected_clusters
def actual_clusters = path("$launchDir/cluster_results/cluster/expected_clusters.txt")
// Check reference_clusters file
def actual_clusters = path("$launchDir/cluster_results/cluster/reference_clusters.txt")
def expected_clusters = path("$baseDir/tests/data/clusters/expected_clusters.txt")
assert actual_clusters.text == expected_clusters.text
}
Expand Down
60 changes: 60 additions & 0 deletions tests/pipelines/main.nf.test
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,11 @@ nextflow_pipeline {
def expected_distances = path("$baseDir/tests/data/distances/expected_pairwise_dists.txt")
assert actual_distances.text == expected_distances.text

// Verify cluster file
def actual_cluster = path("$launchDir/results/cluster/reference_clusters.txt")
def expected_cluster = path("$baseDir/tests/data/clusters/expected_clusters.txt")
assert actual_cluster.text == expected_cluster.text

// Check called clusters
def actual_calls = path("$launchDir/results/call/Called/results.text")
def expected_calls = path("$baseDir/tests/data/called/expected_results.txt")
Expand All @@ -49,6 +54,61 @@ nextflow_pipeline {
}
}

// Full-pipeline run driven by a samplesheet with TWO query samples
// (sampleQ, sampleN) plus three reference samples; verifies every
// major published output of the workflow.
test("Small-scale test of full pipeline with multiple queries"){
    tag "pipeline_success_multiple_queries"

    when{
        params {
            input = "$baseDir/tests/data/samplesheets/samplesheet-multiple_queries.csv"
            outdir = "results"
        }
    }

    then {
        assert workflow.success
        assert path("$launchDir/results").exists()

        // Check merged reference profiles produced by locidex
        def actual_profile_ref = path("$launchDir/results/locidex/merge/reference/merged_ref/merged_profiles_ref.tsv")
        def expected_profile_tsv = path("$baseDir/tests/data/profiles/expected-profile_queries1.tsv")
        assert actual_profile_ref.text == expected_profile_tsv.text

        // Check merged query profiles (both sampleQ and sampleN expected)
        def actual_profile_query = path("$launchDir/results/locidex/merge/query/merged_value/merged_profiles_value.tsv")
        def expected_profile_query_tsv = path("$baseDir/tests/data/profiles/expected-profile_queries2.tsv")
        assert actual_profile_query.text == expected_profile_query_tsv.text

        // Check computed pairwise distances
        def actual_distances = path("$launchDir/results/distances/results.text")
        def expected_distances = path("$baseDir/tests/data/distances/expected_pairwise_queries_dists.txt")
        assert actual_distances.text == expected_distances.text

        // Verify cluster file
        def actual_cluster = path("$launchDir/results/cluster/reference_clusters.txt")
        def expected_cluster = path("$baseDir/tests/data/clusters/expected_clusters.txt")
        assert actual_cluster.text == expected_cluster.text

        // Check called clusters
        def actual_calls = path("$launchDir/results/call/Called/results.text")
        def expected_calls = path("$baseDir/tests/data/called/expected_results_queries.txt")
        assert actual_calls.text == expected_calls.text

        // Check IRIDA Next JSON output: full-document comparison first,
        // then targeted metadata checks so a failure pinpoints the field.
        def iridanext_json = path("$launchDir/results/iridanext.output.json").json
        assert iridanext_json == path("$baseDir/tests/data/irida/queries_iridanext.output.json").json

        def iridanext_metadata = iridanext_json.metadata.samples

        // Only the two query samples receive metadata entries
        assert iridanext_metadata.size() == 2
        assert iridanext_metadata.containsKey("sampleQ")
        assert iridanext_metadata.containsKey("sampleN")

        // Both queries are assigned the same new address
        assert iridanext_metadata.sampleQ.address == "2.2.3"
        assert iridanext_metadata.sampleN.address == "2.2.3"
    }
}

test("Integration test where input contains reference sample with mismatched MLST JSON file"){
tag "pipeline_failure"

Expand Down
4 changes: 3 additions & 1 deletion workflows/gas_nomenclature.nf
Original file line number Diff line number Diff line change
Expand Up @@ -142,7 +142,9 @@ workflow GAS_NOMENCLATURE {
ch_versions = ch_versions.mix(called_data.versions)

// Filter the new queried samples and addresses into a CSV/JSON file for the IRIDANext plug in
new_addresses = FILTER_QUERY(profiles.query, called_data.distances, "tsv", "csv")
query_ids = profiles.query.collect { it[0].id }

new_addresses = FILTER_QUERY(query_ids, called_data.distances, "tsv", "csv")
ch_versions = ch_versions.mix(new_addresses.versions)

CUSTOM_DUMPSOFTWAREVERSIONS (
Expand Down
Loading