diff --git a/conf/modules.config b/conf/modules.config
index 08fc284..86c0455 100644
--- a/conf/modules.config
+++ b/conf/modules.config
@@ -15,8 +15,11 @@ process {
     // Publish directory names
     assembly_directory_name = "assembly"
     summary_directory_name = "summary"
+    profile_dists_directory_name = "distances"
+    gas_call_directory_name = "call"
 
-    locidex_merge_directory_name = [params.outdir , "locidex", "merge"].join(File.separator)
+    locidex_merge_ref_directory_name = [params.outdir , "locidex", "merge", "reference"].join(File.separator)
+    locidex_merge_query_directory_name = [params.outdir , "locidex", "merge", "query"].join(File.separator)
 
     publishDir = [
         path: { "${params.outdir}/${task.process.tokenize(':')[-1].tokenize('_')[0].toLowerCase()}" },
@@ -56,15 +59,55 @@ process {
         ]
     }
 
-    withName: LOCIDEX_MERGE {
+    withName: LOCIDEX_MERGE_REF {
         publishDir = [
-            path: locidex_merge_directory_name,
+            path: locidex_merge_ref_directory_name,
             mode: params.publish_dir_mode,
             pattern: "*/*",
             saveAs: { filename -> filename.equals('versions.yml') ? null : filename }
         ]
     }
 
+    withName: LOCIDEX_MERGE_QUERY {
+        publishDir = [
+            path: locidex_merge_query_directory_name,
+            mode: params.publish_dir_mode,
+            pattern: "*/*",
+            saveAs: { filename -> filename.equals('versions.yml') ? null : filename }
+        ]
+    }
+
+    withName: PROFILE_DISTS {
+        publishDir = [
+            path: { ["${params.outdir}", "${task.profile_dists_directory_name}"].join(File.separator) },
+            mode: params.publish_dir_mode,
+            saveAs: { filename -> filename.equals('versions.yml') ? null :
+                      filename.contains(File.separator) ? filename.split(File.separator)[-1] : filename }
+        ]
+    }
+
+    withName: GAS_CALL {
+        publishDir = [
+            [
+                path: { ["${params.outdir}", "${task.gas_call_directory_name}"].join(File.separator) },
+                mode: params.publish_dir_mode,
+                pattern: "*/thresholds.json"
+            ],
+            [
+                path: { ["${params.outdir}", "${task.gas_call_directory_name}"].join(File.separator) },
+                mode: params.publish_dir_mode,
+                pattern: "*/results.{text,parquet}"
+            ],
+            [
+                path: { ["${params.outdir}", "${task.gas_call_directory_name}"].join(File.separator) },
+                mode: params.publish_dir_mode,
+                pattern: "*/run.json"
+            ]
+        ]
+    }
+
     withName: CUSTOM_DUMPSOFTWAREVERSIONS {
         publishDir = [
             path: { "${params.outdir}/pipeline_info" },
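Note: the `[ ... ].join(File.separator)` idiom above simply builds a platform-safe path string. A minimal sketch of what it evaluates to (the `params.outdir` value is assumed for illustration only):

    // Groovy, illustrative only
    def outdir = "results"
    assert [outdir, "locidex", "merge", "reference"].join(File.separator) == "results/locidex/merge/reference" // on POSIX systems
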
diff --git a/conf/test.config b/conf/test.config
index 0e0b591..9c17e75 100644
--- a/conf/test.config
+++ b/conf/test.config
@@ -20,5 +20,14 @@ params {
     max_time   = '1.h'
 
     // Input data
-    input = 'https://raw.githubusercontent.com/phac-nml/gasnomenclature/main/assets/samplesheet.csv'
+    input = 'https://raw.githubusercontent.com/phac-nml/gasnomenclature/dev/tests/data/samplesheets/samplesheet1.csv'
+    ref_clusters = 'https://raw.githubusercontent.com/phac-nml/gasnomenclature/dev/tests/data/clusters/expected_clusters.txt'
 }
+
+/* This is required to run in WSL/Ubuntu using Singularity.
+Without this, profile_dists was not completing successfully
+due to issues with multiprocessing in the container. A similar
+error is described at https://github.com/marcelm/cutadapt/issues/583
+*/
+singularity.runOptions = "--contain"
diff --git a/conf/test_full.config b/conf/test_full.config
index c8b5764..4133c10 100644
--- a/conf/test_full.config
+++ b/conf/test_full.config
@@ -15,5 +15,14 @@ params {
     config_profile_description = 'Full test dataset to check pipeline function'
 
     // Input data for full size test
-    input = 'https://raw.githubusercontent.com/phac-nml/gasnomenclature/main/assets/samplesheet.csv'
+    input = 'https://raw.githubusercontent.com/phac-nml/gasnomenclature/dev/tests/data/samplesheets/samplesheet1.csv'
+    ref_clusters = 'https://raw.githubusercontent.com/phac-nml/gasnomenclature/dev/tests/data/clusters/expected_clusters.txt'
 }
+
+/* This is required to run in WSL/Ubuntu using Singularity.
+Without this, profile_dists was not completing successfully
+due to issues with multiprocessing in the container. A similar
+error is described at https://github.com/marcelm/cutadapt/issues/583
+*/
+singularity.runOptions = "--contain"
diff --git a/modules/local/gas/call/main.nf b/modules/local/gas/call/main.nf
index 1fe2c64..33db7a7 100644
--- a/modules/local/gas/call/main.nf
+++ b/modules/local/gas/call/main.nf
@@ -2,7 +2,7 @@
 process GAS_CALL{
     label "process_high"
-    tag "Calling: ${meta.id}"
+    tag "Assigning Nomenclature"
 
     container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
         'https://depot.galaxyproject.org/singularity/genomic_address_service%3A0.1.1--pyh7cba7a3_1' :
@@ -10,17 +10,18 @@
 
     input:
-    tuple val(meta), path(reference_clusters), path(distances)
+    path(reference_clusters)
+    path(distances)
 
     output:
-    tuple val(meta), path("${prefix}/results.{text,parquet}"), emit: distances, optional: true
-    tuple val(meta), path("${prefix}/thresholds.json"), emit: thresholds
-    tuple val(meta), path("${prefix}/run.json"), emit: run
+    path("${prefix}/results.{text,parquet}"), emit: distances, optional: true
+    path("${prefix}/thresholds.json"), emit: thresholds
+    path("${prefix}/run.json"), emit: run
     path "versions.yml", emit: versions
 
     script:
     // Need to add more args for gas call below
-    prefix = meta.id
+    prefix = "Called"
    """
    gas call --dists $distances \\
        --rclusters $reference_clusters \\
diff --git a/modules/local/locidex/merge/main.nf b/modules/local/locidex/merge/main.nf
index bd9f3e8..b58b154 100644
--- a/modules/local/locidex/merge/main.nf
+++ b/modules/local/locidex/merge/main.nf
@@ -9,17 +9,19 @@ process LOCIDEX_MERGE {
         'quay.io/biocontainers/locidex:0.1.1--pyhdfd78af_0' }"
 
     input:
-    val input_values // [file(sample1), file(sample2), file(sample3), etc...]
+    path input_values // [file(sample1), file(sample2), file(sample3), etc...]
+    val input_tag // makes the output unique and marks it as reference or query to prevent name collisions
 
     output:
     path("${combined_dir}/*.tsv"), emit: combined_profiles
-    path("${combined_dir}/*.json"), emit: report
     path "versions.yml", emit: versions
 
     script:
-    combined_dir = "merged"
+    combined_dir = "merged_${input_tag}"
    """
    locidex merge -i ${input_values.join(' ')} -o ${combined_dir}
+
+   mv ${combined_dir}/*.tsv ${combined_dir}/merged_profiles_${input_tag}.tsv
 
    cat <<-END_VERSIONS > versions.yml
    "${task.process}":
        locidex merge: \$(echo \$(locidex search -V 2>&1) | sed 's/^.*locidex //' )
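Note on the new `input_tag` value: the module is presumably included twice under different aliases so reference and query profiles can be merged independently — the workflow changes below use exactly these process names. A sketch (Nextflow, illustrative only; the include aliases are assumed from the workflow diff):

    include { LOCIDEX_MERGE as LOCIDEX_MERGE_REF   } from '../modules/local/locidex/merge/main'
    include { LOCIDEX_MERGE as LOCIDEX_MERGE_QUERY } from '../modules/local/locidex/merge/main'

    // Each call now writes a uniquely named directory and profile, avoiding collisions:
    //   merged_ref/merged_profiles_ref.tsv and merged_query/merged_profiles_query.tsv
    LOCIDEX_MERGE_REF(reference_values, Channel.value('ref'))
    LOCIDEX_MERGE_QUERY(query_values, Channel.value('query'))
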
diff --git a/modules/local/profile_dists/main.nf b/modules/local/profile_dists/main.nf
index 2e48a02..b7a0933 100644
--- a/modules/local/profile_dists/main.nf
+++ b/modules/local/profile_dists/main.nf
@@ -1,24 +1,25 @@
 process PROFILE_DISTS{
     label "process_high"
-    tag "Pairwise Distance Generation: ${meta.id}"
+    tag "Gathering Distances Between Reference and Query Profiles"
 
     container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
         'https://depot.galaxyproject.org/singularity/profile_dists%3A1.0.0--pyh7cba7a3_0' :
         'quay.io/biocontainers/profile_dists:1.0.0--pyh7cba7a3_0' }"
 
     input:
-    tuple val(meta), path(query), path(ref)
+    path query
+    path ref
     val mapping_format
-    path(mapping_file)
-    path(columns)
+    path mapping_file
+    path columns
 
     output:
-    tuple val(meta), path("${prefix}_${mapping_format}/allele_map.json"), emit: allele_map
-    tuple val(meta), path("${prefix}_${mapping_format}/query_profile.{text,parquet}"), emit: query_profile
-    tuple val(meta), path("${prefix}_${mapping_format}/ref_profile.{text,parquet}"), emit: ref_profile
-    tuple val(meta), path("${prefix}_${mapping_format}/results.{text,parquet}"), emit: results
-    tuple val(meta), path("${prefix}_${mapping_format}/run.json"), emit: run
+    path("${prefix}/allele_map.json"), emit: allele_map
+    path("${prefix}/query_profile.{text,parquet}"), emit: query_profile
+    path("${prefix}/ref_profile.{text,parquet}"), emit: ref_profile
+    path("${prefix}/results.{text,parquet}"), emit: results
+    path("${prefix}/run.json"), emit: run
     path "versions.yml", emit: versions
 
@@ -41,7 +42,7 @@ process PROFILE_DISTS{
         args = args + " --count_missing"
     }
     // --match_threshold $params.profile_dists.match_thresh \\
-    prefix = meta.id
+    prefix = "distances_${mapping_format}"
    """
    profile_dists --query $query --ref $ref $args --outfmt $mapping_format \\
        --distm $params.pd_distm \\
@@ -50,7 +51,7 @@ process PROFILE_DISTS{
        --sample_qual_thresh $params.pd_sample_quality_threshold \\
        --max_mem ${task.memory.toGiga()} \\
        --cpus ${task.cpus} \\
-       -o ${prefix}_${mapping_format}
+       -o ${prefix}
 
    cat <<-END_VERSIONS > versions.yml
    "${task.process}":
diff --git a/nextflow.config b/nextflow.config
index bddc99e..89da1c6 100644
--- a/nextflow.config
+++ b/nextflow.config
@@ -47,6 +47,23 @@ params {
     validate_params = true
 
     // Profile Dists
+    pd_outfmt = "pairwise"
+    pd_distm = "scaled"
+    pd_missing_threshold = 1.0
+    pd_sample_quality_threshold = 1.0
+    pd_match_threshold = -1.0
+    pd_file_type = "text"
+    pd_mapping_file = null // default is no file
+    pd_force = false
+    pd_skip = false
+    pd_columns = null
+    pd_count_missing = true
+
+    // GAS Call
+    gm_thresholds = "10,5,0"
+    gm_delimiter = "'.'" // note the single quotes surrounding the delimiter
+    ref_clusters = ""
 }
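These defaults flow straight into the profile_dists and gas call command lines above, and can be overridden per run. A hypothetical override config (values are illustrative only; `hamming` is assumed to be the other distance metric profile_dists accepts alongside `scaled`):

    // custom.config — illustrative override of the new defaults
    params {
        pd_distm      = "hamming"  // absolute mismatch counts instead of scaled percentages
        gm_thresholds = "3,2,1"    // thresholds must be on the same scale as pd_distm
    }
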
"IRIDA Next Example Pipeline", "type": "object", "definitions": { + "gas_call": { + "title": "GAS Call", + "type": "object", + "description": "", + "default": "", + "properties": { + "gm_thresholds": { + "type": "string", + "default": "10,5,0" + }, + "gm_delimiter": { + "type": "string", + "default": "\\'.\\" + }, + "ref_clusters": { + "type": "string" + } + } + }, + "profile_dists": { + "title": "Profile Dists", + "type": "object", + "description": "", + "default": "", + "properties": { + "pd_outfmt": { + "type": "string", + "default": "pairwise" + }, + "pd_distm": { + "type": "string", + "default": "scaled" + }, + "pd_missing_threshold": { + "type": "number", + "default": 1 + }, + "pd_sample_quality_threshold": { + "type": "number", + "default": 1 + }, + "pd_match_threshold": { + "type": "number", + "default": -1 + }, + "pd_file_type": { + "type": "string", + "default": "text" + }, + "pd_mapping_file": { + "type": "string" + }, + "pd_force": { + "type": "boolean" + }, + "pd_skip": { + "type": "boolean" + }, + "pd_columns": { + "type": "string" + }, + "pd_count_missing": { + "type": "boolean", + "default": true + } + } + }, "input_output_options": { "title": "Input/output options", "type": "object", @@ -214,6 +281,12 @@ } }, "allOf": [ + { + "$ref": "#/definitions/gas_call" + }, + { + "$ref": "#/definitions/profile_dists" + }, { "$ref": "#/definitions/input_output_options" }, diff --git a/nf-test.config b/nf-test.config index 870799d..2fa82ad 100644 --- a/nf-test.config +++ b/nf-test.config @@ -3,6 +3,6 @@ config { testsDir "tests" workDir ".nf-test" configFile "tests/nextflow.config" - profile "" + profile "docker" } diff --git a/tests/data/called/expected_results.txt b/tests/data/called/expected_results.txt new file mode 100644 index 0000000..8a3eec9 --- /dev/null +++ b/tests/data/called/expected_results.txt @@ -0,0 +1,5 @@ +id address level_1 level_2 level_3 +sample1 1.1.1 1 1 1 +sample2 1.1.1 1 1 1 +sample3 2.2.2 2 2 2 +sampleQ 1.1.1 1 1 1 diff --git a/tests/data/distances/expected_pairwise_dists.txt b/tests/data/distances/expected_pairwise_dists.txt new file mode 100644 index 0000000..df58510 --- /dev/null +++ b/tests/data/distances/expected_pairwise_dists.txt @@ -0,0 +1,4 @@ +query_id ref_id dist +sampleQ sample1 0.0 +sampleQ sample2 33.333333333333336 +sampleQ sample3 66.66666666666667 diff --git a/tests/data/profiles/expected-profile1.tsv b/tests/data/profiles/expected-profile1.tsv index 233f6e4..9b938e1 100644 --- a/tests/data/profiles/expected-profile1.tsv +++ b/tests/data/profiles/expected-profile1.tsv @@ -1,4 +1,4 @@ -sample_id l1 l2 l3 -sample1 1 1 1 -sample2 1 1 1 -sample3 1 1 2 +sample_id l1 l2 l3 +sample1 1 1 1 +sample2 1 1 1 +sample3 1 1 2 diff --git a/tests/data/reports/sample1.mlst.json b/tests/data/reports/sample1.mlst.json index 393f0ac..01bc774 100644 --- a/tests/data/reports/sample1.mlst.json +++ b/tests/data/reports/sample1.mlst.json @@ -1,7 +1,7 @@ -{ - "sample1": { - "l1": "1", - "l2": "1", - "l3": "1" - } -} +{ + "sample1": { + "l1": "1", + "l2": "1", + "l3": "1" + } +} diff --git a/tests/data/reports/sample2.mlst.json b/tests/data/reports/sample2.mlst.json index 9af0a4c..7c0426c 100644 --- a/tests/data/reports/sample2.mlst.json +++ b/tests/data/reports/sample2.mlst.json @@ -1,7 +1,7 @@ -{ - "sample2": { - "l1": "1", - "l2": "1", - "l3": "1" - } -} +{ + "sample2": { + "l1": "1", + "l2": "1", + "l3": "1" + } +} diff --git a/tests/data/reports/sample3.mlst.json b/tests/data/reports/sample3.mlst.json index 88c3d0c..43ea3c7 100644 --- 
diff --git a/tests/data/profiles/expected-profile1.tsv b/tests/data/profiles/expected-profile1.tsv
index 233f6e4..9b938e1 100644
--- a/tests/data/profiles/expected-profile1.tsv
+++ b/tests/data/profiles/expected-profile1.tsv
@@ -1,4 +1,4 @@
-sample_id	l1	l2	l3
-sample1	1	1	1
-sample2	1	1	1
-sample3	1	1	2
+sample_id	l1	l2	l3
+sample1	1	1	1
+sample2	1	1	1
+sample3	1	1	2
diff --git a/tests/data/reports/sample1.mlst.json b/tests/data/reports/sample1.mlst.json
index 393f0ac..01bc774 100644
--- a/tests/data/reports/sample1.mlst.json
+++ b/tests/data/reports/sample1.mlst.json
@@ -1,7 +1,7 @@
-{
-  "sample1": {
-    "l1": "1",
-    "l2": "1",
-    "l3": "1"
-  }
-}
+{
+  "sample1": {
+    "l1": "1",
+    "l2": "1",
+    "l3": "1"
+  }
+}
diff --git a/tests/data/reports/sample2.mlst.json b/tests/data/reports/sample2.mlst.json
index 9af0a4c..7c0426c 100644
--- a/tests/data/reports/sample2.mlst.json
+++ b/tests/data/reports/sample2.mlst.json
@@ -1,7 +1,7 @@
-{
-  "sample2": {
-    "l1": "1",
-    "l2": "1",
-    "l3": "1"
-  }
-}
+{
+  "sample2": {
+    "l1": "1",
+    "l2": "1",
+    "l3": "1"
+  }
+}
diff --git a/tests/data/reports/sample3.mlst.json b/tests/data/reports/sample3.mlst.json
index 88c3d0c..43ea3c7 100644
--- a/tests/data/reports/sample3.mlst.json
+++ b/tests/data/reports/sample3.mlst.json
@@ -1,7 +1,7 @@
-{
-  "sample3": {
-    "l1": "1",
-    "l2": "1",
-    "l3": "2"
-  }
-}
+{
+  "sample3": {
+    "l1": "1",
+    "l2": "1",
+    "l3": "2"
+  }
+}
diff --git a/tests/modules/local/assemblystub/main.nf.test b/tests/modules/local/assemblystub/main.nf.test
deleted file mode 100644
index 881bf56..0000000
--- a/tests/modules/local/assemblystub/main.nf.test
+++ /dev/null
@@ -1,38 +0,0 @@
-nextflow_process {
-
-    name "Test Process ASSEMBLY_STUB"
-    script "modules/local/assemblystub/main.nf"
-    process "ASSEMBLY_STUB"
-
-    test("Basic execution, check output.") {
-
-        when {
-            params {
-                outdir = "tests/results"
-            }
-            process {
-                """
-                input[0] = new Tuple(["id": "SAMPLE1"], [file("sample1_R1.fastq.gz"), file("sample1_R2.fastq.gz")])
-                """
-            }
-        }
-
-        then {
-            assert process.success
-
-            with(process.out) {
-                // check if emitted output has been created
-                assert assembly.size() == 1
-
-                // parse assembly file
-                def assembly_header = path(assembly.get(0)[1]).linesGzip[0]
-                def assembly_body = path(assembly.get(0)[1]).linesGzip[1]
-
-                assert assembly_header.equals(">SAMPLE1-stub-assembly")
-                assert assembly_body.equals("ACGTAACCGGTTAAACCCGGGTTTAAAACCCCGGGGTTTTAAAAACCCCCGGGGGTTTTT")
-            }
-        }
-
-    }
-
-}
diff --git a/tests/modules/local/generatesamplejson/main.nf.test b/tests/modules/local/generatesamplejson/main.nf.test
deleted file mode 100644
index ac071a3..0000000
--- a/tests/modules/local/generatesamplejson/main.nf.test
+++ /dev/null
@@ -1,40 +0,0 @@
-nextflow_process {
-
-    name "Test Process GENERATE_SAMPLE_JSON"
-    script "modules/local/generatesamplejson/main.nf"
-    process "GENERATE_SAMPLE_JSON"
-
-    test("Basic execution, check output.") {
-
-        when {
-            params {
-                outdir = "tests/results"
-            }
-            process {
-                """
-                input[0] = new Tuple(["id": "SAMPLE1"], [file("sample1_R1.fastq.gz"), file("sample1_R2.fastq.gz")], file("SAMPLE1.assembly.fa.gz"))
-                """
-            }
-        }
-
-        then {
-            assert process.success
-
-            with(process.out) {
-                // check if emitted output has been created
-                assert json.size() == 1
-
-                // parse output json file
-                def sample_json_string = path(json.get(0)[1]).linesGzip.join("\n")
-                def parser = new groovy.json.JsonSlurper()
-                def sample_json = parser.parseText(sample_json_string)
-
-                assert sample_json.files.samples.SAMPLE1[0].path.equals("assembly/SAMPLE1.assembly.fa.gz")
-                assert sample_json.metadata.samples.SAMPLE1.reads[0].equals("sample1_R1.fastq.gz")
-                assert sample_json.metadata.samples.SAMPLE1.reads[1].equals("sample1_R2.fastq.gz")
-            }
-        }
-
-    }
-
-}
diff --git a/tests/modules/local/generatesummary/main.nf.test b/tests/modules/local/generatesummary/main.nf.test
deleted file mode 100644
index b2eb189..0000000
--- a/tests/modules/local/generatesummary/main.nf.test
+++ /dev/null
@@ -1,37 +0,0 @@
-nextflow_process {
-
-    name "Test Process GENERATE_SUMMARY"
-    script "modules/local/generatesummary/main.nf"
-    process "GENERATE_SUMMARY"
-
-    test("Basic execution, check output.") {
-
-        when {
-            params {
-                outdir = "tests/results"
-            }
-            process {
-                """
-                input[0] = [new Tuple(["id": "SAMPLE1"], [file("sample1_R1.fastq.gz"), file("sample1_R2.fastq.gz")], file("SAMPLE1.assembly.fa.gz"))]
-                """
-            }
-        }
-
-        then {
-            assert process.success
-
-            with(process.out) {
-                // check if emitted output has been created
-                assert summary.size() == 1
-
-                assert path(summary.get(0)).linesGzip[0].equals("IRIDANEXTEXAMPLE Pipeline Summary")
-                assert path(summary.get(0)).linesGzip[4].equals("SAMPLE1:")
-                assert path(summary.get(0)).linesGzip[5].contains("reads.1: ")
-                assert path(summary.get(0)).linesGzip[6].contains("reads.2: ")
-                assert path(summary.get(0)).linesGzip[7].contains("assembly: ")
-            }
-        }
-
-    }
-
-}
diff --git a/tests/modules/local/iridanextoutput/main.nf.test b/tests/modules/local/iridanextoutput/main.nf.test
deleted file mode 100644
index 72808ab..0000000
--- a/tests/modules/local/iridanextoutput/main.nf.test
+++ /dev/null
@@ -1,51 +0,0 @@
-nextflow_process {
-
-    name "Test Process IRIDA_NEXT_OUTPUT"
-    script "modules/local/iridanextoutput/main.nf"
-    process "IRIDA_NEXT_OUTPUT"
-
-    test("Basic execution, check output.") {
-
-        when {
-            params {
-                outdir = "tests/results"
-            }
-            process {
-                """
-                input[0] = [file("$baseDir/tests/data/SAMPLE1.simple.json.gz"), file("$baseDir/tests/data/SAMPLE2.simple.json.gz"), file("$baseDir/tests/data/SAMPLE3.simple.json.gz")]
-                """
-            }
-        }
-
-        then {
-            assert process.success
-
-            with(process.out) {
-                // check if emitted output has been created
-                assert output_json.size() == 1
-
-                // parse output json file
-                def json_string = path(output_json.get(0)).linesGzip.join("\n")
-                def parser = new groovy.json.JsonSlurper()
-                def irida_json = parser.parseText(json_string)
-
-                assert irida_json.files.global[0].path.equals("summary/summary.txt.gz")
-
-                assert irida_json.files.samples.SAMPLE1[0].path.equals("assembly/SAMPLE1.assembly.fa.gz")
-                assert irida_json.files.samples.SAMPLE2[0].path.equals("assembly/SAMPLE2.assembly.fa.gz")
-                assert irida_json.files.samples.SAMPLE3[0].path.equals("assembly/SAMPLE3.assembly.fa.gz")
-
-                assert irida_json.metadata.samples.SAMPLE1.'reads.1'.equals("sample1_R1.fastq.gz")
-                assert irida_json.metadata.samples.SAMPLE1.'reads.2'.equals("sample1_R2.fastq.gz")
-
-                assert irida_json.metadata.samples.SAMPLE2.'reads.1'.equals("sample2_R1.fastq.gz")
-                assert irida_json.metadata.samples.SAMPLE2.'reads.2'.equals("sample2_R2.fastq.gz")
-
-                assert irida_json.metadata.samples.SAMPLE3.'reads.1'.equals("sample1_R1.fastq.gz")
-                assert irida_json.metadata.samples.SAMPLE3.'reads.2'.equals("null")
-            }
-        }
-
-    }
-
-}
diff --git a/tests/modules/local/simplifyiridajson/main.nf.test b/tests/modules/local/simplifyiridajson/main.nf.test
deleted file mode 100644
index 7d61567..0000000
--- a/tests/modules/local/simplifyiridajson/main.nf.test
+++ /dev/null
@@ -1,41 +0,0 @@
-nextflow_process {
-
-    name "Test Process SIMPLIFY_IRIDA_JSON"
-    script "modules/local/simplifyiridajson/main.nf"
-    process "SIMPLIFY_IRIDA_JSON"
-
-    test("Basic execution, check output.") {
-
-        when {
-            params {
-                outdir = "tests/results"
-            }
-            process {
-                """
-                input[0] = new Tuple(["id": "SAMPLE1"], file("$baseDir/tests/data/SAMPLE1.json.gz"))
-                """
-            }
-        }
-
-        then {
-            assert process.success
-
-            with(process.out) {
-                // check if emitted output has been created
-                assert simple_json.size() == 1
-
-                // parse output json file
-                def json_string = path(simple_json.get(0)[1]).linesGzip.join("\n")
-                def parser = new groovy.json.JsonSlurper()
-                def json_simple = parser.parseText(json_string)
-
-                assert json_simple.files.samples.SAMPLE1[0].path.equals("assembly/SAMPLE1.assembly.fa.gz")
-
-                assert json_simple.metadata.samples.SAMPLE1.'reads.1'.equals("sample1_R1.fastq.gz")
-                assert json_simple.metadata.samples.SAMPLE1.'reads.2'.equals("sample1_R2.fastq.gz")
-            }
-        }
-
-    }
-
-}
diff --git a/tests/nextflow.config b/tests/nextflow.config
index c19b1ad..672dc69 100644
--- a/tests/nextflow.config
+++ b/tests/nextflow.config
@@ -3,3 +3,16 @@
     Nextflow config file for running tests
 ========================================================================================
 */
+
+params.max_memory = "2.GB"
+params.max_cpus = 1
+params.ref_clusters = "$baseDir/tests/data/clusters/expected_clusters.txt"
+
+/* This is required to run in WSL/Ubuntu using Singularity.
+Without this, profile_dists was not completing successfully
+due to issues with multiprocessing in the container. A similar
+error is described at https://github.com/marcelm/cutadapt/issues/583
+*/
+singularity.runOptions = "--contain"
diff --git a/tests/pipelines/main.nf.test b/tests/pipelines/main.nf.test
new file mode 100644
index 0000000..911af92
--- /dev/null
+++ b/tests/pipelines/main.nf.test
@@ -0,0 +1,40 @@
+nextflow_pipeline {
+
+    name "Integration test of nomenclature assignment pipeline"
+    script "main.nf"
+
+    test("Small-scale test of full pipeline"){
+        tag "pipeline"
+
+        when{
+            params {
+                input = "$baseDir/tests/data/samplesheets/samplesheet1.csv"
+                outdir = "results"
+            }
+        }
+
+        then {
+            assert workflow.success
+            assert path("$launchDir/results").exists()
+
+            // Check merged profiles
+            // TODO check query profile is merged
+            def actual_profile_ref = path("$launchDir/results/locidex/merge/reference/merged_ref/merged_profiles_ref.tsv")
+            def expected_profile_tsv = path("$baseDir/tests/data/profiles/expected-profile1.tsv")
+            assert actual_profile_ref.text == expected_profile_tsv.text
+
+            // Check computed pairwise distances
+            def actual_distances = path("$launchDir/results/distances/results.text")
+            def expected_distances = path("$baseDir/tests/data/distances/expected_pairwise_dists.txt")
+            assert actual_distances.text == expected_distances.text
+
+            // Check called clusters
+            def actual_calls = path("$launchDir/results/call/Called/results.text")
+            def expected_calls = path("$baseDir/tests/data/called/expected_results.txt")
+            assert actual_calls.text == expected_calls.text
+        }
+    }
+
+}
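For the TODO above, the query-side check would presumably mirror the reference assertion. The path below is an assumption derived from the publishDir and naming changes in this diff, not a verified output:

    // Hypothetical follow-up for the query-profile TODO
    def actual_profile_query = path("$launchDir/results/locidex/merge/query/merged_query/merged_profiles_query.tsv")
    assert actual_profile_query.exists()
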
diff --git a/workflows/gas_nomenclature.nf b/workflows/gas_nomenclature.nf
index 453a922..a0befa9 100644
--- a/workflows/gas_nomenclature.nf
+++ b/workflows/gas_nomenclature.nf
@@ -48,6 +48,23 @@ include { PROFILE_DISTS } from "../modules/local/profile_dists/main"
 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
 */
 
+def prepareFilePath(String filep, GString debug_msg){
+    // Returns [] if no file was given, the file if it exists, and null if it does not
+    def return_path = null
+    if(filep){
+        def file_in = file(filep)
+        if(file_in.exists()){
+            return_path = file_in
+            log.debug debug_msg
+        }
+    }else{
+        return_path = [] // empty value if the file argument is null
+    }
+
+    return return_path
+}
+
 workflow GAS_NOMENCLATURE {
 
     ch_versions = Channel.empty()
@@ -58,15 +75,51 @@ workflow GAS_NOMENCLATURE {
 
     profiles = input.branch{
         ref: it[0].profile_type
         query: !it[0].profile_type
-        errors: true // TODO add in check on file for erroneous values, may not be needed as nf-validation is working
+        errors: true // To discuss: add a check on the file for erroneous values; may not be needed as nf-validation is working
     }
 
     reference_values = profiles.ref.collect{ meta, profile -> profile}
-    query_values = profile.query.collect{ meta, profile -> proifile }
-    reference_values.view()
-    query_values.view()
-    //LOCIDEX_MERGE_REF(reference_values)
-    //LOCIDEX_MERGE_QUERY(query_values)
+    query_values = profiles.query.collect{ meta, profile -> profile }
+
+    // LOCIDEX modules
+    ref_tag = Channel.value("ref")
+    query_tag = Channel.value("query")
+    merged_references = LOCIDEX_MERGE_REF(reference_values, ref_tag)
+    ch_versions = ch_versions.mix(merged_references.versions)
+
+    merged_queries = LOCIDEX_MERGE_QUERY(query_values, query_tag)
+    ch_versions = ch_versions.mix(merged_queries.versions)
+
+    // PROFILE DISTS processes
+    mapping_file = prepareFilePath(params.pd_mapping_file, "Selecting ${params.pd_mapping_file} for --pd_mapping_file")
+    if(mapping_file == null){
+        exit 1, "${params.pd_mapping_file}: Does not exist but was passed to the pipeline. Exiting now."
+    }
+
+    columns_file = prepareFilePath(params.pd_columns, "Selecting ${params.pd_columns} for --pd_columns")
+    if(columns_file == null){
+        exit 1, "${params.pd_columns}: Does not exist but was passed to the pipeline. Exiting now."
+    }
+
+    mapping_format = Channel.value(params.pd_outfmt)
+
+    distances = PROFILE_DISTS(merged_queries.combined_profiles,
+                              merged_references.combined_profiles,
+                              mapping_format,
+                              mapping_file,
+                              columns_file)
+    ch_versions = ch_versions.mix(distances.versions)
+
+    // GAS CALL
+    clusters = Channel.fromPath(params.ref_clusters, checkIfExists: true)
+    called_data = GAS_CALL(clusters, distances.results)
+    ch_versions = ch_versions.mix(called_data.versions)
 
     // A channel of tuples of ({meta}, [read[0], read[1]], assembly)
@@ -93,9 +146,10 @@ workflow GAS_NOMENCLATURE {
     //)
     //ch_versions = ch_versions.mix(IRIDA_NEXT_OUTPUT.out.versions)
 
-    //CUSTOM_DUMPSOFTWAREVERSIONS (
-    //    ch_versions.unique().collectFile(name: 'collated_versions.yml')
-    //)
+    CUSTOM_DUMPSOFTWAREVERSIONS (
+        ch_versions.unique().collectFile(name: 'collated_versions.yml')
+    )
+
 }
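Closing note on the GAS call wiring: with `gm_thresholds = "10,5,0"`, each sample appears to receive one cluster code per threshold level, and the codes are joined with `gm_delimiter` into the address seen in tests/data/called/expected_results.txt (e.g. 1.1.1). A sketch of that relationship (Groovy, illustrative only):

    // per-level cluster codes -> delimited address
    def levels = [1, 1, 1]            // level_1..level_3 assignments for sampleQ
    assert levels.join('.') == '1.1.1' // gm_delimiter, without its quoting wrapper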