From f7ff5bb8393ed705d5dbb39de14f2ddf99484c90 Mon Sep 17 00:00:00 2001 From: Steven Sutcliffe Date: Wed, 2 Oct 2024 14:00:34 -0400 Subject: [PATCH 01/16] Add sample-name column for input feature --- assets/schema_input.json | 7 +++++- conf/iridanext.config | 5 ++-- .../samplesheets/samplesheet-sample_name.csv | 5 ++++ workflows/gas_nomenclature.nf | 25 ++++++++++++++++++- 4 files changed, 38 insertions(+), 4 deletions(-) create mode 100644 tests/data/samplesheets/samplesheet-sample_name.csv diff --git a/assets/schema_input.json b/assets/schema_input.json index 6094f92..dbb4a4f 100644 --- a/assets/schema_input.json +++ b/assets/schema_input.json @@ -10,10 +10,15 @@ "sample": { "type": "string", "pattern": "^\\S+$", - "meta": ["id"], + "meta": ["irida_id"], "unique": true, "errorMessage": "Sample name must be provided and cannot contain spaces" }, + "sample_name": { + "type": "string", + "meta": ["id"], + "errorMessage": "Sample name is optional, if provided will replace sample for filenames and outputs" + }, "mlst_alleles": { "type": "string", "format": "file-path", diff --git a/conf/iridanext.config b/conf/iridanext.config index ce9ad72..314a5e7 100644 --- a/conf/iridanext.config +++ b/conf/iridanext.config @@ -4,12 +4,13 @@ iridanext { path = "${params.outdir}/iridanext.output.json.gz" overwrite = true files { - samples = ["**/input/*_error_report.csv"] + samples = ["${params.outdir}/input/*_error_report.csv"] } metadata { + idkey = "id_irida" samples { csv { - path = "**/filter/new_addresses.csv" + path = "${params.outdir}/filter/new_addresses.csv" idcol = "id" } } diff --git a/tests/data/samplesheets/samplesheet-sample_name.csv b/tests/data/samplesheets/samplesheet-sample_name.csv new file mode 100644 index 0000000..c6c73d5 --- /dev/null +++ b/tests/data/samplesheets/samplesheet-sample_name.csv @@ -0,0 +1,5 @@ +sample,sample_name,mlst_alleles,address +sampleQ,sample 1,https://raw.githubusercontent.com/phac-nml/gasnomenclature/dev/tests/data/reports/sampleQ.mlst.json, +sample1,sample#2,https://raw.githubusercontent.com/phac-nml/gasnomenclature/dev/tests/data/reports/sample1.mlst.json,1.1.1 +sample2,sample#2,https://raw.githubusercontent.com/phac-nml/gasnomenclature/dev/tests/data/reports/sample2.mlst.json,1.1.1 +sample3,,https://raw.githubusercontent.com/phac-nml/gasnomenclature/dev/tests/data/reports/sample3.mlst.json,1.1.2 diff --git a/workflows/gas_nomenclature.nf b/workflows/gas_nomenclature.nf index 4daaf71..4101a69 100644 --- a/workflows/gas_nomenclature.nf +++ b/workflows/gas_nomenclature.nf @@ -68,18 +68,41 @@ workflow GAS_NOMENCLATURE { ch_versions = Channel.empty() + // Track processed IDs + def processedIDs = [] as Set + // Create a new channel of metadata from a sample sheet // NB: `input` corresponds to `params.input` and associated sample sheet schema input = Channel.fromSamplesheet("input") + // and remove non-alphanumeric characters in sample_names (meta.id), whilst also correcting for duplicate sample_names (meta.id) + .map { meta, mlst_file -> + if (!meta.id) { + meta.id = meta.irida_id + } else { + // Non-alphanumeric characters (excluding _,-,.) 
will be replaced with "_" + meta.id = meta.id.replaceAll(/[^A-Za-z0-9_.\-]/, '_') + } + // Ensure ID is unique by appending meta.irida_id if needed + while (processedIDs.contains(meta.id)) { + meta.id = "${meta.id}_${meta.irida_id}" + } + // Add the ID to the set of processed IDs + processedIDs << meta.id + + tuple(meta, mlst_file)} + + // Ensure meta.id and mlst_file keys match; generate error report for samples where id ≠ key input_assure = INPUT_ASSURE(input) ch_versions = ch_versions.mix(input_assure.versions) - // Prepare reference and query TSV files for LOCIDEX_MERGE + // Collect samples without address profiles = input_assure.result.branch { query: !it[0].address } + + // Prepare reference and query TSV files for LOCIDEX_MERGE reference_values = input_assure.result.collect{ meta, mlst -> mlst} query_values = profiles.query.collect{ meta, mlst -> mlst } From 367be53b6daad5d44531790ebdb2e3c1c96be273 Mon Sep 17 00:00:00 2001 From: Steven Sutcliffe Date: Wed, 2 Oct 2024 14:18:52 -0400 Subject: [PATCH 02/16] Fixed change that broke nf-test --- conf/iridanext.config | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/conf/iridanext.config b/conf/iridanext.config index 314a5e7..63e181e 100644 --- a/conf/iridanext.config +++ b/conf/iridanext.config @@ -4,13 +4,13 @@ iridanext { path = "${params.outdir}/iridanext.output.json.gz" overwrite = true files { - samples = ["${params.outdir}/input/*_error_report.csv"] + samples = ["**/input/*_error_report.csv"] } metadata { idkey = "id_irida" samples { csv { - path = "${params.outdir}/filter/new_addresses.csv" + path = "**/filter/new_addresses.csv" idcol = "id" } } From 8a5a4a37c4d5ffa74794cdc30772120dee88e0c0 Mon Sep 17 00:00:00 2001 From: Steven Sutcliffe Date: Wed, 2 Oct 2024 15:33:53 -0400 Subject: [PATCH 03/16] added nf-tests with tag add-sample-name --- .../sample_name_add_iridanext.output.json | 31 +++++++++++++++ tests/pipelines/main.nf.test | 39 +++++++++++++++++++ 2 files changed, 70 insertions(+) create mode 100644 tests/data/irida/sample_name_add_iridanext.output.json diff --git a/tests/data/irida/sample_name_add_iridanext.output.json b/tests/data/irida/sample_name_add_iridanext.output.json new file mode 100644 index 0000000..acc2745 --- /dev/null +++ b/tests/data/irida/sample_name_add_iridanext.output.json @@ -0,0 +1,31 @@ +{ + "files": { + "global": [ + + ], + "samples": { + "sample_1": [ + { + "path": "input/sample_1_error_report.csv" + } + ], + "sample_2_sample2": [ + { + "path": "input/sample_2_sample2_error_report.csv" + } + ], + "sample_2": [ + { + "path": "input/sample_2_error_report.csv" + } + ] + } + }, + "metadata": { + "samples": { + "sample_1": { + "address": "1.1.3" + } + } + } +} \ No newline at end of file diff --git a/tests/pipelines/main.nf.test b/tests/pipelines/main.nf.test index 97a935e..5352627 100644 --- a/tests/pipelines/main.nf.test +++ b/tests/pipelines/main.nf.test @@ -354,4 +354,43 @@ nextflow_pipeline { assert (workflow.stdout =~ /sample2_empty.mlst.json is missing the 'profile' section or is completely empty!/).find() } } + + test("Testing when sample_name column is included on input"){ + // For integration in IRIDA-Next there needs to be an option to have the input file include a sample_name column + + tag "add-sample-name" + + when{ + params { + input = "$baseDir/tests/data/samplesheets/samplesheet-sample_name.csv" + outdir = "results" + } + } + + then { + assert workflow.success + assert path("$launchDir/results").exists() + + // Check outputs + def lines = [] + + // Ensure that the 
error_reports are generated for query and reference samples based on sample_name swap with sample + lines = path("$launchDir/results/input/sample_1_error_report.csv").readLines() + assert lines.contains("sample_1,[\'sampleQ\'],Query sample_1 ID and JSON key in sampleQ.mlst.json DO NOT MATCH. The 'sampleQ' key in sampleQ.mlst.json has been forcefully changed to 'sample_1': User should manually check input files to ensure correctness.") + + lines = path("$launchDir/results/input/sample_2_error_report.csv").readLines() + assert lines.contains("sample_2,[\'sample1\'],Reference sample_2 ID and JSON key in sample1.mlst.json DO NOT MATCH. The 'sample1' key in sample1.mlst.json has been forcefully changed to 'sample_2': User should manually check input files to ensure correctness.") + + lines = path("$launchDir/results/input/sample_2_sample2_error_report.csv").readLines() + assert lines.contains("sample_2_sample2,[\'sample2\'],Reference sample_2_sample2 ID and JSON key in sample2.mlst.json DO NOT MATCH. The 'sample2' key in sample2.mlst.json has been forcefully changed to 'sample_2_sample2': User should manually check input files to ensure correctness.") + + // Check filter_query csv file + lines = path("$launchDir/results/filter/new_addresses.csv").readLines() + assert lines.contains("sample_1,1.1.3") + + // Check IRIDA Next JSON output + assert path("$launchDir/results/iridanext.output.json").json == path("$baseDir/tests/data/irida/sample_name_add_iridanext.output.json").json + + } + } } From e230ae8a6a35c9f4d439b0c9eb8b5ddf56e0d430 Mon Sep 17 00:00:00 2001 From: Steven Sutcliffe Date: Wed, 2 Oct 2024 15:44:08 -0400 Subject: [PATCH 04/16] Made exception for sample_name JSON file for nf-test --- .prettierignore | 1 + 1 file changed, 1 insertion(+) diff --git a/.prettierignore b/.prettierignore index 437d763..4e64634 100644 --- a/.prettierignore +++ b/.prettierignore @@ -10,3 +10,4 @@ testing/ testing* *.pyc bin/ +tests/data/irida/sample_name_add_iridanext.output.json From 3f611108a94db65991519a533041e0b801774ed7 Mon Sep 17 00:00:00 2001 From: Steven Sutcliffe Date: Wed, 2 Oct 2024 15:56:57 -0400 Subject: [PATCH 05/16] Make exception for editor config for nf-test json --- .editorconfig | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/.editorconfig b/.editorconfig index 9b99008..fc33cb7 100644 --- a/.editorconfig +++ b/.editorconfig @@ -30,3 +30,8 @@ indent_style = unset # ignore python [*.{py}] indent_style = unset + +# ignore nf-test json file +[tests/data/irida/sample_name_add_iridanext.output.json] +end_of_line = unset +trim_trailing_whitespace = unset From b17c87b78ed723bf84993d7516a951e75c03c752 Mon Sep 17 00:00:00 2001 From: Steven Sutcliffe Date: Wed, 2 Oct 2024 15:58:47 -0400 Subject: [PATCH 06/16] Make exception for editor config for nf-test json --- .editorconfig | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.editorconfig b/.editorconfig index fc33cb7..68c0e00 100644 --- a/.editorconfig +++ b/.editorconfig @@ -33,5 +33,5 @@ indent_style = unset # ignore nf-test json file [tests/data/irida/sample_name_add_iridanext.output.json] -end_of_line = unset +insert_final_newline = unset trim_trailing_whitespace = unset From 2cd6daeded59c6a6e9dcd72bdb724e4ec4c57c09 Mon Sep 17 00:00:00 2001 From: Steven Sutcliffe Date: Thu, 3 Oct 2024 09:38:50 -0400 Subject: [PATCH 07/16] Update documentation --- CHANGELOG.md | 9 +++++++++ README.md | 10 ++++++++++ docs/usage.md | 26 +++++++++++++++++++++++++- 3 files changed, 44 insertions(+), 1 deletion(-) diff --git a/CHANGELOG.md 
b/CHANGELOG.md index de1d532..ade895c 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -3,6 +3,15 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/) and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html). +## Development + +### Changed + +- Added the ability to include a `sample_name` column in the input samplesheet.csv. Allows for compatibility with IRIDA-Next input configuration. + - `sample_name` special characters will be replaced with `"_"` + - If no `sample_name` is supplied in the column `sample` will be used + - To avoid repeat values for `sample_name` all `sample_name` values will be suffixed with the unique `sample` value from the input file + ## [0.2.3] - 2024/09/25 ### `Changed` diff --git a/README.md b/README.md index 499f513..f5fc7f9 100644 --- a/README.md +++ b/README.md @@ -20,6 +20,16 @@ The structure of this file is defined in [assets/schema_input.json](assets/schem Details on the columns can be found in the [Full samplesheet](docs/usage.md#full-samplesheet) documentation. +## IRIDA-Next Optional Input Configuration + +`gasnomenclature` accepts the [IRIDA-Next](https://github.com/phac-nml/irida-next) format for samplesheets which can contain an additional column: `sample_name` + +`sample_name`: An **optional** column, that overrides `sample` for outputs (filenames and sample names) and reference assembly identification. + +`sample_name`, allows more flexibility in naming output files or sample identification. Unlike `sample`, `sample_name` is not required to contain unique values. `Nextflow` requires unique sample names, and therefore in the instance of repeat `sample_names`, `sample` will be suffixed to any `sample_name`. Non-alphanumeric characters (excluding `_`,`-`,`.`) will be replaced with `"_"`. + +An [example samplesheet](tests/data/samplesheets/samplesheet-sample_name.csv) has been provided with the pipeline. + # Parameters The main parameters are `--input` as defined above and `--output` for specifying the output results directory. You may wish to provide `-profile singularity` to specify the use of singularity containers and `-r [branch]` to specify which GitHub branch you would like to run. diff --git a/docs/usage.md b/docs/usage.md index 2433443..36587a2 100644 --- a/docs/usage.md +++ b/docs/usage.md @@ -12,7 +12,7 @@ You will need to create a samplesheet with information about the samples you wou --input '[path to samplesheet file]' ``` -### Full samplesheet +### Full Standard Samplesheet The input samplesheet must contain three columns: `sample`, `mlst_alleles`, `address`. The sample names within a samplesheet should be unique. All other columns will be ignored. @@ -33,6 +33,28 @@ sampleF,sampleF.mlst.json, An [example samplesheet](../assets/samplesheet.csv) has been provided with the pipeline. +### IRIDA-Next Optional Samplesheet Configuration + +`gasnomenclature` accepts the [IRIDA-Next](https://github.com/phac-nml/irida-next) format for samplesheets which contain the following columns: `sample`, `sample_name`, `mlst_alleles`, `address`. The sample IDs within a samplesheet should be unique. 
+ +A final samplesheet file consisting of mlst_alleles and addresses may look something like the one below: + +```csv title="samplesheet.csv" +sample,sample_name,mlst_alleles,address +sampleA,S1,sampleA.mlst.json.gz,1.1.1 +sampleQ,S2,sampleQ.mlst.json.gz,2.2.2 +sampleF,,sampleF.mlst.json, +``` + +| Column | Description | +| -------------- | ------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | +| `sample` | Custom sample name. Samples should be unique within a samplesheet. | +| `sample_name` | Sample name used in outputs (filenames and sample names) | +| `mlst_alleles` | Full path to an MLST JSON file describing the loci/alleles for the sample against some MLST scheme. A way to generate this file is via [locidex]. File can optionally be gzipped and must have the extension ".mlst.json", ".mlst.subtyping.json" (or with an additional ".gz" if gzipped). | +| `address` | Hierarchal clustering address. If left empty for a sample, the pipeline will assign a cluster address. | + +An [example samplesheet](tests/data/samplesheets/samplesheet-sample_name.csv) has been provided with the pipeline. + ## Running the pipeline The typical command for running the pipeline is as follows: @@ -185,3 +207,5 @@ We recommend adding the following line to your environment to limit this (typica ```bash NXF_OPTS='-Xms1g -Xmx4g' ``` + +[locidex]: https://github.com/phac-nml/locidex From a2460149377d2020b05ff9136e8da3f06d0658ca Mon Sep 17 00:00:00 2001 From: Steven Sutcliffe Date: Thu, 10 Oct 2024 14:47:16 -0400 Subject: [PATCH 08/16] Added irida-id to new_addresses output to be compatible with IRIDA-Next --- conf/iridanext.config | 10 ++++++--- docs/output.md | 2 +- modules/local/filter_query/main.nf | 9 +++++--- .../sample_name_add_iridanext.output.json | 22 +++++++++++++------ .../samplesheets/samplesheet-sample_name.csv | 1 + tests/pipelines/main.nf.test | 19 ++++++++-------- workflows/gas_nomenclature.nf | 14 +++++++++--- 7 files changed, 51 insertions(+), 26 deletions(-) diff --git a/conf/iridanext.config b/conf/iridanext.config index 63e181e..95a423e 100644 --- a/conf/iridanext.config +++ b/conf/iridanext.config @@ -4,14 +4,18 @@ iridanext { path = "${params.outdir}/iridanext.output.json.gz" overwrite = true files { + idkey = "irida_id" samples = ["**/input/*_error_report.csv"] } metadata { - idkey = "id_irida" samples { + keep = [ + "address" + ] csv { - path = "**/filter/new_addresses.csv" - idcol = "id" + path = "**/filter/new_addresses.tsv" + sep = "\t" + idcol = 'irida_id' } } } diff --git a/docs/output.md b/docs/output.md index 27a33c2..6d66634 100644 --- a/docs/output.md +++ b/docs/output.md @@ -93,7 +93,7 @@ The pipeline is built using [Nextflow](https://www.nextflow.io/) and processes d Output files - `filter/` - - `new_addresses.csv` + - `new_addresses.tsv` diff --git a/modules/local/filter_query/main.nf b/modules/local/filter_query/main.nf index fec0ea1..a196ffa 100644 --- a/modules/local/filter_query/main.nf +++ b/modules/local/filter_query/main.nf @@ -13,7 +13,7 @@ process FILTER_QUERY { val out_format output: - path("new_addresses.*"), emit: csv + path("new_addresses.*"), emit: tsv path("versions.yml"), emit: versions script: @@ -24,13 +24,16 @@ process FILTER_QUERY { """ # Filter the query samples only; keep only the 'id' and 'address' columns + csvtk cut -t -f 2 
${query_ids} > query_list.txt # Need to use the second column to pull meta.id because there is no header + csvtk add-header ${query_ids} -t -n irida_id,id > id.txt csvtk grep \\ ${addresses} \\ -f 1 \\ - -P ${query_ids} \\ + -P query_list.txt \\ --delimiter "${delimiter}" \\ --out-delimiter "${out_delimiter}" | \\ - csvtk cut -f id,address > ${outputFile}.${out_extension} + csvtk cut -t -f id,address > tmp.tsv + csvtk join -t -f id id.txt tmp.tsv > ${outputFile}.${out_extension} cat <<-END_VERSIONS > versions.yml "${task.process}": diff --git a/tests/data/irida/sample_name_add_iridanext.output.json b/tests/data/irida/sample_name_add_iridanext.output.json index acc2745..92fce05 100644 --- a/tests/data/irida/sample_name_add_iridanext.output.json +++ b/tests/data/irida/sample_name_add_iridanext.output.json @@ -1,30 +1,38 @@ { "files": { "global": [ - + ], "samples": { - "sample_1": [ + "sampleQ": [ { "path": "input/sample_1_error_report.csv" } ], - "sample_2_sample2": [ + "sample1": [ + { + "path": "input/sample_2_error_report.csv" + } + ], + "sample2": [ { "path": "input/sample_2_sample2_error_report.csv" } ], - "sample_2": [ + "sampleR": [ { - "path": "input/sample_2_error_report.csv" + "path": "input/sample4_error_report.csv" } ] } }, "metadata": { "samples": { - "sample_1": { - "address": "1.1.3" + "sampleQ": { + "address": "2.2.3" + }, + "sampleR": { + "address": "2.2.3" } } } diff --git a/tests/data/samplesheets/samplesheet-sample_name.csv b/tests/data/samplesheets/samplesheet-sample_name.csv index c6c73d5..fb51952 100644 --- a/tests/data/samplesheets/samplesheet-sample_name.csv +++ b/tests/data/samplesheets/samplesheet-sample_name.csv @@ -3,3 +3,4 @@ sampleQ,sample 1,https://raw.githubusercontent.com/phac-nml/gasnomenclature/dev/ sample1,sample#2,https://raw.githubusercontent.com/phac-nml/gasnomenclature/dev/tests/data/reports/sample1.mlst.json,1.1.1 sample2,sample#2,https://raw.githubusercontent.com/phac-nml/gasnomenclature/dev/tests/data/reports/sample2.mlst.json,1.1.1 sample3,,https://raw.githubusercontent.com/phac-nml/gasnomenclature/dev/tests/data/reports/sample3.mlst.json,1.1.2 +sampleR,sample4,https://raw.githubusercontent.com/phac-nml/gasnomenclature/dev/tests/data/reports/sampleF.mlst.json, diff --git a/tests/pipelines/main.nf.test b/tests/pipelines/main.nf.test index 5352627..94e8090 100644 --- a/tests/pipelines/main.nf.test +++ b/tests/pipelines/main.nf.test @@ -221,9 +221,9 @@ nextflow_pipeline { assert lines.contains("sampleR,[\'sampleF\'],Query sampleR ID and JSON key in sampleF.mlst.json DO NOT MATCH. The 'sampleF' key in sampleF.mlst.json has been forcefully changed to 'sampleR': User should manually check input files to ensure correctness.") // Check filter_query csv file - lines = path("$launchDir/results/filter/new_addresses.csv").readLines() - assert lines.contains("sampleQ,2.2.3") - assert lines.contains("sampleR,2.2.3") + lines = path("$launchDir/results/filter/new_addresses.tsv").readLines() + assert lines.contains("sampleQ\tsampleQ\t2.2.3") + assert lines.contains("sampleR\tsampleR\t2.2.3") // Check IRIDA Next JSON output assert path("$launchDir/results/iridanext.output.json").json == path("$baseDir/tests/data/irida/mismatched_iridanext.output.json").json @@ -271,8 +271,8 @@ nextflow_pipeline { assert lines.contains('sample3,"[\'extra_key\', \'sample3\']","MLST JSON file (sample3_multiplekeys.mlst.json) contains multiple keys: [\'extra_key\', \'sample3\']. 
The MLST JSON file has been modified to retain only the \'sample3\' entry"') // Check filtered query csv results - lines = path("$launchDir/results/filter/new_addresses.csv").readLines() - assert lines.contains("sampleQ,1.1.3") + lines = path("$launchDir/results/filter/new_addresses.tsv").readLines() + assert lines.contains("sampleQ\tsampleQ\t1.1.3") // Check IRIDA Next JSON output assert path("$launchDir/results/iridanext.output.json").json == path("$baseDir/tests/data/irida/multiplekeys_iridanext.output.json").json @@ -320,8 +320,8 @@ nextflow_pipeline { assert lines.contains('sample3,"[\'extra_key\', \'sample4\']",No key in the MLST JSON file (sample3_multiplekeys_nomatch.mlst.json) matches the specified sample ID \'sample3\'. The first key \'extra_key\' has been forcefully changed to \'sample3\' and all other keys have been removed.') // Check filtered query csv results - lines = path("$launchDir/results/filter/new_addresses.csv").readLines() - assert lines.contains("sampleQ,1.1.3") + lines = path("$launchDir/results/filter/new_addresses.tsv").readLines() + assert lines.contains("sampleQ\tsampleQ\t1.1.3") // Check IRIDA Next JSON output assert path("$launchDir/results/iridanext.output.json").json == path("$baseDir/tests/data/irida/multiplekeys_iridanext.output.json").json @@ -385,8 +385,9 @@ nextflow_pipeline { assert lines.contains("sample_2_sample2,[\'sample2\'],Reference sample_2_sample2 ID and JSON key in sample2.mlst.json DO NOT MATCH. The 'sample2' key in sample2.mlst.json has been forcefully changed to 'sample_2_sample2': User should manually check input files to ensure correctness.") // Check filter_query csv file - lines = path("$launchDir/results/filter/new_addresses.csv").readLines() - assert lines.contains("sample_1,1.1.3") + lines = path("$launchDir/results/filter/new_addresses.tsv").readLines() + assert lines.contains("sampleQ\tsample_1\t2.2.3") + assert lines.contains("sampleR\tsample4\t2.2.3") // Check IRIDA Next JSON output assert path("$launchDir/results/iridanext.output.json").json == path("$baseDir/tests/data/irida/sample_name_add_iridanext.output.json").json diff --git a/workflows/gas_nomenclature.nf b/workflows/gas_nomenclature.nf index 4101a69..6899ec7 100644 --- a/workflows/gas_nomenclature.nf +++ b/workflows/gas_nomenclature.nf @@ -106,6 +106,11 @@ workflow GAS_NOMENCLATURE { reference_values = input_assure.result.collect{ meta, mlst -> mlst} query_values = profiles.query.collect{ meta, mlst -> mlst } + // Query Map: Use to return meta.irida_id to output for mapping to IRIDA-Next JSON + query_map = profiles.query.map{ meta, mlst-> + tuple(meta.id, meta.irida_id) + }.collect() + // LOCIDEX modules ref_tag = Channel.value("ref") query_tag = Channel.value("value") @@ -166,16 +171,19 @@ workflow GAS_NOMENCLATURE { called_data = GAS_CALL(expected_clusters.text, distances.results) ch_versions = ch_versions.mix(called_data.versions) - // Filter the new queried samples and addresses into a CSV/JSON file for the IRIDANext plug in - query_ids = profiles.query.collectFile { it[0].id + '\n' } + // Filter the new queried samples and addresses into a CSV/JSON file for the IRIDANext plug in and + // add a column with IRIDA ID to allow for IRIDANext plugin to include metadata + query_irida_ids = profiles.query.collectFile { it[0].irida_id + '\t' + it[0].id + '\n'} - new_addresses = FILTER_QUERY(query_ids, called_data.distances, "tsv", "csv") + new_addresses = FILTER_QUERY(query_irida_ids, called_data.distances, "tsv", "tsv") ch_versions = 
ch_versions.mix(new_addresses.versions) CUSTOM_DUMPSOFTWAREVERSIONS ( ch_versions.unique().collectFile(name: 'collated_versions.yml') ) + + } /* From d1b71a1cb500aa18a0b8713ec98ce0f9cdd0f20a Mon Sep 17 00:00:00 2001 From: Steven Sutcliffe Date: Thu, 10 Oct 2024 15:00:31 -0400 Subject: [PATCH 09/16] Update lint CLI command --- .github/workflows/linting.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/linting.yml b/.github/workflows/linting.yml index 1fcafe8..8b60f35 100644 --- a/.github/workflows/linting.yml +++ b/.github/workflows/linting.yml @@ -51,7 +51,7 @@ jobs: GITHUB_COMMENTS_URL: ${{ github.event.pull_request.comments_url }} GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} GITHUB_PR_COMMIT: ${{ github.event.pull_request.head.sha }} - run: nf-core -l lint_log.txt lint --dir ${GITHUB_WORKSPACE} --markdown lint_results.md + run: nf-core -l lint_log.txt pipelines lint --dir ${GITHUB_WORKSPACE} --markdown lint_results.md - name: Save PR number if: ${{ always() }} From 222ac0b06966efc811dc03ebb98d75e49cdce431 Mon Sep 17 00:00:00 2001 From: Steven Sutcliffe Date: Thu, 10 Oct 2024 16:09:14 -0400 Subject: [PATCH 10/16] Fixing lint issue --- assets/schema_input.json | 2 +- nextflow_schema.json | 5 ++--- 2 files changed, 3 insertions(+), 4 deletions(-) diff --git a/assets/schema_input.json b/assets/schema_input.json index dbb4a4f..ab0b7ab 100644 --- a/assets/schema_input.json +++ b/assets/schema_input.json @@ -1,5 +1,5 @@ { - "$schema": "http://json-schema.org/draft-07/schema", + "$schema": "https://json-schema.org/draft-07/schema", "$id": "https://raw.githubusercontent.com/phac-nml/gasnomenclature/main/assets/schema_input.json", "title": "phac-nml/gasnomenclature pipeline - params.input schema", "description": "Schema for the file provided with params.input", diff --git a/nextflow_schema.json b/nextflow_schema.json index b2b8a89..0034635 100644 --- a/nextflow_schema.json +++ b/nextflow_schema.json @@ -1,5 +1,5 @@ { - "$schema": "http://json-schema.org/draft-07/schema", + "$schema": "https://json-schema.org/draft-07/schema", "$id": "https://raw.githubusercontent.com/phac-nml/gasnomenclature/main/nextflow_schema.json", "title": "phac-nml/gasnomenclature pipeline parameters", "description": "Gas Nomenclature assignment pipeline", @@ -84,8 +84,7 @@ }, "pd_count_missing": { "type": "boolean", - "description": "Count missing alleles as different", - "default": false + "description": "Count missing alleles as different" } } }, From 141d07cbe3beaf34899fed6120efac17e7e56a8e Mon Sep 17 00:00:00 2001 From: Steven Sutcliffe Date: Thu, 10 Oct 2024 16:23:08 -0400 Subject: [PATCH 11/16] Fixing linting issue --- .github/workflows/linting.yml | 21 ++++++++++++++++++--- .github/workflows/linting_comment.yml | 2 +- .nf-core.yml | 3 +++ 3 files changed, 22 insertions(+), 4 deletions(-) diff --git a/.github/workflows/linting.yml b/.github/workflows/linting.yml index 8b60f35..a502573 100644 --- a/.github/workflows/linting.yml +++ b/.github/workflows/linting.yml @@ -1,6 +1,6 @@ name: nf-core linting # This workflow is triggered on pushes and PRs to the repository. -# It runs the `nf-core lint` and markdown lint tests to ensure +# It runs the `nf-core pipelines lint` and markdown lint tests to ensure # that the code meets the nf-core guidelines. 
on: push: @@ -41,18 +41,33 @@ jobs: python-version: "3.12" architecture: "x64" + - name: read .nf-core.yml + uses: pietrobolcato/action-read-yaml@1.1.0 + id: read_yml + with: + config: ${{ github.workspace }}/.nf-core.yml + - name: Install dependencies run: | python -m pip install --upgrade pip - pip install nf-core + pip install nf-core==${{ steps.read_yml.outputs['nf_core_version'] }} - - name: Run nf-core lint + - name: Run nf-core pipelines lint + if: ${{ github.base_ref != 'master' }} env: GITHUB_COMMENTS_URL: ${{ github.event.pull_request.comments_url }} GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} GITHUB_PR_COMMIT: ${{ github.event.pull_request.head.sha }} run: nf-core -l lint_log.txt pipelines lint --dir ${GITHUB_WORKSPACE} --markdown lint_results.md + - name: Run nf-core pipelines lint --release + if: ${{ github.base_ref == 'master' }} + env: + GITHUB_COMMENTS_URL: ${{ github.event.pull_request.comments_url }} + GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} + GITHUB_PR_COMMIT: ${{ github.event.pull_request.head.sha }} + run: nf-core -l lint_log.txt pipelines lint --release --dir ${GITHUB_WORKSPACE} --markdown lint_results.md + - name: Save PR number if: ${{ always() }} run: echo ${{ github.event.pull_request.number }} > PR_number.txt diff --git a/.github/workflows/linting_comment.yml b/.github/workflows/linting_comment.yml index 40acc23..42e519b 100644 --- a/.github/workflows/linting_comment.yml +++ b/.github/workflows/linting_comment.yml @@ -11,7 +11,7 @@ jobs: runs-on: ubuntu-latest steps: - name: Download lint results - uses: dawidd6/action-download-artifact@09f2f74827fd3a8607589e5ad7f9398816f540fe # v3 + uses: dawidd6/action-download-artifact@bf251b5aa9c2f7eeb574a96ee720e24f801b7c11 # v6 with: workflow: linting.yml workflow_conclusion: completed diff --git a/.nf-core.yml b/.nf-core.yml index e7be709..7c752f9 100644 --- a/.nf-core.yml +++ b/.nf-core.yml @@ -27,6 +27,9 @@ lint: - custom_config - manifest.name - manifest.homePage + - params.max_cpus + - params.max_memory + - params.max_time readme: - nextflow_badge From 704a25b42823c442d036cf2bacb65e32bc00e2c4 Mon Sep 17 00:00:00 2001 From: Steven Sutcliffe Date: Thu, 10 Oct 2024 16:25:41 -0400 Subject: [PATCH 12/16] Revert change --- .github/workflows/linting.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/linting.yml b/.github/workflows/linting.yml index a502573..43c889c 100644 --- a/.github/workflows/linting.yml +++ b/.github/workflows/linting.yml @@ -58,7 +58,7 @@ jobs: GITHUB_COMMENTS_URL: ${{ github.event.pull_request.comments_url }} GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} GITHUB_PR_COMMIT: ${{ github.event.pull_request.head.sha }} - run: nf-core -l lint_log.txt pipelines lint --dir ${GITHUB_WORKSPACE} --markdown lint_results.md + run: nf-core -l lint_log.txt lint --dir ${GITHUB_WORKSPACE} --markdown lint_results.md - name: Run nf-core pipelines lint --release if: ${{ github.base_ref == 'master' }} From b2f659e608d486134e9950d679184ec947651cb7 Mon Sep 17 00:00:00 2001 From: Steven Sutcliffe Date: Thu, 10 Oct 2024 16:33:47 -0400 Subject: [PATCH 13/16] Fix linting --- .github/workflows/linting.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/linting.yml b/.github/workflows/linting.yml index 43c889c..a502573 100644 --- a/.github/workflows/linting.yml +++ b/.github/workflows/linting.yml @@ -58,7 +58,7 @@ jobs: GITHUB_COMMENTS_URL: ${{ github.event.pull_request.comments_url }} GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} GITHUB_PR_COMMIT: ${{ 
github.event.pull_request.head.sha }} - run: nf-core -l lint_log.txt lint --dir ${GITHUB_WORKSPACE} --markdown lint_results.md + run: nf-core -l lint_log.txt pipelines lint --dir ${GITHUB_WORKSPACE} --markdown lint_results.md - name: Run nf-core pipelines lint --release if: ${{ github.base_ref == 'master' }} From 991bc2d2f8352c68d477ff07260bff3575aa6b0d Mon Sep 17 00:00:00 2001 From: Steven Sutcliffe Date: Thu, 10 Oct 2024 16:39:56 -0400 Subject: [PATCH 14/16] Still fixing linting --- .nf-core.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.nf-core.yml b/.nf-core.yml index 7c752f9..0eda0fa 100644 --- a/.nf-core.yml +++ b/.nf-core.yml @@ -1,6 +1,6 @@ repository_type: pipeline -nf_core_version: "2.14.1" +nf_core_version: "3.0.1" lint: files_exist: - assets/nf-core-gasnomenclature_logo_light.png From d1418dd053b56ecc93f38ad7362a964119322c37 Mon Sep 17 00:00:00 2001 From: Steven Sutcliffe Date: Thu, 17 Oct 2024 16:34:40 -0400 Subject: [PATCH 15/16] Make the .editorconfig exception for test JSON files more general --- .editorconfig | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.editorconfig b/.editorconfig index 68c0e00..52f5b31 100644 --- a/.editorconfig +++ b/.editorconfig @@ -32,6 +32,6 @@ indent_style = unset indent_style = unset # ignore nf-test json file -[tests/data/irida/sample_name_add_iridanext.output.json] +[tests/data/irida/*.{json}] insert_final_newline = unset trim_trailing_whitespace = unset From 5f8b3ec1da7b1403557b3a60cc5b6fc7069083ab Mon Sep 17 00:00:00 2001 From: Steven Sutcliffe Date: Thu, 17 Oct 2024 16:38:19 -0400 Subject: [PATCH 16/16] Fix wildcard for Make the .editorconfig exception for test JSON files more general --- .editorconfig | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.editorconfig b/.editorconfig index 52f5b31..f3117a9 100644 --- a/.editorconfig +++ b/.editorconfig @@ -32,6 +32,6 @@ indent_style = unset indent_style = unset # ignore nf-test json file -[tests/data/irida/*.{json}] +[tests/data/irida/*.json] insert_final_newline = unset trim_trailing_whitespace = unset
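
---

Reviewer note (illustrative, not part of the patch series): the block below is a minimal standalone Groovy sketch of the `sample_name` sanitisation and deduplication that patch 01/16 applies to `meta.id` in `workflows/gas_nomenclature.nf`. The input rows mirror `tests/data/samplesheets/samplesheet-sample_name.csv`, and the resulting names match the error-report filenames asserted in `tests/pipelines/main.nf.test`; the map literals and `println` output here are only a stand-in for the pipeline's channel/tuple handling.

```groovy
// Sketch of the meta.id sanitisation/deduplication from workflows/gas_nomenclature.nf.
// The rows below stand in for samplesheet entries: irida_id = `sample` column,
// id = optional `sample_name` column.
def processedIDs = [] as Set

def rows = [
    [irida_id: 'sampleQ', id: 'sample 1'],
    [irida_id: 'sample1', id: 'sample#2'],
    [irida_id: 'sample2', id: 'sample#2'],
    [irida_id: 'sample3', id: null],
]

rows.each { meta ->
    if (!meta.id) {
        // No sample_name supplied: fall back to the unique sample (IRIDA) ID
        meta.id = meta.irida_id
    } else {
        // Replace anything other than letters, digits, '_', '.', '-' with '_'
        meta.id = meta.id.replaceAll(/[^A-Za-z0-9_.\-]/, '_')
    }
    // Resolve collisions by suffixing the unique IRIDA ID until the name is free
    while (processedIDs.contains(meta.id)) {
        meta.id = "${meta.id}_${meta.irida_id}"
    }
    processedIDs << meta.id
    println "${meta.irida_id} -> ${meta.id}"
}

// Expected mapping:
//   sampleQ -> sample_1
//   sample1 -> sample_2
//   sample2 -> sample_2_sample2   (duplicate name, suffixed with its sample ID)
//   sample3 -> sample3            (no sample_name provided)
```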