Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Workflow #4

Merged
merged 16 commits into from
Apr 4, 2024
49 changes: 46 additions & 3 deletions conf/modules.config
Original file line number Diff line number Diff line change
Expand Up @@ -15,8 +15,11 @@ process {
// Publish directory names
assembly_directory_name = "assembly"
summary_directory_name = "summary"
profile_dists_directory_name = "distances"
gas_call_directory_name = "call"

locidex_merge_directory_name = [params.outdir , "locidex", "merge"].join(File.separator)
locidex_merge_ref_directory_name = [params.outdir , "locidex", "merge", "reference"].join(File.separator)
locidex_merge_query_directory_name = [params.outdir , "locidex", "merge", "query"].join(File.separator)

publishDir = [
path: { "${params.outdir}/${task.process.tokenize(':')[-1].tokenize('_')[0].toLowerCase()}" },
Expand Down Expand Up @@ -56,15 +59,55 @@ process {
]
}

withName: LOCIDEX_MERGE {
withName: LOCIDEX_MERGE_REF {
publishDir = [
path: locidex_merge_directory_name,
path: locidex_merge_ref_directory_name,
mode: params.publish_dir_mode,
pattern: "*/*",
saveAs: { filename -> filename.equals('versions.yml') ? null : filename }
]
}

// Publishing rules for the LOCIDEX_MERGE_QUERY process.
// Files go to locidex_merge_query_directory_name, using the publish mode
// taken from params.publish_dir_mode.
withName: LOCIDEX_MERGE_QUERY {
publishDir = [
path: locidex_merge_query_directory_name,
mode: params.publish_dir_mode,
// Only outputs that sit exactly one directory below the task root are considered.
pattern: "*/*",
// Returning null from saveAs skips publishing that file; everything else keeps its name.
// NOTE(review): a top-level 'versions.yml' presumably never matches "*/*", so this
// filter may be redundant — confirm before relying on it.
saveAs: { filename -> filename.equals('versions.yml') ? null : filename }
]
}


// Publishing rules for the PROFILE_DISTS process.
withName: PROFILE_DISTS {
publishDir = [
// Destination is resolved lazily per task: <params.outdir>/<task.profile_dists_directory_name>.
path: { ["${params.outdir}", "${task.profile_dists_directory_name}"].join(File.separator) },
mode: params.publish_dir_mode,
// saveAs decides the published name:
//   - 'versions.yml' -> null (not published)
//   - a name containing a path separator -> only its last path component is kept,
//     so the per-run output directory is flattened away
//   - anything else -> published unchanged
// NOTE(review): String.split takes a regular expression; File.separator is passed
// as that regex, which is fine for "/" — confirm if other separators must be supported.
saveAs: { filename -> filename.equals('versions.yml') ? null :
filename.contains(File.separator) ? filename.split(File.separator)[-1] : filename }
]
}

// Publishing rules for the GAS_CALL process.
// Three separate publishDir entries share the same destination
// (<params.outdir>/<task.gas_call_directory_name>) and publish mode; they differ
// only in the glob pattern selecting which output is published.
// No saveAs is configured here, so file names are not rewritten by this block.
withName: GAS_CALL {
publishDir = [
[
// Threshold definitions written one directory below the task root.
path: { ["${params.outdir}", "${task.gas_call_directory_name}"].join(File.separator) },
mode: params.publish_dir_mode,
pattern: "*/thresholds.json"
],
[
// Results file, in either of the two extensions the pattern accepts.
path: { ["${params.outdir}", "${task.gas_call_directory_name}"].join(File.separator) },
mode: params.publish_dir_mode,
pattern: "*/results.{text,parquet}"
],
[
// run.json written one directory below the task root.
path: { ["${params.outdir}", "${task.gas_call_directory_name}"].join(File.separator) },
mode: params.publish_dir_mode,
pattern: "*/run.json"
]
]
}


withName: CUSTOM_DUMPSOFTWAREVERSIONS {
publishDir = [
path: { "${params.outdir}/pipeline_info" },
Expand Down
11 changes: 10 additions & 1 deletion conf/test.config
Original file line number Diff line number Diff line change
Expand Up @@ -20,5 +20,14 @@ params {
max_time = '1.h'

// Input data
input = 'https://raw.githubusercontent.com/phac-nml/gasnomenclature/main/assets/samplesheet.csv'
input = 'https://raw.githubusercontent.com/phac-nml/gasnomenclature/dev/tests/data/samplesheets/samplesheet1.csv'
ref_clusters = 'https://raw.githubusercontent.com/phac-nml/gasnomenclature/dev/tests/data/clusters/expected_clusters.txt'
}


/* This is required to run in WSL/Ubuntu using Singularity.
Without this, profile_dists did not complete successfully
due to issues with multiprocessing in the container. A similar
error is described at https://github.com/marcelm/cutadapt/issues/583
*/
singularity.runOptions = "--contain"
11 changes: 10 additions & 1 deletion conf/test_full.config
Original file line number Diff line number Diff line change
Expand Up @@ -15,5 +15,14 @@ params {
config_profile_description = 'Full test dataset to check pipeline function'

// Input data for full size test
input = 'https://raw.githubusercontent.com/phac-nml/gasnomenclature/main/assets/samplesheet.csv'
input = 'https://raw.githubusercontent.com/phac-nml/gasnomenclature/dev/tests/data/samplesheets/samplesheet1.csv'
ref_clusters = 'https://raw.githubusercontent.com/phac-nml/gasnomenclature/dev/tests/data/clusters/expected_clusters.txt'
}

/* This is required to run in WSL/Ubuntu using Singularity.
Without this, profile_dists did not complete successfully
due to issues with multiprocessing in the container. A similar
error is described at https://github.com/marcelm/cutadapt/issues/583
*/
singularity.runOptions = "--contain"

13 changes: 7 additions & 6 deletions modules/local/gas/call/main.nf
Original file line number Diff line number Diff line change
Expand Up @@ -2,25 +2,26 @@

process GAS_CALL{
label "process_high"
tag "Calling: ${meta.id}"
tag "Assigning Nomenclature"

container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
'https://depot.galaxyproject.org/singularity/genomic_address_service%3A0.1.1--pyh7cba7a3_1' :
'quay.io/biocontainers/genomic_address_service:0.1.1--pyh7cba7a3_1' }"


input:
tuple val(meta), path(reference_clusters), path(distances)
path(reference_clusters)
path(distances)

output:
tuple val(meta), path("${prefix}/results.{text,parquet}"), emit: distances, optional: true
tuple val(meta), path("${prefix}/thresholds.json"), emit: thresholds
tuple val(meta), path("${prefix}/run.json"), emit: run
path("${prefix}/results.{text,parquet}"), emit: distances, optional: true
path("${prefix}/thresholds.json"), emit: thresholds
path("${prefix}/run.json"), emit: run
path "versions.yml", emit: versions

script:
// Need to add more args for gas call below
prefix = meta.id
prefix = "Called"
"""
gas call --dists $distances \\
--rclusters $reference_clusters \\
Expand Down
8 changes: 5 additions & 3 deletions modules/local/locidex/merge/main.nf
Original file line number Diff line number Diff line change
Expand Up @@ -9,17 +9,19 @@ process LOCIDEX_MERGE {
'quay.io/biocontainers/locidex:0.1.1--pyhdfd78af_0' }"

input:
val input_values // [file(sample1), file(sample2), file(sample3), etc...]
path input_values // [file(sample1), file(sample2), file(sample3), etc...]
val input_tag // makes output unique and denotes the item as the reference or query to prevent name collision

output:
path("${combined_dir}/*.tsv"), emit: combined_profiles
path("${combined_dir}/*.json"), emit: report
path "versions.yml", emit: versions

script:
combined_dir = "merged"
combined_dir = "merged_${input_tag}"
"""
locidex merge -i ${input_values.join(' ')} -o ${combined_dir}

mv ${combined_dir}/*.tsv ${combined_dir}/merged_profiles_${input_tag}.tsv
cat <<-END_VERSIONS > versions.yml
"${task.process}":
locidex merge: \$(echo \$(locidex search -V 2>&1) | sed 's/^.*locidex //' )
Expand Down
23 changes: 12 additions & 11 deletions modules/local/profile_dists/main.nf
Original file line number Diff line number Diff line change
@@ -1,24 +1,25 @@
process PROFILE_DISTS{
label "process_high"
tag "Pairwise Distance Generation: ${meta.id}"
tag "Gathering Distances Between Reference and Query Profiles"

container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
'https://depot.galaxyproject.org/singularity/profile_dists%3A1.0.0--pyh7cba7a3_0' :
'quay.io/biocontainers/profile_dists:1.0.0--pyh7cba7a3_0' }"

input:
tuple val(meta), path(query), path(ref)
path query
path ref
val mapping_format
path(mapping_file)
path(columns)
path mapping_file
path columns


output:
tuple val(meta), path("${prefix}_${mapping_format}/allele_map.json"), emit: allele_map
tuple val(meta), path("${prefix}_${mapping_format}/query_profile.{text,parquet}"), emit: query_profile
tuple val(meta), path("${prefix}_${mapping_format}/ref_profile.{text,parquet}"), emit: ref_profile
tuple val(meta), path("${prefix}_${mapping_format}/results.{text,parquet}"), emit: results
tuple val(meta), path("${prefix}_${mapping_format}/run.json"), emit: run
path("${prefix}/allele_map.json"), emit: allele_map
path("${prefix}/query_profile.{text,parquet}"), emit: query_profile
path("${prefix}/ref_profile.{text,parquet}"), emit: ref_profile
path("${prefix}/results.{text,parquet}"), emit: results
path("${prefix}/run.json"), emit: run
path "versions.yml", emit: versions


Expand All @@ -41,7 +42,7 @@ process PROFILE_DISTS{
args = args + " --count_missing"
}
// --match_threshold $params.profile_dists.match_thresh \\
prefix = meta.id
prefix = "distances_${mapping_format}"
"""
profile_dists --query $query --ref $ref $args --outfmt $mapping_format \\
--distm $params.pd_distm \\
Expand All @@ -50,7 +51,7 @@ process PROFILE_DISTS{
--sample_qual_thresh $params.pd_sample_quality_threshold \\
--max_mem ${task.memory.toGiga()} \\
--cpus ${task.cpus} \\
-o ${prefix}_${mapping_format}
-o ${prefix}

cat <<-END_VERSIONS > versions.yml
"${task.process}":
Expand Down
17 changes: 17 additions & 0 deletions nextflow.config
Original file line number Diff line number Diff line change
Expand Up @@ -47,6 +47,23 @@ params {
validate_params = true

// Profile Dists
pd_outfmt = "pairwise"
pd_distm = "scaled"
pd_missing_threshold = 1.0
pd_sample_quality_threshold = 1.0
pd_match_threshold = -1.0
pd_file_type = "text"
pd_mapping_file = null // default is no file
pd_force = false
pd_skip = false
pd_columns = null
pd_count_missing = true


// GAS Call
gm_thresholds = "10,5,0"
gm_delimiter = "'.'" // note the single quotes surrounding the delimiter
ref_clusters = ""

}

Expand Down
2 changes: 1 addition & 1 deletion nf-test.config
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,6 @@ config {
testsDir "tests"
workDir ".nf-test"
configFile "tests/nextflow.config"
profile ""
profile "docker"

}
5 changes: 5 additions & 0 deletions tests/data/called/expected_results.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
id address level_1 level_2 level_3
sample1 1.1.1 1 1 1
sample2 1.1.1 1 1 1
sample3 2.2.2 2 2 2
sampleQ 1.1.1 1 1 1
4 changes: 4 additions & 0 deletions tests/data/distances/expected_pairwise_dists.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
query_id ref_id dist
sampleQ sample1 0.0
sampleQ sample2 33.333333333333336
sampleQ sample3 66.66666666666667
8 changes: 4 additions & 4 deletions tests/data/profiles/expected-profile1.tsv
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
sample_id l1 l2 l3
sample1 1 1 1
sample2 1 1 1
sample3 1 1 2
sample_id l1 l2 l3
sample1 1 1 1
sample2 1 1 1
sample3 1 1 2
38 changes: 0 additions & 38 deletions tests/modules/local/assemblystub/main.nf.test

This file was deleted.

40 changes: 0 additions & 40 deletions tests/modules/local/generatesamplejson/main.nf.test

This file was deleted.

37 changes: 0 additions & 37 deletions tests/modules/local/generatesummary/main.nf.test

This file was deleted.

Loading
Loading