Skip to content

Commit

Permalink
Update to sample yaml and files
Browse files Browse the repository at this point in the history
  • Loading branch information
DLBPointon committed Aug 21, 2024
1 parent 427c6d6 commit b443920
Show file tree
Hide file tree
Showing 29 changed files with 73 additions and 2,235 deletions.
19 changes: 13 additions & 6 deletions assets/idCulLati1.yaml
Original file line number Diff line number Diff line change
@@ -1,26 +1,33 @@
# General values for all subpipelines and modules
assembly_id: idCulLati1_ear
reference_hap1: /nfs/treeoflife-01/teams/tola/users/dp24/ear/idCulLati1/primary.fa
reference_hap2: /nfs/treeoflife-01/teams/tola/users/dp24/ear/idCulLati1/hap2.fa
reference_haplotigs: /

# If a mapped BAM already exists, set it below and add --mapped TRUE to the nextflow command; otherwise ignore this field.
mapped_bam: /nfs/treeoflife-01/teams/tola/users/dp24/ear/idCulLati1/mapped_bam.bam

merquryfk:
fastk_hist: /lustre/scratch122/tol/data/a/5/e/1/6/d/Culex_laticinctus/genomic_data/idCulLati1/pacbio/kmer/k31/idCulLati1.k31.hist
fastk_ktab: /lustre/scratch122/tol/data/a/5/e/1/6/d/Culex_laticinctus/genomic_data/idCulLati1/pacbio/kmer/k31/

# Used by both subpipelines
longread:
type: hifi
dir: /lustre/scratch122/tol/data/a/5/e/1/6/d/Culex_laticinctus/genomic_data/idCulLati1/pacbio/fasta/
curationpretext:
aligner: minimap2
telomere_motif: TTAGG
hic_dir: /lustre/scratch122/tol/data/a/5/e/1/6/d/Culex_laticinctus/genomic_data/idCulLati2/hic-arima2/
merquryfk:
fastk_hist: /lustre/scratch122/tol/data/a/5/e/1/6/d/Culex_laticinctus/genomic_data/idCulLati1/pacbio/kmer/k31/idCulLati1.k31.hist
fastk_ktab: /lustre/scratch122/tol/data/a/5/e/1/6/d/Culex_laticinctus/genomic_data/idCulLati1/pacbio/kmer/k31/
btk:
taxid: 1464561
lineages: "insecta_odb10"
gca_accession: GCA_0001
nt_database: /data/blastdb/Supported/NT/current
nt_database_prefix: nt
diamond_uniprot_database_path: /lustre/scratch123/tol/resources/uniprot_reference_proteomes/latest/reference_proteomes.dmnd
diamond_nr_database_path: /lustre/scratch123/tol/resources/nr/latest/nr.dmnd
ncbi_taxonomy_path: /lustre/scratch123/tol/resources/taxonomy/latest/new_taxdump/
ncbi_rankedlineage_path: /lustre/scratch123/tol/resources/taxonomy/latest/new_taxdump/rankedlineage.dmp
btk_yaml: /nfs/users/nfs_d/dp24/sanger-tol-ear/assets/btk_draft.yaml
taxid: 1464561
gca_accession: GCA_0001
lineages: "insecta_odb10"
config: /nfs/treeoflife-01/teams/tola/users/dp24/ear/conf/sanger-tol-btk.config
4 changes: 2 additions & 2 deletions main.nf
Original file line number Diff line number Diff line change
Expand Up @@ -29,15 +29,15 @@ include { EAR } from './workflows/ear'
workflow SANGERTOL_EAR {

take:
samplesheet // channel: samplesheet read in from --input
input_yaml // channel: input_yaml read in from --input

main:

//
// WORKFLOW: Run pipeline
//
EAR (
samplesheet
input_yaml
)


Expand Down
21 changes: 0 additions & 21 deletions modules.json
Original file line number Diff line number Diff line change
Expand Up @@ -5,20 +5,6 @@
"https://github.com/nf-core/modules.git": {
"modules": {
"nf-core": {
"busco/busco": {
"branch": "master",
"git_sha": "17486961b8b1ab1aae258c83a7e947b40d8ab670",
"installed_by": [
"modules"
]
},
"fastqc": {
"branch": "master",
"git_sha": "285a50500f9e02578d90b3ce6382ea3c30216acd",
"installed_by": [
"modules"
]
},
"gfastats": {
"branch": "master",
"git_sha": "3f5420aa22e00bd030a2556dfdffc9e164ec0ec5",
Expand All @@ -41,13 +27,6 @@
"modules"
]
},
"multiqc": {
"branch": "master",
"git_sha": "b7ebe95761cd389603f9cc0e0dc384c0f663815a",
"installed_by": [
"modules"
]
},
"samtools/merge": {
"branch": "master",
"git_sha": "04fbbc7c43cebc0b95d5b126f6d9fe4effa33519",
Expand Down
49 changes: 26 additions & 23 deletions modules/local/sanger_tol_btk.nf
Original file line number Diff line number Diff line change
Expand Up @@ -7,46 +7,49 @@ process SANGER_TOL_BTK {
tuple val(meta1), path(bam) // Name needs to remain the same as previous process as they are referenced in the samplesheet
tuple val(meta2), path(samplesheet_csv, stageAs: "SAMPLESHEET.csv")
path blastp, stageAs: "blastp.dmnd"
path blastn
path blastn, stageAs: ""
path blastx
path config_file
path tax_dump
path btk_yaml, stageAs: "BTK.yaml"
val busco_lineages
val taxon
val gca_accession

output:
tuple val(meta), path("${meta.id}_btk_out/blobtoolkit/REFERENCE"), emit: dataset
path("${meta.id}_btk_out/blobtoolkit/plots"), emit: plots
path("${meta.id}_btk_out/blobtoolkit/REFERENCE/summary.json.gz"), emit: summary_json
path("${meta.id}_btk_out/busco"), emit: busco_data
path("${meta.id}_btk_out/multiqc"), emit: multiqc_report
path("blobtoolkit_pipeline_info"), emit: pipeline_info
path "versions.yml", emit: versions
tuple val(meta), path("*_out/blobtoolkit/REFERENCE"), emit: dataset
path("*_out/blobtoolkit/plots"), emit: plots
path("*_out/blobtoolkit/REFERENCE/summary.json.gz"), emit: summary_json
path("*_out/busco"), emit: busco_data
path("*_out/multiqc"), emit: multiqc_report
path("*_out/blobtoolkit_pipeline_info"), emit: pipeline_info
path "versions.yml", emit: versions

script:
def args = task.ext.args ?: ""
def executor = task.ext.executor ?: ""
def profiles = task.ext.profiles ?: ""
def get_version = task.ext.version_data ?: "UNKNOWN - SETTING NOT SET"
def config = config_file ? "-c $config_file" : ""
def pipeline_version = task.ext.version ?: "draft_assemblies"
// YAML used to avoid the use of GCA accession number
// https://github.com/sanger-tol/blobtoolkit/issues/77
def pipeline_name = task.ext.pipeline_name
def (pipeline_prefix,pipeline_suffix) = pipeline_name.split('/')
def output_dir = "${meta.id}_${pipeline_suffix}_out"
def args = task.ext.args ?: ""
def executor = task.ext.executor ?: ""
def profiles = task.ext.profiles ?: ""
def get_version = task.ext.version_data ?: "UNKNOWN - SETTING NOT SET"
def config = config_file ? "-c $config_file" : ""
def pipeline_version = task.ext.version ?: "main"

// A nested pipeline apparently cannot see files staged in the same directory.
// Passing the paths through realpath gets around this, but the files copied into
// the folder are then just wasted space. This should be fixed by using Mahesh's
// method of nesting, but that is proving a bit complicated with BTK.

// outdir should be an arg

// blastx and blastp can use the same database, hence the stageAs

// Running these as unique jobs means we don't have to worry about multiple pipeline
// head jobs running inside the same initial Nextflow head job, which balloons memory.
// For LSF we can use -Is -tty to keep the output of this sub-pipeline in the
// terminal, keeping the job open until the pipeline completes.

// The printf statement appends the sub-pipeline's versions file to the main versions file.
"""
$executor 'nextflow run sanger-tol/blobtoolkit \\
$executor 'nextflow run $pipeline_name \\
-r $pipeline_version \\
-profile $profiles \\
--input "\$(realpath $samplesheet_csv)" \\
Expand All @@ -62,18 +65,18 @@ process SANGER_TOL_BTK {
$args \\
-resume'
mv ${meta.id}_btk_out/pipeline_info blobtoolkit_pipeline_info
cat <<-END_VERSIONS > versions.yml
"${task.process}":
Blobtoolkit: $pipeline_version
Nextflow: \$(nextflow -v | cut -d " " -f3)
executor system: $get_version
END_VERSIONS
printf "%s/t" <${output_dir}/pipeline_info/software_version.yml >> versions.yml
"""

stub:
def pipeline_version = task.ext.version ?: "draft_assemblies"
def pipeline_version = task.ext.version ?: "main"

"""
mkdir -p ${meta.id}_btk_out/blobtoolkit/${meta.id}_out
Expand Down
36 changes: 30 additions & 6 deletions modules/local/sanger_tol_cpretext.nf
Original file line number Diff line number Diff line change
Expand Up @@ -9,36 +9,60 @@ process SANGER_TOL_CPRETEXT {
path(config_file)

output:
tuple val(reference), path("*_out/*"), emit: dataset
path "versions.yml", emit: versions
tuple val(reference), path("*_out/*"), emit: dataset
path "versions.yml", emit: versions

script:
def pipeline_name = "sanger-tol/curationpretext" // should be a task.ext.args
def pipeline_name = task.ext.pipeline_name
def (pipeline_prefix,pipeline_suffix) = pipeline_name.split('/')
def output_dir = "${reference}_${pipeline_suffix}_out"
def args = task.ext.args ?: ""
def executor = task.ext.executor ?: ""
def profiles = task.ext.profiles ?: ""
def get_version = task.ext.version_data ?: "UNKNOWN - SETTING NOT SET"
def config = config_file ? "-c $config_file" : ""
def pipeline_version = task.ext.version ?: "draft_assemblies"
def pipeline_version = task.ext.version ?: "main"

// A nested pipeline apparently cannot see files staged in the same directory.
// Passing the paths through realpath gets around this, but the files copied into
// the folder are then just wasted space. This should be fixed by using Mahesh's
// method of nesting, but that is proving a bit complicated with BTK.

// outdir should be an arg
// Running these as unique jobs means we don't have to worry about multiple pipeline
// head jobs running inside the same initial Nextflow head job, which balloons memory.
// For LSF we can use -Is -tty to keep the output of this sub-pipeline in the
// terminal, keeping the job open until the pipeline completes.

// The printf statement appends the sub-pipeline's versions file to the main versions file.
"""
$executor 'nextflow run $pipeline_name \\
-r $pipeline_version \\
-profile $profiles \\
--input "\$(realpath $reference)" \\
--outdir ${reference}_${pipeline_suffix}_out \\
--outdir $output_dir \\
--longread "\$(realpath $longread_dir)" \\
--cram "\$(realpath $cram_dir)" \\
$args \\
$config \\
-resume'
cat <<-END_VERSIONS > versions.yml
"${task.process}":
$pipeline_suffix: $pipeline_version
Nextflow: \$(nextflow -v | cut -d " " -f3)
executor system: $get_version
END_VERSIONS
printf "%s/t" <${output_dir}/pipeline_info/software_version.yml >> versions.yml
"""

stub:
def pipeline_version = task.ext.version ?: "main"
def (pipeline_prefix,pipeline_suffix) = pipeline_name.split('/')
def output_dir = "${reference}_${pipeline_suffix}_out"
"""
mkdir ${output_dir}
touch ${output_dir}/reference.txt
cat <<-END_VERSIONS > versions.yml
"${task.process}":
Expand Down
7 changes: 0 additions & 7 deletions modules/nf-core/busco/busco/environment.yml

This file was deleted.

107 changes: 0 additions & 107 deletions modules/nf-core/busco/busco/main.nf

This file was deleted.

Loading

0 comments on commit b443920

Please sign in to comment.