Skip to content

Commit

Permalink
Merge branch 'dev' into kmer_count
Browse files Browse the repository at this point in the history
  • Loading branch information
weaglesBio authored Nov 17, 2023
2 parents e40e4ce + 5edc7ad commit 40ce6a4
Show file tree
Hide file tree
Showing 4 changed files with 137 additions and 88 deletions.
15 changes: 3 additions & 12 deletions assets/test.yaml
Original file line number Diff line number Diff line change
@@ -1,20 +1,10 @@
<<<<<<< HEAD
assembly_path: /lustre/scratch123/tol/resources/treeval/treeval-testdata/asccTinyTest/assembly/Pyoeliiyoelii17XNL_assembly.fa
<<<<<<< HEAD
assembly_title: asccTest
pacbio_barcodes: /nfs/treeoflife-01/teams/tola/users/dp24/ascc/assets/pacbio_adaptors.fa
pacbio_multiplexing_barcode_names: "bc1008,bc1009"
pacbio_reads_path: /nfs/treeoflife-01/teams/tola/users/dp24/ascc/asccTinyTest/pacbio/
=======
=======

assembly_path: /lustre/scratch124/tol/projects/tol/data/insects/Polyommatus_atlantica/assembly/draft/treeval/ilPolAtla1_merged/raw/ref.fa
>>>>>>> 4a92aaa (refine bedtools and other scripts)
assembly_title: asccTinyTest
reads_path: /lustre/scratch123/tol/resources/treeval/treeval-testdata/asccTinyTest/pacbio/
reads_type: "hifi"
pacbio_barcodes: /nfs/treeoflife-01/teams/tola/users/dp24/ascc/assets/pacbio_adaptors.fa
pacbio_multiplexing_barcode_names: "bc1008,bc1009"
>>>>>>> 3ce433c (done)
sci_name: "Plasmodium yoelii yoelii 17XNL"
taxid: 352914
mito_fasta_path: /nfs/treeoflife-01/teams/tola/users/dp24/ascc/asccTinyTest/organellar/Pyoeliiyoelii17XNL_mitochondrion_ncbi.fa
Expand All @@ -23,7 +13,8 @@ kmer_len: 7
dimensionality_reduction_methods: "pca,random_trees"
# all available methods
# "pca,umap,t-sne,isomap,lle_standard,lle_hessian,lle_modified,mds,se,random_trees,kernel_pca,pca_svd,autoencoder_sigmoid,autoencoder_linear,autoencoder_selu,autoencoder_relu,nmf"
nt_database: /data/blastdb/Supported/NT/current/
nt_database: /data/blastdb/Supported/NT/202308/dbv4/
nt_database_prefix: nt
nt_kraken_db_path: /lustre/scratch123/tol/teams/tola/users/ea10/ascc_databases/nt/nt
ncbi_accessionids_folder: /lustre/scratch123/tol/teams/tola/users/ea10/ascc_databases/ncbi_taxonomy/20230509_accession2taxid/
ncbi_taxonomy_path: /lustre/scratch123/tol/teams/tola/users/ea10/databases/taxdump/
Expand Down
3 changes: 0 additions & 3 deletions conf/modules.config
Original file line number Diff line number Diff line change
Expand Up @@ -75,14 +75,11 @@ process {
ext.prefix = { "${meta.id}_euk" }
}

<<<<<<< HEAD
=======
withName: SED_SED {
ext.prefix = { "${meta.id}_fixed" }
ext.args = " -e '/>/s/ //g' "
}

>>>>>>> 0497a94 (Completing organelle blast, modified python script to accept arrayList and parse inside script)
withName: '.*:.*:GENERATE_GENOME:GNU_SORT' {
ext.prefix = { "${meta.id}_sorted"}
ext.args = { '-k2,2 -nr' }
Expand Down
5 changes: 5 additions & 0 deletions modules.json
Original file line number Diff line number Diff line change
Expand Up @@ -98,6 +98,11 @@
"git_sha": "8fc1d24c710ebe1d5de0f2447ec9439fd3d9d66a",
"installed_by": ["modules"]
},
"samtools/view": {
"branch": "master",
"git_sha": "3ffae3598260a99e8db3207dead9f73f87f90d1f",
"installed_by": ["modules"]
},
"seqkit/sliding": {
"branch": "master",
"git_sha": "8fc1d24c710ebe1d5de0f2447ec9439fd3d9d66a",
Expand Down
202 changes: 129 additions & 73 deletions workflows/ascc.nf
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,7 @@ WorkflowAscc.initialise(params, log)
//
// SUBWORKFLOW: Consisting of a mix of local and nf-core/modules
//

include { YAML_INPUT } from '../subworkflows/local/yaml_input'
include { GENERATE_GENOME } from '../subworkflows/local/generate_genome'
include { EXTRACT_TIARA_HITS } from '../subworkflows/local/extract_tiara_hits'
Expand All @@ -33,11 +34,14 @@ include { RUN_NT_KRAKEN } from '../subworkflows/local/run_nt_kra
include { RUN_FCSGX } from '../subworkflows/local/run_fcsgx'
include { PACBIO_BARCODE_CHECK } from '../subworkflows/local/pacbio_barcode_check'
include { GET_KMERS_PROFILE } from '../subworkflows/local/get_kmers_profile'
include { RUN_READ_COVERAGE } from '../subworkflows/local/run_read_coverage'
include { ORGANELLAR_BLAST as PLASTID_ORGANELLAR_BLAST } from '../subworkflows/local/organellar_blast'
include { ORGANELLAR_BLAST as MITO_ORGANELLAR_BLAST } from '../subworkflows/local/organellar_blast'

//
// MODULE: Local modules
//
include { GC_CONTENT } from '../modules/local/gc_content'
include { GC_CONTENT } from '../modules/local/gc_content'

/*
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
Expand All @@ -48,7 +52,7 @@ include { GC_CONTENT } from '../modules/local/gc_content'
//
// MODULE: Installed directly from nf-core/modules
//
include { CUSTOM_DUMPSOFTWAREVERSIONS } from '../modules/nf-core/custom/dumpsoftwareversions/main'
include { CUSTOM_DUMPSOFTWAREVERSIONS } from '../modules/nf-core/custom/dumpsoftwareversions/main'

/*
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
Expand All @@ -71,13 +75,13 @@ workflow ASCC {
)
ch_versions = ch_versions.mix(YAML_INPUT.out.versions)

// //
// // MODULE: CALCULATE GC CONTENT PER SCAFFOLD IN INPUT FASTA
// //
// GC_CONTENT (
// YAML_INPUT.out.reference_tuple
// )
// ch_versions = ch_versions.mix(GC_CONTENT.out.versions)
//
// MODULE: CALCULATE GC CONTENT PER SCAFFOLD IN INPUT FASTA
//
GC_CONTENT (
YAML_INPUT.out.reference_tuple
)
ch_versions = ch_versions.mix(GC_CONTENT.out.versions)

// //Channel
// // .fromPath( YAML_INPUT.out.nt_database, checkIfExists=true )
Expand All @@ -92,70 +96,6 @@ workflow ASCC {
)
ch_versions = ch_versions.mix(GENERATE_GENOME.out.versions)

// //
// // SUBWORKFLOW: EXTRACT RESULTS HITS FROM TIARA
// //
// EXTRACT_TIARA_HITS (
// GENERATE_GENOME.out.reference_tuple
// )
// ch_versions = ch_versions.mix(EXTRACT_TIARA_HITS.out.versions)

// //
// // LOGIC: INJECT SLIDING WINDOW VALUES INTO REFERENCE
// //
// YAML_INPUT.out.reference_tuple
// .combine ( YAML_INPUT.out.seqkit_sliding.toInteger() )
// .combine ( YAML_INPUT.out.seqkit_window.toInteger() )
// .map { meta, ref, sliding, window ->
// tuple([ id : meta.id,
// sliding : sliding,
// window : window
// ],
// file(ref)
// )}
// .set { modified_input }

// //
// // SUBWORKFLOW: EXTRACT RESULTS HITS FROM NT-BLAST
// //
// /* EXTRACT_NT_BLAST (
// modified_input,
// YAML_INPUT.out.nt_database,
// YAML_INPUT.out.ncbi_taxonomy_path,
// YAML_INPUT.out.ncbi_rankedlineage_path
// )
// ch_versions = ch_versions.mix(EXTRACT_NT_BLAST.out.versions) */

// //
// // SUBWORKFLOW:
// //
// RUN_FCSADAPTOR (
// YAML_INPUT.out.reference_tuple
// )
// ch_versions = ch_versions.mix(RUN_FCSADAPTOR.out.versions)

// //
// // SUBWORKFLOW:
// //
// RUN_FCSGX (
// YAML_INPUT.out.reference_tuple,
// YAML_INPUT.out.fcs_gx_database_path,
// YAML_INPUT.out.taxid,
// YAML_INPUT.out.ncbi_rankedlineage_path
// )
// ch_versions = ch_versions.mix(RUN_FCSADAPTOR.out.versions)

// //
// // SUBWORKFLOW: IDENTITY PACBIO BARCODES IN INPUT DATA
// //
// PACBIO_BARCODE_CHECK (
// YAML_INPUT.out.reference_tuple,
// YAML_INPUT.out.pacbio_tuple,
// YAML_INPUT.out.pacbio_barcodes,
// YAML_INPUT.out.pacbio_multiplex_codes
// )
// ch_versions = ch_versions.mix(PACBIO_BARCODE_CHECK.out.versions)

//
// SUBWORKFLOW: COUNT KMERS, THEN REDUCE DIMENSIONS USING SELECTED METHODS
//
Expand All @@ -168,6 +108,122 @@ workflow ASCC {
)
ch_versions = ch_versions.mix(GET_KMERS_PROFILE.out.versions)

//
// SUBWORKFLOW: EXTRACT RESULTS HITS FROM TIARA
//
EXTRACT_TIARA_HITS (
GENERATE_GENOME.out.reference_tuple
)
ch_versions = ch_versions.mix(EXTRACT_TIARA_HITS.out.versions)

//
// LOGIC: INJECT SLIDING WINDOW VALUES INTO REFERENCE
//
YAML_INPUT.out.reference_tuple
.combine ( YAML_INPUT.out.seqkit_sliding.toInteger() )
.combine ( YAML_INPUT.out.seqkit_window.toInteger() )
.map { meta, ref, sliding, window ->
tuple([ id : meta.id,
sliding : sliding,
window : window
],
file(ref)
)}
.set { modified_input }

//
// SUBWORKFLOW: EXTRACT RESULTS HITS FROM NT-BLAST
//
/* EXTRACT_NT_BLAST (
modified_input,
YAML_INPUT.out.nt_database,
YAML_INPUT.out.ncbi_accessions,
YAML_INPUT.out.ncbi_rankedlineage_path
)
ch_versions = ch_versions.mix(EXTRACT_NT_BLAST.out.versions) */

//
// LOGIC: CHECK WHETHER THERE IS A MITO AND BRANCH
//
YAML_INPUT.out.mito_tuple
.branch { meta, check ->
valid: check != "NO MITO"
invalid: check == "NO MITO"
}
.set { mito_check }

//
// SUBWORKFLOW: BLASTING FOR MITO ASSEMBLIES IN GENOME
//
MITO_ORGANELLAR_BLAST (
YAML_INPUT.out.reference_tuple,
YAML_INPUT.out.mito_var,
mito_check.valid
)
ch_versions = ch_versions.mix(MITO_ORGANELLAR_BLAST.out.versions)

//
// LOGIC: CHECK WHETHER THERE IS A PLASTID AND BRANCH
//
YAML_INPUT.out.plastid_tuple
.branch { meta, check ->
valid: check != "NO PLASTID"
invalid: check == "NO PLASTID"
}
.set { plastid_check }

//
// SUBWORKFLOW: BLASTING FOR PLASTID ASSEMBLIES IN GENOME
//
PLASTID_ORGANELLAR_BLAST (
YAML_INPUT.out.reference_tuple,
YAML_INPUT.out.plastid_var,
plastid_check.valid
)
ch_versions = ch_versions.mix(PLASTID_ORGANELLAR_BLAST.out.versions)

//
// SUBWORKFLOW: RUN FCS-ADAPTOR ON THE INPUT REFERENCE
//
RUN_FCSADAPTOR (
YAML_INPUT.out.reference_tuple
)
ch_versions = ch_versions.mix(RUN_FCSADAPTOR.out.versions)

//
// SUBWORKFLOW: RUN FCS-GX ON THE INPUT REFERENCE, USING THE FCS-GX DATABASE,
//              TAXID AND NCBI RANKED LINEAGE FILE SUPPLIED VIA THE INPUT YAML
//
RUN_FCSGX (
YAML_INPUT.out.reference_tuple,
YAML_INPUT.out.fcs_gx_database_path,
YAML_INPUT.out.taxid,
YAML_INPUT.out.ncbi_rankedlineage_path
)
// Collect the versions emitted by RUN_FCSGX itself; mixing in
// RUN_FCSADAPTOR.out.versions here would only duplicate the adaptor versions
// and drop the FCS-GX ones.
// NOTE(review): assumes RUN_FCSGX emits `versions` like the other subworkflows - confirm.
ch_versions = ch_versions.mix(RUN_FCSGX.out.versions)

//
// SUBWORKFLOW: IDENTIFY PACBIO BARCODES IN INPUT DATA
//
/*PACBIO_BARCODE_CHECK (
YAML_INPUT.out.reference_tuple,
YAML_INPUT.out.pacbio_tuple,
YAML_INPUT.out.pacbio_barcodes,
YAML_INPUT.out.pacbio_multiplex_codes
)
ch_versions = ch_versions.mix(PACBIO_BARCODE_CHECK.out.versions)*/

//
// SUBWORKFLOW: CALCULATE AVERAGE READ COVERAGE
//
RUN_READ_COVERAGE (
YAML_INPUT.out.reference_tuple,
YAML_INPUT.out.assembly_path,
YAML_INPUT.out.pacbio_tuple,
YAML_INPUT.out.reads_type
)
ch_versions = ch_versions.mix(RUN_READ_COVERAGE.out.versions)


//
// SUBWORKFLOW: COLLECT SOFTWARE VERSIONS
//
Expand Down

0 comments on commit 40ce6a4

Please sign in to comment.