Commit 3231ba7: fix variable declarations

ens-ftricomi committed Apr 12, 2024
1 parent bcc7819 commit 3231ba7

Showing 8 changed files with 28 additions and 234 deletions.
5 changes: 3 additions & 2 deletions pipelines/nextflow/modules/build_metadata.nf
@@ -25,11 +25,12 @@ process BUILD_METADATA {


output:
stdout
tuple val(db_meta)

script:
"""
echo "gca,taxon_id"
echo "$gca,\$$taxon_id"
"""
}
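For context, the output change above swaps raw stdout for a value channel. A minimal sketch of how a caller might consume it (the workflow block and channel name are illustrative, not part of this commit):

    // Hypothetical consumer: BUILD_METADATA now emits db_meta as a value,
    // so downstream code no longer parses CSV text from stdout.
    workflow {
        meta_ch = BUILD_METADATA('GCA_000001405.15', '9606')
        meta_ch.view { db_meta -> "metadata: ${db_meta}" }
    }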
5 changes: 1 addition & 4 deletions pipelines/nextflow/modules/copy_output_to_ensembl_ftp.nf
@@ -15,19 +15,16 @@ See the License for the specific language governing permissions and
limitations under the License.
*/

include { make_publish_dir } from '../utils.nf'

process COPY_OUTPUT_TO_ENSEMBL_FTP {
// rename busco summary file in <production name>_gca_busco_short_summary.txt
tag "$db.species:$db.gca"
label 'default'
publishDir { make_publish_dir(db.publish_dir, ${params.project}, 'statistics') }, mode: 'copy'
publishDir "${params.production_ftp_dir}/${db_meta.publish_dir}/statistics", mode: 'copy'

input:
tuple val(db_meta), path(summary_file)

output:
path(${params.production_ftp_dir}/${db_meta.publish_dir}/statistics)

script:
"""
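The replacement publishDir above builds the FTP target from pipeline params at run time. A minimal self-contained sketch of the same pattern (process name, param default, and output file are illustrative):

    // Hypothetical process using the interpolated-publishDir pattern
    params.production_ftp_dir = '/nfs/production/ftp'
    process DEMO_PUBLISH {
        publishDir "${params.production_ftp_dir}/demo/statistics", mode: 'copy'

        output:
        path 'stats.txt'

        script:
        """
        echo ok > stats.txt
        """
    }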
9 changes: 3 additions & 6 deletions pipelines/nextflow/modules/omark/omark_output.nf
@@ -15,19 +15,16 @@ See the License for the specific language governing permissions and
limitations under the License.
*/

include { make_publish_dir } from '../utils.nf'
//include { make_publish_dir } from '../utils.nf'

process OMARK_OUTPUT {
// rename the omark summary file to <production name>_<gca>_omark_proteins_detailed_summary.txt
tag "$db.species:$db.gca"
label 'default'
publishDir { make_publish_dir(db.publish_dir, project, 'statistics') }, mode: 'copy'
publishDir "db.publish_dir/statistics", mode: 'copy'

input:
tuple val(db), path(summary_file, stageAs: "short_summary_from_busco_run.txt")
val(datatype)
val(project)
path("proteins_detailed_summary.txt"), emit: summary_file

output:
path("*_proteins_detailed_summary.txt"), emit:summary_file
@@ -39,4 +36,4 @@ process OMARK_OUTPUT {
def gca = db.gca.toLowerCase().replaceAll(/\./, "v").replaceAll(/_/, "")
summary_name = [species, gca, "omark", "proteins_detailed_summary.txt"].join("_")
"""
}
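For illustration, the rename logic above turns db.gca = 'GCA_000001405.15' into gca = 'gca000001405v15' (lowercase, '.' to 'v', underscores dropped), giving a summary name like the following (the species value is assumed, since its derivation sits outside this hunk):

    // Hypothetical worked example of summary_name construction
    // species = 'homo_sapiens', gca = 'gca000001405v15'
    // summary_name = 'homo_sapiens_gca000001405v15_omark_proteins_detailed_summary.txt'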
215 changes: 3 additions & 212 deletions pipelines/nextflow/modules/utils.nf
@@ -25,225 +25,16 @@ def make_publish_dir(publish_dir, project, name) {
return list.join("/")
}

import groovy.sql.Sql
import java.text.SimpleDateFormat
import java.time.LocalDateTime
import java.time.format.DateTimeFormatter

def checkTaxonomy(String jdbcUrl, String username, String password, String taxonId) {
def sql = Sql.newInstance(jdbcUrl, username, password)

try {
def query = "SELECT * FROM meta WHERE taxon_id = '${taxonId}'"
def result = sql.rows(query)
return result.size() > 0
    } catch (Exception ex) {
        ex.printStackTrace()
        return false
    } finally {
        sql.close()
    }
}
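These helpers interpolate caller-supplied values directly into SQL strings. Where the database is shared, a parameterized variant is safer; a sketch under that assumption (the name checkTaxonomySafe is hypothetical, the placeholder-binding overload of groovy.sql.Sql.rows is standard):

    // Hypothetical hardened variant: bind parameters instead of interpolating
    def checkTaxonomySafe(String jdbcUrl, String username, String password, String taxonId) {
        def sql = Sql.newInstance(jdbcUrl, username, password)
        try {
            return sql.rows('SELECT 1 FROM meta WHERE taxon_id = ?', [taxonId]).size() > 0
        } finally {
            sql.close()
        }
    }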

def getLastCheckedDate(String jdbcUrl, String username, String password, String taxonId) {
def sql = Sql.newInstance(jdbcUrl, username, password)
def lastCheckedDate = null

try {
def query = "SELECT last_check FROM meta WHERE taxon_id = '${taxonId}'"
def result = sql.rows(query)

if (result.size() > 0) {
// Assuming 'last_check' is a date-like column
// Adjust the date format pattern based on the actual format in your database
def dateFormat = new SimpleDateFormat("yyyy-MM-dd") // Adjust the format if needed
lastCheckedDate = dateFormat.parse(result[0].last_check)
}
} catch (Exception ex) {
ex.printStackTrace()
} finally {
sql.close()
}

return lastCheckedDate
}

def insertMetaRecord(String jdbcUrl, String username, String password, String taxonId) {
def sql = Sql.newInstance(jdbcUrl, username, password)

try {
// Get the current date and time
def currentDate = LocalDateTime.now()
def dateFormatter = DateTimeFormatter.ofPattern("yyyy-MM-dd")
def formattedDate = currentDate.format(dateFormatter)

        // Execute the SQL INSERT statement
        def insertQuery = "INSERT INTO meta (taxon_id, last_checked_date) VALUES ('${taxonId}', '${formattedDate}')"
        sql.executeInsert(insertQuery)
} catch (Exception ex) {
ex.printStackTrace()
} finally {
sql.close()
}

}
def updateLastCheckedDate(String jdbcUrl, String username, String password, String taxonId) {
def sql = Sql.newInstance(jdbcUrl, username, password)

try {
// Get the current date and time
def currentDate = LocalDateTime.now()
def dateFormatter = DateTimeFormatter.ofPattern("yyyy-MM-dd")
def formattedDate = currentDate.format(dateFormatter)

// Execute the SQL UPDATE statement
def updateQuery = "UPDATE meta SET last_checked_date = '${formattedDate}' WHERE taxon_id = '${taxonId}'"
sql.executeUpdate(updateQuery)
    } catch (Exception ex) {
        ex.printStackTrace()
    } finally {
        sql.close()
    }
}
def build_ncbi_path(gca, assembly_name) {
    final gca_splitted = gca.replaceAll("_", "").tokenize(".")[0].split("(?<=\\G.{3})").join('/')
    final assembly = assembly_name.replaceAll(" ", "_")
    return "https://ftp.ncbi.nlm.nih.gov/genomes/all/${gca_splitted}/${gca}_${assembly}/${gca}_${assembly}_genomic.fna.gz"
}
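A worked example of the path construction, for accession GCA_000001405.15 with assembly name GRCh38:

    // 'GCA_000001405.15' -> strip '_' -> drop version -> 'GCA000001405' -> 'GCA/000/001/405'
    assert build_ncbi_path('GCA_000001405.15', 'GRCh38') ==
        'https://ftp.ncbi.nlm.nih.gov/genomes/all/GCA/000/001/405/GCA_000001405.15_GRCh38/GCA_000001405.15_GRCh38_genomic.fna.gz'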

def getPairedFastqsURL(String jdbcUrl, String username, String password, String run_accession) {
    def sql = Sql.newInstance(jdbcUrl, username, password)
    def result = null
    try {
        def query = "SELECT url FROM file INNER JOIN run ON file.run_id = run.run_id WHERE run_accession = '${run_accession}'"
        result = sql.rows(query)
    } catch (Exception ex) {
        ex.printStackTrace()
    } finally {
        sql.close()
    }
    return result
}

def buildMetadata(gca, taxon_id) {
    def db_meta = ["gca": gca, "taxon_id": taxon_id]
    return db_meta
}

def checkFastqc(String jdbcUrl, String username, String password, String run_accession) {
    def sql = Sql.newInstance(jdbcUrl, username, password)
    def query = """SELECT basic_statistics, per_base_sequence_quality, per_sequence_quality_scores,
                          per_base_sequence_content
                   FROM data_files df
                   INNER JOIN run r ON df.run_id = r.run_id
                   WHERE r.run_accession = '${run_accession}'
                """
    def qc_status = null

    try {
        def result = sql.rows(query)
        // Process the results
        result.each { row ->
            def basicStatistics = row.basic_statistics
            def perBaseSequenceQuality = row.per_base_sequence_quality
            def perSequenceQualityScores = row.per_sequence_quality_scores
            def perBaseSequenceContent = row.per_base_sequence_content
            if (basicStatistics == 'PASS' && perBaseSequenceQuality == 'PASS' &&
                perSequenceQualityScores == 'PASS' && perBaseSequenceContent == 'PASS') {
                // Execute the SQL UPDATE statement
                def updateQuery = "UPDATE run SET qc_status = 'QC_PASS' WHERE run_accession = '${run_accession}'"
                sql.executeUpdate(updateQuery)
                qc_status = 'QC_PASS'
            } else {
                // Execute the SQL UPDATE statement
                def updateQuery = "UPDATE run SET qc_status = 'QC_FAIL' WHERE run_accession = '${run_accession}'"
                sql.executeUpdate(updateQuery)
                qc_status = 'QC_FAIL'
            }
        }
    } catch (Exception ex) {
        ex.printStackTrace()
    } finally {
        sql.close()
    }

    return qc_status
}

def checkOverrepresentedSequences(String jdbcUrl, String username, String password, String run_accession) {
    def sql = Sql.newInstance(jdbcUrl, username, password)
    def query = """SELECT overrepresented_sequences
                   FROM data_files df
                   INNER JOIN run r ON df.run_id = r.run_id
                   WHERE r.run_accession = '${run_accession}'
                """
    def overrepresented_sequences = null

    try {
        def result = sql.rows(query)
        // Flag the run if FastQC marked overrepresented sequences as WARN or FAIL
        result.each { row ->
            def overrepresentedStatus = row.overrepresented_sequences
            overrepresented_sequences = (overrepresentedStatus == 'WARN' || overrepresentedStatus == 'FAIL')
        }
    } catch (Exception ex) {
        ex.printStackTrace()
    } finally {
        sql.close()
    }
    return overrepresented_sequences
}
def concatString(string1, string2, string3) {
    return string1 + '_' + string2 + '_' + string3
}

def calculateIndexBases(genomeLength) {
    // min(14, log2(genomeLength)/2 - 1); Math.log is the natural log, so convert to log2 explicitly
    def indexBases = Math.min(14, Math.floor((Math.log(genomeLength) / Math.log(2)) / 2 - 1))
    return indexBases
}
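This matches the small-genome index sizing formula suggested in STAR's manual for genomeSAindexNbases. Two worked values, taking genomeLength as total bases:

    def log2 = { x -> Math.log(x) / Math.log(2) }
    assert Math.min(14, Math.floor(log2(1_000_000) / 2 - 1)) == 8       // ~1 Mb genome
    assert Math.min(14, Math.floor(log2(3_100_000_000) / 2 - 1)) == 14  // human-sized genome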

def getRunId(String jdbcUrl, String username, String password, String run_accession, String gca, String percentage_mapped) {
    def sql = Sql.newInstance(jdbcUrl, username, password)
    def run_id = null
    try {
        def query = "SELECT run_id FROM run WHERE run_accession = '${run_accession}'"
        def result = sql.rows(query)
        if (result.size() > 0) {
            run_id = result[0].run_id
        }
    } catch (Exception ex) {
        ex.printStackTrace()
    } finally {
        sql.close()
    }
    return run_id
}

def updateFastqcStatus(String jdbcUrl, String username, String password, String run_accession) {
def sql = Sql.newInstance(jdbcUrl, username, password)

try {
// Execute the SQL UPDATE statement
def updateQuery = "UPDATE run SET qc_status = 'ALIGNED' WHERE run_accession = '${run_accession}'"
sql.executeUpdate(updateQuery)
    } catch (Exception ex) {
        ex.printStackTrace()
    } finally {
        sql.close()
    }

}
4 changes: 3 additions & 1 deletion pipelines/nextflow/subworkflows/run_busco.nf
@@ -49,7 +49,9 @@ include { CLEANING } from '../modules/cleaning.nf'
*/
workflow RUN_BUSCO{
take:
tuple val(db_meta), val(busco_mode), bool(copyToFtp)
db_meta
busco_mode
copyToFtp

main:
// Get the closest Busco dataset from the taxonomy classification stored in db meta table
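The take: fix above matters because DSL2 declares workflow inputs one channel per line; tuple and bool qualifiers are process-input syntax and fail here. A matching call site would pass one argument per declaration, e.g. (channel names illustrative):

    // Hypothetical invocation: one argument per take: entry
    RUN_BUSCO(db_meta_ch, Channel.value('genome'), Channel.value(false))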
5 changes: 3 additions & 2 deletions pipelines/nextflow/subworkflows/run_ensembl_stats.nf
@@ -18,7 +18,7 @@ limitations under the License.

nextflow.enable.dsl=2

includeConfig '../../../workflows/nextflow.config'
//includeConfig '../../../workflows/nextflow.config'


/*
@@ -43,7 +43,8 @@ include { CLEANING } from '../modules/cleaning.nf'

workflow RUN_ENSEMBL_STATS{
take:
tuple val(dbname),val(db_meta)
dbname
db_meta

main:

9 changes: 5 additions & 4 deletions pipelines/nextflow/subworkflows/run_omark.nf
@@ -18,8 +18,8 @@ limitations under the License.

nextflow.enable.dsl=2

includeConfig '../../../workflows/nextflow.config'
includeConfig '../conf/omark.config'
//includeConfig '../../../workflows/nextflow.config'
//includeConfig '../conf/omark.config'


/*
@@ -47,7 +47,8 @@ include { CLEANING } from '../modules/cleaning.nf'

workflow RUN_OMARK{
take:
tuple val(dbname),val(db_meta)
dbname
db_meta

main:
//
@@ -63,7 +64,7 @@ workflow RUN_OMARK{
//
omarkOutput = OMARK (omamerOutput)

omarkSummaryFile = OMARK_OUTPUT(db_meta, omarkOutput, params.project)
omarkSummaryFile = OMARK_OUTPUT(db_meta, omarkOutput)
if (params.copyToFtp) {
COPY_OMARK_OUTPUT(db_meta, omarkSummaryFile)
}
10 changes: 7 additions & 3 deletions pipelines/nextflow/workflows/main.nf
@@ -101,6 +101,7 @@ include { RUN_ENSEMBL_STATS } from '../subworkflows/run_ensembl_stats.nf'
include { BUILD_METADATA } from '../modules/build_metadata.nf'
include { SPECIES_METADATA } from '../modules/species_metadata.nf'

include { buildMetadata } from '../modules/utils.nf'
/*
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
RUN MAIN WORKFLOW
@@ -119,12 +120,15 @@ workflow STATISTICS{
if (params.run_busco_ncbi) {
// Read data from the CSV file, split it, and map each row to extract GCA and taxon values

data = Channel.fromPath(params.csvFile).splitCsv().map { row ->
data = Channel.fromPath(params.csvFile).splitCsv(sep:',').map { row ->
def gca = row[0]
def taxon = row[1]
def busco_mode = 'genome'
def copyToFtp = false
db_meta = BUILD_METADATA(gca,taxon_id)
println("GCA: $gca, Taxon: $taxon")
def db_meta = buildMetadata(gca, taxon)

//def db_meta = BUILD_METADATA(gca,taxon_id)
RUN_BUSCO(db_meta, busco_mode, copyToFtp)
}
}
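Note that RUN_BUSCO is still invoked inside the .map closure above, which DSL2 does not allow (workflows can only be called from a workflow body). A sketch of equivalent wiring under that constraint (channel plumbing illustrative):

    // Build metadata per CSV row, then call the subworkflow once on the channel
    data = Channel.fromPath(params.csvFile)
        .splitCsv(sep: ',')
        .map { row -> buildMetadata(row[0], row[1]) }
    RUN_BUSCO(data, Channel.value('genome'), Channel.value(false))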
@@ -152,4 +156,4 @@ workflow STATISTICS{
exec "rm -rf ${params.cacheDir}/*"
}

}
