Skip to content

Commit

Permalink
update: nextflow
Browse files Browse the repository at this point in the history
  • Loading branch information
zprobot committed Nov 20, 2024
1 parent f4d1bce commit 80b2da5
Show file tree
Hide file tree
Showing 3 changed files with 188 additions and 19 deletions.
62 changes: 62 additions & 0 deletions nextflow/conf/base.config
Original file line number Diff line number Diff line change
@@ -0,0 +1,62 @@
// Base resource configuration for all processes.
// Requests scale with task.attempt so each automatic retry asks for more
// resources; every value is clamped by check_max() (defined in nextflow.config)
// against params.max_cpus / max_memory / max_time.
process {

    // Defaults for any process without a more specific label below.
    cpus = { check_max( 2 * task.attempt, 'cpus' ) }
    memory = { check_max( 8.GB * task.attempt, 'memory' ) }
    time = { check_max( 4.h * task.attempt, 'time' ) }

    // Retry on exit codes 130-145 and 104 (typically resource-related
    // terminations); for any other failure, let running tasks finish, then stop.
    errorStrategy = { task.exitStatus in ((130..145) + 104) ? 'retry' : 'finish' }
    maxRetries = 1
    maxErrors = '-1'

    // Process-specific resource requirements
    // NOTE - Please try and re-use the labels below as much as possible.
    // These labels are used and recognised by default in DSL2 files hosted on nf-core/modules.
    // If possible, it would be nice to keep the same label naming convention when
    // adding in your local modules too.
    // See https://www.nextflow.io/docs/latest/config.html#config-process-selectors
    withLabel:process_single {
        cpus = { check_max( 1 , 'cpus' ) }
        memory = { check_max( 6.GB * task.attempt, 'memory' ) }
        time = { check_max( 4.h * task.attempt, 'time' ) }
    }
    withLabel:process_very_low {
        cpus = { check_max( 2 * task.attempt, 'cpus' ) }
        memory = { check_max( 6.GB * task.attempt, 'memory' ) }
        time = { check_max( 3.h * task.attempt, 'time' ) }
    }
    withLabel:process_low {
        cpus = { check_max( 4 * task.attempt, 'cpus' ) }
        memory = { check_max( 12.GB * task.attempt, 'memory' ) }
        time = { check_max( 6.h * task.attempt, 'time' ) }
    }
    withLabel:process_medium {
        cpus = { check_max( 8 * task.attempt, 'cpus' ) }
        memory = { check_max( 36.GB * task.attempt, 'memory' ) }
        time = { check_max( 8.h * task.attempt, 'time' ) }
    }
    withLabel:process_high {
        cpus = { check_max( 12 * task.attempt, 'cpus' ) }
        memory = { check_max( 72.GB * task.attempt, 'memory' ) }
        time = { check_max( 16.h * task.attempt, 'time' ) }
    }
    // Labels below override only one dimension; the defaults above still apply
    // to the others.
    withLabel:process_long {
        time = { check_max( 20.h * task.attempt, 'time' ) }
    }
    withLabel:process_high_memory {
        memory = { check_max( 200.GB * task.attempt, 'memory' ) }
    }
    withLabel:error_ignore {
        errorStrategy = 'ignore'
    }
    withLabel:error_retry {
        errorStrategy = 'retry'
        maxRetries = 2
    }
}

// Hard ceilings consumed by check_max() in nextflow.config.
params {
    // Defaults only, expecting to be overwritten
    max_memory = 128.GB
    max_cpus = 16
    max_time = 240.h
}
55 changes: 36 additions & 19 deletions nextflow/maxquant_psm.nf
Original file line number Diff line number Diff line change
@@ -1,31 +1,39 @@
nextflow.enable.dsl=2

params.msms_file
params.mzml_dir
params.output_dir
params.chunksize = 1000000
params.output_prefix_file = "psm"
params.file_num = 20
params.partitions = ""

workflow {
generateResults(params.msms_file, params.output_dir, params.chunksize, params.output_prefix_file)
extractInfoFromMzml(generateResults.out, params.mzml_dir, params.output_dir, params.file_num, params.partitions)
// Convert a Thermo .raw file to (indexed) mzML with ThermoRawFileParser.
// Converted mzML files are also copied into params.mzml_dir via publishDir.
process msconvert {
    publishDir "${params.mzml_dir}", mode:'copy', overwrite: true

    // Pick a container matching the active engine. With Singularity the
    // pre-built image is fetched directly, unless
    // task.ext.singularity_pull_docker_container forces conversion of the
    // Docker image instead.
    if (workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container) {
        container 'https://containers.biocontainers.pro/s3/SingImgsRepo/thermorawfileparser/1.3.3--h1341992_0/thermorawfileparser:1.3.3--h1341992_0'
    }
    else {
        container 'quay.io/biocontainers/thermorawfileparser:1.3.3--h1341992_0'
    }

    input:
    path rawFile

    output:
    path '*.mzML', emit: mzmlFiles

    script:
    // -f=2 selects indexed mzML output; -o=./ writes into the task work dir
    // so the '*.mzML' output glob can capture the result.
    // (Removed a stale `mkdir mzml` — the directory was created but never used.)
    """
    ThermoRawFileParser.sh -i=${rawFile} -f=2 -o=./
    """
}

// Convert a MaxQuant msms.txt file into quantms.io PSM parquet file(s).
// Chunking and the output file prefix are controlled by params.chunksize and
// params.output_prefix_file (defaults set in nextflow.config).
process generateResults {
    input:
    path msmsFile
    path outputDir

    output:
    // quantmsioc writes the parquet under a nested directory, hence '**'.
    path "**/*.psm.parquet", emit: 'psm'

    script:
    """
    quantmsioc convert-maxquant-psm --msms_file ${msmsFile} --output_folder ${outputDir} --chunksize ${params.chunksize} --output_prefix_file ${params.output_prefix_file}
    """
}

// Enrich the PSM parquet output with spectrum-level information taken from
// the staged mzML files. Uses params.file_num (batch size of mzML files read
// per pass) and, when non-empty, params.partitions to split the output.
process extractInfoFromMzml {
    input:
    path resultsFile
    path mzmlDir
    path outputDir

    script:
    // The mzML files are staged into the task work dir, so the tool reads
    // them from ./ rather than from the original mzmlDir path.
    if (params.partitions){
        """
        quantmsioc map-spectrum-message-to-parquet --parquet_path ${resultsFile} --mzml_directory ./ --output_folder res/${outputDir} --file_num ${params.file_num} --partitions ${params.partitions}
        """
    }else {
        """
        quantmsioc map-spectrum-message-to-parquet --parquet_path ${resultsFile} --mzml_directory ./ --output_folder res/${outputDir} --file_num ${params.file_num}
        """
    }
}

// Entry workflow: convert every .raw file under params.raw_dir to mzML,
// convert the MaxQuant msms.txt to PSM parquet, then attach spectrum
// information from the converted mzML files.
workflow {
    rawFiles = Channel.fromPath("${params.raw_dir}/*.raw")

    msconvert(rawFiles)
    generateResults(params.msms_file, params.output_dir)
    extractInfoFromMzml(generateResults.out, msconvert.out, params.output_dir)
}
90 changes: 90 additions & 0 deletions nextflow/nextflow.config
Original file line number Diff line number Diff line change
@@ -0,0 +1,90 @@

params {
chunksize = 1000000
output_prefix_file = "psm"
file_num = 20
partitions = ""
}


includeConfig 'conf/base.config'

// Execution profiles: exactly one container engine is enabled per profile,
// with the others explicitly switched off.
profiles {
    docker {
        docker.enabled = true
        conda.enabled = false
        singularity.enabled = false
        podman.enabled = false
        shifter.enabled = false
        charliecloud.enabled = false
        apptainer.enabled = false
        docker.runOptions = '-u $(id -u):$(id -g)'
    }
    // For Apple-silicon / ARM hosts: force amd64 images under emulation.
    arm {
        docker.runOptions = '-u $(id -u):$(id -g) --platform=linux/amd64'
    }
    singularity {
        singularity.enabled = true
        singularity.autoMounts = true
        conda.enabled = false
        docker.enabled = false
        podman.enabled = false
        shifter.enabled = false
        charliecloud.enabled = false
        apptainer.enabled = false
    }

    // EBI SLURM cluster profile.
    ebislurm {
        // BUGFIX: was `conda.enable` (no such option), so conda was not
        // actually being disabled; the correct property is `conda.enabled`.
        conda.enabled = false
        docker.enabled = false
        singularity.enabled = true
        executor {
            name = "slurm"
            queueSize = 1000
            submitRateLimit = "10/1sec"
            exitReadTimeout = "30 min"
            jobName = {
                task.name // [] and " " not allowed in lsf job names
                    .replace("[", "(")
                    .replace("]", ")")
                    .replace(" ", "_")
            }
        }
        singularity.autoMounts = false
        singularity.runOptions = '-B /hps/nobackup/juan/pride/reanalysis:/hps/nobackup/juan/pride/reanalysis'
        singularity.cacheDir = "/hps/nobackup/juan/pride/reanalysis/singularity/"
    }
}

// Function to ensure that resource requirements don't go beyond
// a maximum limit
// Function to ensure that resource requirements don't go beyond
// a maximum limit.
//
// obj  - requested resource: a MemoryUnit ('memory'), Duration ('time'),
//        or integer cpu count ('cpus')
// type - one of 'memory', 'time', 'cpus'
//
// Returns the request capped at params.max_memory / max_time / max_cpus.
// If the configured maximum cannot be parsed, logs an error and returns the
// uncapped request. Returns null for an unknown type (unchanged behaviour).
def check_max(obj, type) {
    if (type == 'memory') {
        try {
            // compareTo's contract only guarantees the SIGN of the result,
            // not the value 1, so test > 0 instead of == 1.
            if (obj.compareTo(params.max_memory as nextflow.util.MemoryUnit) > 0)
                return params.max_memory as nextflow.util.MemoryUnit
            else
                return obj
        } catch (all) {
            println " ### ERROR ### Max memory '${params.max_memory}' is not valid! Using default value: $obj"
            return obj
        }
    } else if (type == 'time') {
        try {
            if (obj.compareTo(params.max_time as nextflow.util.Duration) > 0)
                return params.max_time as nextflow.util.Duration
            else
                return obj
        } catch (all) {
            println " ### ERROR ### Max time '${params.max_time}' is not valid! Using default value: $obj"
            return obj
        }
    } else if (type == 'cpus') {
        try {
            return Math.min( obj, params.max_cpus as int )
        } catch (all) {
            println " ### ERROR ### Max cpus '${params.max_cpus}' is not valid! Using default value: $obj"
            return obj
        }
    }
}

0 comments on commit 80b2da5

Please sign in to comment.