diff --git a/nextflow/conf/base.config b/nextflow/conf/base.config
new file mode 100644
index 0000000..a71eeed
--- /dev/null
+++ b/nextflow/conf/base.config
@@ -0,0 +1,66 @@
+// Default resource requests and retry policy applied to every process.
+// Each request grows with task.attempt and is capped by check_max(), which is
+// defined in nextflow.config and reads params.max_memory / max_cpus / max_time.
+process {
+
+    cpus   = { check_max( 2 * task.attempt, 'cpus' ) }
+    memory = { check_max( 8.GB * task.attempt, 'memory' ) }
+    time   = { check_max( 4.h * task.attempt, 'time' ) }
+
+    // Retry when the exit status is 104 or within 130..145; any other failure uses 'finish'.
+    errorStrategy = { task.exitStatus in ((130..145) + 104) ? 'retry' : 'finish' }
+    maxRetries    = 1
+    maxErrors     = '-1'
+
+    // Process-specific resource requirements
+    // NOTE - Please try and re-use the labels below as much as possible.
+    //        These labels are used and recognised by default in DSL2 files hosted on nf-core/modules.
+    //        If possible, it would be nice to keep the same label naming convention when
+    //        adding in your local modules too.
+    // See https://www.nextflow.io/docs/latest/config.html#config-process-selectors
+    withLabel:process_single {
+        cpus   = { check_max( 1 , 'cpus' ) }
+        memory = { check_max( 6.GB * task.attempt, 'memory' ) }
+        time   = { check_max( 4.h * task.attempt, 'time' ) }
+    }
+    withLabel:process_very_low {
+        cpus   = { check_max( 2 * task.attempt, 'cpus' ) }
+        memory = { check_max( 6.GB * task.attempt, 'memory' ) }
+        time   = { check_max( 3.h * task.attempt, 'time' ) }
+    }
+    withLabel:process_low {
+        cpus   = { check_max( 4 * task.attempt, 'cpus' ) }
+        memory = { check_max( 12.GB * task.attempt, 'memory' ) }
+        time   = { check_max( 6.h * task.attempt, 'time' ) }
+    }
+    withLabel:process_medium {
+        cpus   = { check_max( 8 * task.attempt, 'cpus' ) }
+        memory = { check_max( 36.GB * task.attempt, 'memory' ) }
+        time   = { check_max( 8.h * task.attempt, 'time' ) }
+    }
+    withLabel:process_high {
+        cpus   = { check_max( 12 * task.attempt, 'cpus' ) }
+        memory = { check_max( 72.GB * task.attempt, 'memory' ) }
+        time   = { check_max( 16.h * task.attempt, 'time' ) }
+    }
+    withLabel:process_long {
+        time   = { check_max( 20.h * task.attempt, 'time' ) }
+    }
+    withLabel:process_high_memory {
+        memory = { check_max( 200.GB * task.attempt, 'memory' ) }
+    }
+    withLabel:error_ignore {
+        errorStrategy = 'ignore'
+    }
+    withLabel:error_retry {
+        errorStrategy = 'retry'
+        maxRetries    = 2
+    }
+}
+
+params {
+    // Defaults only, expecting to be overwritten
+    max_memory = 128.GB
+    max_cpus = 16
+    max_time = 240.h
+}
diff --git a/nextflow/maxquant_psm.nf b/nextflow/maxquant_psm.nf
index f849ccd..3288d9d 100644
--- a/nextflow/maxquant_psm.nf
+++ b/nextflow/maxquant_psm.nf
@@ -1,31 +1,39 @@
 nextflow.enable.dsl=2
 
-params.msms_file
-params.mzml_dir
-params.output_dir
-params.chunksize = 1000000
-params.output_prefix_file = "psm"
-params.file_num = 20
-params.partitions = ""
 
-workflow {
-    generateResults(params.msms_file, params.output_dir, params.chunksize, params.output_prefix_file)
-    extractInfoFromMzml(generateResults.out, params.mzml_dir, params.output_dir, params.file_num, params.partitions)
+// Convert one raw file to mzML with ThermoRawFileParser and publish it to params.mzml_dir.
+process msconvert {
+    publishDir "${params.mzml_dir}", mode:'copy', overwrite: true
+    // `task` is only resolvable when the directive is evaluated for a concrete task,
+    // so the image is selected inside a lazily evaluated string instead of an if/else.
+    container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
+        'https://containers.biocontainers.pro/s3/SingImgsRepo/thermorawfileparser/1.3.3--h1341992_0/thermorawfileparser:1.3.3--h1341992_0' :
+        'quay.io/biocontainers/thermorawfileparser:1.3.3--h1341992_0' }"
+
+    input:
+    path rawFile
+
+    output:
+    path '*.mzML', emit: mzmlFiles
+
+    script:
+    """
+    ThermoRawFileParser.sh -i=${rawFile} -f=2 -o=./
+    """
 }
+
 
+// Run `quantmsioc convert-maxquant-psm` on the msms file; emits the *.psm.parquet files it writes.
 process generateResults {
     input:
     path msmsFile
     path outputDir
-    val chunksize
-    val output_prefix_file
-
     output:
     path "**/*.psm.parquet", emit: 'psm'
 
     script:
     """
-    quantmsioc convert-maxquant-psm --msms_file ${msmsFile} --output_folder ${outputDir} --chunksize ${chunksize} --output_prefix_file ${output_prefix_file}
+    quantmsioc convert-maxquant-psm --msms_file ${msmsFile} --output_folder ${outputDir} --chunksize ${params.chunksize} --output_prefix_file ${params.output_prefix_file}
     """
 }
 
@@ -34,17 +42,31 @@ process extractInfoFromMzml {
     path resultsFile
     path mzmlDir
     path outputDir
-    val file_num
-    val partitions
 
+    // The mzML inputs are staged into the task work directory, hence `--mzml_directory ./`.
     script:
-    if (partitions != ''){
+    if (params.partitions){
     """
-    quantmsioc map-spectrum-message-to-parquet --parquet_path ${resultsFile} --mzml_directory ${mzmlDir} --output_folder res/${outputDir} --file_num ${file_num} --partitions ${partitions}
+    quantmsioc map-spectrum-message-to-parquet --parquet_path ${resultsFile} --mzml_directory ./ --output_folder res/${outputDir} --file_num ${params.file_num} --partitions ${params.partitions}
     """
     }else {
     """
-    quantmsioc map-spectrum-message-to-parquet --parquet_path ${resultsFile} --mzml_directory ${mzmlDir} --output_folder res/${outputDir} --file_num ${file_num}
+    quantmsioc map-spectrum-message-to-parquet --parquet_path ${resultsFile} --mzml_directory ./ --output_folder res/${outputDir} --file_num ${params.file_num}
     """
     }
 }
+
+// Entry point: convert every *.raw under params.raw_dir, build the PSM parquet from
+// params.msms_file, then map spectrum data from the converted mzML files onto it.
+workflow {
+    Channel
+        .fromPath("${params.raw_dir}/*.raw")
+        .set{ rawFiles }
+
+    msconvert(rawFiles)
+    generateResults(params.msms_file, params.output_dir)
+    // collect() gathers every converted mzML into a single list so the mapping step runs
+    // once with all files staged together, instead of once per mzML file.
+    extractInfoFromMzml(generateResults.out, msconvert.out.mzmlFiles.collect(), params.output_dir)
+
+}
\ No newline at end of file
diff --git a/nextflow/nextflow.config b/nextflow/nextflow.config
new file mode 100644
index 0000000..bf59bc8
--- /dev/null
+++ b/nextflow/nextflow.config
@@ -0,0 +1,94 @@
+
+// Defaults for the params.* values read by the processes in maxquant_psm.nf.
+params {
+    chunksize = 1000000
+    output_prefix_file = "psm"
+    file_num = 20
+    partitions = ""
+}
+
+
+includeConfig 'conf/base.config'
+
+profiles {
+    docker {
+        docker.enabled = true
+        conda.enabled = false
+        singularity.enabled = false
+        podman.enabled = false
+        shifter.enabled = false
+        charliecloud.enabled = false
+        apptainer.enabled = false
+        docker.runOptions = '-u $(id -u):$(id -g)'
+    }
+    arm {
+        docker.runOptions = '-u $(id -u):$(id -g) --platform=linux/amd64'
+    }
+    singularity {
+        singularity.enabled = true
+        singularity.autoMounts = true
+        conda.enabled = false
+        docker.enabled = false
+        podman.enabled = false
+        shifter.enabled = false
+        charliecloud.enabled = false
+        apptainer.enabled = false
+    }
+
+    // SLURM executor with Singularity; autoMounts is off and the data root is bound explicitly.
+    ebislurm{
+        conda.enabled = false
+        docker.enabled = false
+        singularity.enabled = true
+        executor {
+            name = "slurm"
+            queueSize = 1000
+            submitRateLimit = "10/1sec"
+            exitReadTimeout = "30 min"
+            jobName = {
+                task.name // replace "[", "]" and " " so the scheduler job name has no brackets or spaces
+                    .replace("[", "(")
+                    .replace("]", ")")
+                    .replace(" ", "_")
+            }
+        }
+        singularity.autoMounts = false
+        singularity.runOptions = '-B /hps/nobackup/juan/pride/reanalysis:/hps/nobackup/juan/pride/reanalysis'
+        singularity.cacheDir = "/hps/nobackup/juan/pride/reanalysis/singularity/"
+    }
+}
+
+// Function to ensure that resource requirements don't go beyond
+// a maximum limit
+// obj is the requested value; type is 'memory', 'time' or 'cpus' and selects params.max_memory,
+// params.max_time or params.max_cpus. If that limit cannot be converted, obj is returned unchanged.
+def check_max(obj, type) {
+    if (type == 'memory') {
+        try {
+            if (obj.compareTo(params.max_memory as nextflow.util.MemoryUnit) == 1)
+                return params.max_memory as nextflow.util.MemoryUnit
+            else
+                return obj
+        } catch (all) {
+            println " ### ERROR ### Max memory '${params.max_memory}' is not valid! Using default value: $obj"
+            return obj
+        }
+    } else if (type == 'time') {
+        try {
+            if (obj.compareTo(params.max_time as nextflow.util.Duration) == 1)
+                return params.max_time as nextflow.util.Duration
+            else
+                return obj
+        } catch (all) {
+            println " ### ERROR ### Max time '${params.max_time}' is not valid! Using default value: $obj"
+            return obj
+        }
+    } else if (type == 'cpus') {
+        try {
+            return Math.min( obj, params.max_cpus as int )
+        } catch (all) {
+            println " ### ERROR ### Max cpus '${params.max_cpus}' is not valid! Using default value: $obj"
+            return obj
+        }
+    }
+}
\ No newline at end of file