diff --git a/modules/nf-core/stimulus/checktorchmodel/environment.yml b/modules/nf-core/stimulus/checktorchmodel/environment.yml
new file mode 100644
index 00000000000..fbf5fe9b102
--- /dev/null
+++ b/modules/nf-core/stimulus/checktorchmodel/environment.yml
@@ -0,0 +1,13 @@
+channels:
+  - conda-forge
+  - bioconda
+
+dependencies:
+  - conda-forge::matplotlib=3.9.2
+  - conda-forge::pandas=2.2.3
+  - conda-forge::polars=1.9.0
+  - conda-forge::python=3.12
+  - conda-forge::pytorch=2.4.1
+  - conda-forge::ray-core=2.37.0
+  - conda-forge::safetensors=0.4.5
+  - conda-forge::scikit-learn=1.5.2
diff --git a/modules/nf-core/stimulus/checktorchmodel/main.nf b/modules/nf-core/stimulus/checktorchmodel/main.nf
new file mode 100644
index 00000000000..6a396377519
--- /dev/null
+++ b/modules/nf-core/stimulus/checktorchmodel/main.nf
@@ -0,0 +1,62 @@
+// Sanity-checks a PyTorch model with `stimulus-check-model` and captures the
+// tool's standard output in `<prefix>_modelcheck.log`.
+process STIMULUS_CHECKTORCHMODEL {
+    tag "$experiment_config - $original_csv"
+    label 'process_medium'
+
+    // TODO freeze to Wave
+    container "docker.io/mathysgrapotte/stimulus-py:latest"
+
+    input:
+    path(original_csv)
+    path(experiment_config)
+    path(model)
+    path(ray_tune_config)
+    path(initial_weights)
+
+    output:
+    path "*_modelcheck.log", emit: log
+    path "versions.yml"    , emit: versions
+
+    when:
+    task.ext.when == null || task.ext.when
+
+    script:
+    def args = task.ext.args ?: ''
+    // baseName already drops the file extension, so no further stripping is needed
+    def prefix = task.ext.prefix ?: model.baseName
+    // Optional flags are emitted only when the corresponding value is set
+    def weights_arg = initial_weights ? "--initial_weights ${initial_weights}" : ""
+    def gpu_arg = task.accelerator ? "--gpus ${task.accelerator.request}" : ""
+    """
+    stimulus-check-model \
+        -d ${original_csv} \
+        -m ${model} \
+        -e ${experiment_config} \
+        -c ${ray_tune_config} \
+        ${weights_arg} \
+        ${gpu_arg} \
+        --cpus ${task.cpus} \
+        --memory "${task.memory}" \
+        $args > ${prefix}_modelcheck.log
+
+    cat <<-END_VERSIONS > versions.yml
+    "${task.process}":
+        Python: \$(python --version | cut -d ' ' -f 2)
+        Stimulus-py: \$(pip show stimulus-py | grep Version | cut -d ' ' -f 2)
+    END_VERSIONS
+    """
+
+    stub:
+    def prefix = task.ext.prefix ?: model.baseName
+    def STIMULUS_VER = '0.0.9' // container not used in stub, change manually
+    """
+    touch ${prefix}_modelcheck.log
+
+    cat <<-END_VERSIONS > versions.yml
+    "${task.process}":
+        Python: \$(python --version | cut -d ' ' -f 2)
+        Stimulus-py: ${STIMULUS_VER}
+    END_VERSIONS
+    """
+}
diff --git a/modules/nf-core/stimulus/checktorchmodel/meta.yml b/modules/nf-core/stimulus/checktorchmodel/meta.yml
new file mode 100644
index 00000000000..c9a1274fbd5
--- /dev/null
+++ b/modules/nf-core/stimulus/checktorchmodel/meta.yml
@@ -0,0 +1,64 @@
+---
+# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/modules/meta-schema.json
+name: "stimulus_checktorchmodel"
+description: Sanity check a Pytorch model using raytune
+keywords:
+  - machine learning
+  - neural network
+  - unit test
+tools:
+  - "stimulus":
+      description: "Stochastic Testing and Input Manipulation for Unbiased Learning Systems"
+      homepage: "https://github.com/mathysgrapotte/stimulus-py"
+      documentation: "https://github.com/mathysgrapotte/stimulus-py"
+      tool_dev_url: "https://github.com/mathysgrapotte/stimulus-py"
+      licence: ["MIT"]
+
+input:
+  - - original_csv:
+        type: file
+        description: A CSV file with untransformed data
+        pattern: "*.csv"
+        ontologies:
+          - edam: "http://edamontology.org/format_3752" # CSV
+  - - experiment_config:
+        type: file
+        description: A YAML file describing data transformations
+        pattern: "*.{yml,yaml}"
+        ontologies:
+          - edam: "http://edamontology.org/format_3750" # YAML
+  - - model:
+        type: file
+        description: Python source file containing the model
+        pattern: "*.py"
+        ontologies:
+          - edam: "http://edamontology.org/format_3996" # Python script
+  - - ray_tune_config:
+        type: file
+        description: YAML file containing RayTune tuning parameters
+        pattern: "*.{yml,yaml}"
+        ontologies:
+          - edam: "http://edamontology.org/format_3750" # YAML
+  - - initial_weights:
+        type: file
+        description: An optional SafeTensors file containing initial weights
+        pattern: "*.safetensors"
+
+output:
+  - log:
+      - "*_modelcheck.log":
+          type: file
+          description: Model check output
+          pattern: "*.log"
+  - versions:
+      - "versions.yml":
+          type: file
+          description: File containing software versions
+          pattern: "versions.yml"
+
+authors:
+  - "@mathysgrapotte"
+  - "@alessiovignoli"
+  - "@itrujnara"
+maintainers:
+  - "@itrujnara"
diff --git a/modules/nf-core/stimulus/checktorchmodel/tests/main.nf.test b/modules/nf-core/stimulus/checktorchmodel/tests/main.nf.test
new file mode 100644
index 00000000000..626232d8233
--- /dev/null
+++ b/modules/nf-core/stimulus/checktorchmodel/tests/main.nf.test
@@ -0,0 +1,69 @@
+// nf-core modules test stimulus/checktorchmodel
+nextflow_process {
+
+    name "Test Process STIMULUS_CHECKTORCHMODEL"
+    script "../main.nf"
+    process "STIMULUS_CHECKTORCHMODEL"
+
+    tag "modules"
+    tag "modules_nfcore"
+    tag "stimulus"
+    tag "stimulus/checktorchmodel"
+
+    // Real run on the DNA example data; no initial weights are supplied (input[4] = [])
+    // NOTE(review): the committed snapshot for this test holds an empty versions list;
+    // regenerate it with `nf-test test --update-snapshot` after a successful run.
+    test("basic - csv - py") {
+
+        when {
+            process {
+                """
+                input[0] = file(params.modules_testdata_base_path + '../../deepmodeloptim/testdata/dna_experiment/input_data_with_split.csv', checkIfExists: true)
+                input[1] = file(params.modules_testdata_base_path + '../../deepmodeloptim/testdata/dna_experiment/experiment_config.yaml', checkIfExists: true)
+                input[2] = file(params.modules_testdata_base_path + '../../deepmodeloptim/testdata/dna_experiment/dna_to_float_model.py', checkIfExists: true)
+                input[3] = file(params.modules_testdata_base_path + '../../deepmodeloptim/testdata/dna_experiment/tune_config.yaml', checkIfExists: true)
+                input[4] = []
+                """
+            }
+        }
+
+        then {
+            assertAll(
+                { assert process.success },
+                // the log channel emits a single file, so it is element 0 of the list
+                { with(process.out.log) {
+                    assert path(get(0)).readLines().first().contains("Trial finishes successfully")
+                } },
+                { assert snapshot(process.out.versions).match() }
+            )
+        }
+
+    }
+
+    // Stub run: snapshots every output channel produced by the stub block
+    test("basic - csv - stub") {
+
+        options "-stub"
+
+        when {
+            process {
+                """
+                input[0] = file(params.modules_testdata_base_path + '../../deepmodeloptim/testdata/dna_experiment/input_data_with_split.csv', checkIfExists: true)
+                input[1] = file(params.modules_testdata_base_path + '../../deepmodeloptim/testdata/dna_experiment/experiment_config.yaml', checkIfExists: true)
+                input[2] = file(params.modules_testdata_base_path + '../../deepmodeloptim/testdata/dna_experiment/dna_to_float_model.py', checkIfExists: true)
+                input[3] = file(params.modules_testdata_base_path + '../../deepmodeloptim/testdata/dna_experiment/tune_config.yaml', checkIfExists: true)
+                input[4] = []
+                """
+            }
+        }
+
+        then {
+            assertAll(
+                { assert process.success },
+                { assert snapshot(process.out).match() }
+            )
+        }
+
+    }
+
+}
diff --git a/modules/nf-core/stimulus/checktorchmodel/tests/main.nf.test.snap b/modules/nf-core/stimulus/checktorchmodel/tests/main.nf.test.snap
new file mode 100644
index 00000000000..9e94a8e67c4
--- /dev/null
+++ b/modules/nf-core/stimulus/checktorchmodel/tests/main.nf.test.snap
@@ -0,0 +1,37 @@
+{
+    "basic - csv - py": {
+        "content": [
+            [
+
+            ]
+        ],
+        "meta": {
+            "nf-test": "0.9.0",
+            "nextflow": "24.10.0"
+        },
+        "timestamp": "2024-10-29T15:30:33.424103"
+    },
+    "basic - csv - stub": {
+        "content": [
+            {
+                "0": [
+                    "dna_to_float_model_modelcheck.log:md5,d41d8cd98f00b204e9800998ecf8427e"
+                ],
+                "1": [
+                    "versions.yml:md5,673aceb3844572cdb756ae9835f9187e"
+                ],
+                "log": [
+                    "dna_to_float_model_modelcheck.log:md5,d41d8cd98f00b204e9800998ecf8427e"
+                ],
+                "versions": [
+                    "versions.yml:md5,673aceb3844572cdb756ae9835f9187e"
+                ]
+            }
+        ],
+        "meta": {
+            "nf-test": "0.9.0",
+            "nextflow": "24.10.0"
+        },
+        "timestamp": "2024-10-29T16:54:27.22745"
+    }
+}
\ No newline at end of file