Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add STIMULUS/CHECKTORCHMODEL #6898

Open
wants to merge 11 commits into
base: master
Choose a base branch
from
13 changes: 13 additions & 0 deletions modules/nf-core/stimulus/checktorchmodel/environment.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@
---
# Conda environment for STIMULUS_CHECKTORCHMODEL.
channels:
  - conda-forge
  - bioconda

dependencies:
  - conda-forge::matplotlib=3.9.2
  - conda-forge::pandas=2.2.3
  # pip is required to install stimulus-py below.
  # NOTE(review): pip pin chosen to match the age of the other pins - confirm.
  - conda-forge::pip=24.2
  - conda-forge::polars=1.9.0
  - conda-forge::python=3.12
  - conda-forge::pytorch=2.4.1
  - conda-forge::ray-core=2.37.0
  - conda-forge::safetensors=0.4.5
  - conda-forge::scikit-learn=1.5.2
  # The process script runs `stimulus-check-model` and reports the version via
  # `pip show stimulus-py`, so the package itself must be part of the
  # environment; without it the Conda profile cannot run the module.
  # NOTE(review): 0.0.9 mirrors the version hard-coded in the stub section of
  # main.nf - confirm it matches the container before merging.
  - pip:
      - stimulus-py==0.0.9
59 changes: 59 additions & 0 deletions modules/nf-core/stimulus/checktorchmodel/main.nf
Original file line number Diff line number Diff line change
@@ -0,0 +1,59 @@
// Runs `stimulus-check-model` on a model definition and captures its standard
// output in `<prefix>_modelcheck.log`.
//
// Inputs (all staged as paths):
//   original_csv      - passed to the tool with `-d`
//   experiment_config - passed with `-e`
//   model             - passed with `-m`; its base name is the default prefix
//   ray_tune_config   - passed with `-c`
//   initial_weights   - optional; `--initial_weights` is only added when set
//
// Outputs:
//   log      - the `*_modelcheck.log` file holding the tool's stdout
//   versions - versions.yml with the Python and stimulus-py versions
process STIMULUS_CHECKTORCHMODEL {
    tag "$experiment_config - $original_csv"
    label 'process_medium'

    // TODO freeze to Wave
    container "docker.io/mathysgrapotte/stimulus-py:latest"

    input:
    path(original_csv)
    path(experiment_config)
    path(model)
    path(ray_tune_config)
    path(initial_weights)

    output:
    path "*_modelcheck.log", emit: log
    path "versions.yml"    , emit: versions

    when:
    task.ext.when == null || task.ext.when

    script:
    def args = task.ext.args ?: ''
    // baseName already drops the file extension, so no further stripping is
    // needed; an unanchored /\.py/ replacement would also mangle names that
    // contain ".py" elsewhere (e.g. "my.pytorch_model.py").
    prefix = task.ext.prefix ?: model.baseName
    // Optional flags expand to an empty string when not applicable.
    def weights_arg = initial_weights ? "--initial_weights ${initial_weights}" : ""
    def gpu_arg = task.accelerator ? "--gpus ${task.accelerator.request}" : ""
    """
    stimulus-check-model \\
        -d ${original_csv} \\
        -m ${model} \\
        -e ${experiment_config} \\
        -c ${ray_tune_config} \\
        ${weights_arg} \\
        ${gpu_arg} \\
        --cpus ${task.cpus} \\
        --memory "${task.memory}" \\
        $args > ${prefix}_modelcheck.log

    cat <<-END_VERSIONS > versions.yml
    "${task.process}":
        Python: \$(python --version | cut -d ' ' -f 2)
        Stimulus-py: \$(pip show stimulus-py | grep '^Version:' | cut -d ' ' -f 2)
    END_VERSIONS
    """

    stub:
    prefix = task.ext.prefix ?: model.baseName
    def STIMULUS_VER = '0.0.9' // container not used in stub, change manually
    """
    touch ${prefix}_modelcheck.log

    cat <<-END_VERSIONS > versions.yml
    "${task.process}":
        Python: \$(python --version | cut -d ' ' -f 2)
        Stimulus-py: ${STIMULUS_VER}
    END_VERSIONS
    """
}
64 changes: 64 additions & 0 deletions modules/nf-core/stimulus/checktorchmodel/meta.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,64 @@
---
# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/modules/meta-schema.json
name: "stimulus_checktorchmodel"
description: Sanity check a PyTorch model using Ray Tune
keywords:
  - machine learning
  - neural network
  - unit test
tools:
  - "stimulus":
      description: "Stochastic Testing and Input Manipulation for Unbiased Learning Systems"
      homepage: "https://github.com/mathysgrapotte/stimulus-py"
      documentation: "https://github.com/mathysgrapotte/stimulus-py"
      tool_dev_url: "https://github.com/mathysgrapotte/stimulus-py"
      licence: ["MIT"]

input:
  - - original_csv:
        type: file
        description: A CSV file with untransformed data
        pattern: "*.csv"
        ontologies:
          - edam: "http://edamontology.org/format_3752"
  - - experiment_config:
        type: file
        description: A YAML file describing data transformations
        pattern: "*.{yml,yaml}"
        ontologies:
          # format_3750 is the EDAM term for YAML (format_3464 is JSON).
          - edam: "http://edamontology.org/format_3750"
  - - model:
        type: file
        description: Python source file containing the model
        pattern: "*.py"
        ontologies:
          - edam: "http://edamontology.org/format_3996"
  - - ray_tune_config:
        type: file
        description: YAML file containing Ray Tune tuning parameters
        pattern: "*.{yml,yaml}"
        ontologies:
          - edam: "http://edamontology.org/format_3750"
  - - initial_weights:
        type: file
        description: An optional SafeTensors file containing initial weights
        pattern: "*.safetensors"

output:
  - log:
      - "*_modelcheck.log":
          type: file
          description: Model check output
          pattern: "*_modelcheck.log"
  - versions:
      - "versions.yml":
          type: file
          description: File containing software versions
          pattern: "versions.yml"

authors:
  - "@mathysgrapotte"
  - "@alessiovignoli"
  - "@itrujnara"
maintainers:
  - "@itrujnara"
66 changes: 66 additions & 0 deletions modules/nf-core/stimulus/checktorchmodel/tests/main.nf.test
Original file line number Diff line number Diff line change
@@ -0,0 +1,66 @@
// nf-core modules test stimulus/checktorchmodel
//
// nf-test suite for STIMULUS_CHECKTORCHMODEL: one real run that inspects the
// produced log, and one stub run that snapshots all outputs.
nextflow_process {

    name "Test Process STIMULUS_CHECKTORCHMODEL"
    script "../main.nf"
    process "STIMULUS_CHECKTORCHMODEL"

    tag "modules"
    tag "modules_nfcore"
    tag "stimulus"
    tag "stimulus/checktorchmodel"

    test("basic - csv - py") {

        when {
            process {
                """
                input[0] = file(params.modules_testdata_base_path + '../../deepmodeloptim/testdata/dna_experiment/input_data_with_split.csv', checkIfExists: true)
                input[1] = file(params.modules_testdata_base_path + '../../deepmodeloptim/testdata/dna_experiment/experiment_config.yaml', checkIfExists: true)
                input[2] = file(params.modules_testdata_base_path + '../../deepmodeloptim/testdata/dna_experiment/dna_to_float_model.py', checkIfExists: true)
                input[3] = file(params.modules_testdata_base_path + '../../deepmodeloptim/testdata/dna_experiment/tune_config.yaml', checkIfExists: true)
                input[4] = []
                """
            }
        }

        then {
            assertAll(
                { assert process.success },
                { with(process.out.log) {
                    // The log channel emits a single file, so it lives at
                    // index 0; index 1 is out of range and would make this
                    // assertion throw instead of checking the log content.
                    assert path(get(0)).readLines().first().contains("Trial finishes successfully")
                } },
                { assert snapshot(process.out.versions).match() }
            )
        }

    }

    test("basic - csv - stub") {

        options "-stub"

        when {
            process {
                """
                input[0] = file(params.modules_testdata_base_path + '../../deepmodeloptim/testdata/dna_experiment/input_data_with_split.csv', checkIfExists: true)
                input[1] = file(params.modules_testdata_base_path + '../../deepmodeloptim/testdata/dna_experiment/experiment_config.yaml', checkIfExists: true)
                input[2] = file(params.modules_testdata_base_path + '../../deepmodeloptim/testdata/dna_experiment/dna_to_float_model.py', checkIfExists: true)
                input[3] = file(params.modules_testdata_base_path + '../../deepmodeloptim/testdata/dna_experiment/tune_config.yaml', checkIfExists: true)
                input[4] = []
                """
            }
        }

        then {
            assertAll(
                { assert process.success },
                { assert snapshot(process.out).match() }
            )
        }

    }

}
37 changes: 37 additions & 0 deletions modules/nf-core/stimulus/checktorchmodel/tests/main.nf.test.snap
Original file line number Diff line number Diff line change
@@ -0,0 +1,37 @@
{
"basic - csv - py": {
"content": [
[

]
],
"meta": {
"nf-test": "0.9.0",
"nextflow": "24.10.0"
},
"timestamp": "2024-10-29T15:30:33.424103"
},
"basic - csv - stub": {
"content": [
{
"0": [
"dna_to_float_model_modelcheck.log:md5,d41d8cd98f00b204e9800998ecf8427e"
],
"1": [
"versions.yml:md5,673aceb3844572cdb756ae9835f9187e"
],
"log": [
"dna_to_float_model_modelcheck.log:md5,d41d8cd98f00b204e9800998ecf8427e"
],
"versions": [
"versions.yml:md5,673aceb3844572cdb756ae9835f9187e"
]
}
],
"meta": {
"nf-test": "0.9.0",
"nextflow": "24.10.0"
},
"timestamp": "2024-10-29T16:54:27.22745"
}
}
Loading