From f4dcca2160d5e278673c8d9ef136f6c2c91e68a9 Mon Sep 17 00:00:00 2001 From: Robrecht Cannoodt Date: Wed, 18 Sep 2024 21:39:31 +0200 Subject: [PATCH] Update workflows (#2) * use cxg_mouse_pancreas_atlas instead of pancreas * update dependencies and remove unnecessary arguments * update scripts and test resources * fix paths * update benchmarking workflow * update benchmark workflow * fix default * Rename "functionality_name" to "name" * Update benchmark workflow Now runs on local test * Update run_benchmark workflow config * Set numpy<2.0.0 for pymde and phate methods Avoids "numpy.ndarray size changed, may indicate binary incompatibility" error * also create a state.yaml file * Update run_test_seqeracloud.sh script * Update run full benchmark scripts * Update CHANGELOG * Add all methods/metrics to benchmark workflow * Add dependencies to benchmark workflow config --------- Co-authored-by: Luke Zappia --- CHANGELOG.md | 6 + README.md | 10 +- _viash.yaml | 15 +- common | 2 +- scripts/.gitignore | 3 - scripts/create_component/.gitignore | 2 + .../create_component/create_python_method.sh | 8 + .../create_component/create_python_metric.sh | 8 + scripts/create_component/create_r_method.sh | 8 + scripts/create_component/create_r_metric.sh | 8 + scripts/create_readme.sh | 4 +- scripts/create_resources/resources.sh | 26 ++ scripts/create_resources/test_resources.sh | 44 ++++ scripts/create_test_resources.sh | 38 --- scripts/download_resources.sh | 9 - scripts/project/build_all_components.sh | 6 + .../project/build_all_docker_containers.sh | 7 + scripts/{ => project}/test_all_components.sh | 4 +- scripts/run_benchmark.sh | 23 -- scripts/run_benchmark/run_full_local.sh | 47 ++++ scripts/run_benchmark/run_full_seqeracloud.sh | 40 +++ scripts/run_benchmark/run_test_local.sh | 32 +++ scripts/run_benchmark/run_test_seqeracloud.sh | 35 +++ scripts/run_benchmark_test.sh | 19 -- scripts/sync_resources.sh | 5 + src/api/comp_control_method.yaml | 4 +- src/api/comp_method.yaml | 4 +- src/api/comp_metric.yaml | 4 +- src/api/comp_process_dataset.yaml | 4 +- src/api/file_common_dataset.yaml | 2 +- src/api/file_dataset.yaml | 2 +- src/api/file_embedding.yaml | 2 +- src/api/file_score.yaml | 2 +- src/api/file_solution.yaml | 2 +- src/control_methods/random_features/script.py | 6 +- .../spectral_features/script.py | 6 +- src/control_methods/true_features/script.py | 6 +- .../process_dataset/config.vsh.yaml | 10 - src/data_processors/process_dataset/script.py | 20 +- src/methods/densmap/script.py | 6 +- src/methods/diffusion_map/script.R | 4 +- src/methods/ivis/script.py | 6 +- src/methods/lmds/script.R | 4 +- src/methods/neuralee/script.py | 6 +- src/methods/pca/script.py | 6 +- src/methods/phate/config.vsh.yaml | 1 + src/methods/phate/script.py | 6 +- src/methods/pymde/config.vsh.yaml | 4 +- src/methods/pymde/script.py | 6 +- src/methods/simlr/script.R | 6 +- src/methods/tsne/script.py | 6 +- src/methods/umap/script.py | 6 +- src/metrics/clustering_performance/script.py | 6 +- src/metrics/coranking/script.R | 4 +- src/metrics/density_preservation/script.py | 4 +- src/metrics/distance_correlation/script.py | 4 +- src/metrics/trustworthiness/script.py | 4 +- .../process_datasets/config.vsh.yaml | 44 ++-- src/workflows/process_datasets/main.nf | 129 +--------- src/workflows/run_benchmark/config.vsh.yaml | 130 +++++----- src/workflows/run_benchmark/main.nf | 229 +++++------------- 61 files changed, 529 insertions(+), 575 deletions(-) delete mode 100644 scripts/.gitignore create mode 100644
scripts/create_component/.gitignore create mode 100755 scripts/create_component/create_python_method.sh create mode 100755 scripts/create_component/create_python_metric.sh create mode 100755 scripts/create_component/create_r_method.sh create mode 100755 scripts/create_component/create_r_metric.sh create mode 100755 scripts/create_resources/resources.sh create mode 100755 scripts/create_resources/test_resources.sh delete mode 100644 scripts/create_test_resources.sh delete mode 100755 scripts/download_resources.sh create mode 100755 scripts/project/build_all_components.sh create mode 100755 scripts/project/build_all_docker_containers.sh rename scripts/{ => project}/test_all_components.sh (75%) delete mode 100644 scripts/run_benchmark.sh create mode 100755 scripts/run_benchmark/run_full_local.sh create mode 100755 scripts/run_benchmark/run_full_seqeracloud.sh create mode 100755 scripts/run_benchmark/run_test_local.sh create mode 100755 scripts/run_benchmark/run_test_seqeracloud.sh delete mode 100644 scripts/run_benchmark_test.sh create mode 100755 scripts/sync_resources.sh diff --git a/CHANGELOG.md b/CHANGELOG.md index 873699a..fe0b62b 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -21,6 +21,12 @@ ## BUGFIXES --> +# dimensionality_reduction 0.1.1 2024-09-18 + +## NEW FUNCTIONALITY + +* Updated workflows to work correctly for this task (PR #2) + # dimensionality_reduction 0.1.0 2024-09-05 ## NEW FUNCTIONALITY diff --git a/README.md b/README.md index 729e6ea..7497578 100644 --- a/README.md +++ b/README.md @@ -93,7 +93,7 @@ flowchart LR The dataset to pass to a method. -Example file: `resources_test/common/pancreas/dataset.h5ad` +Example file: `resources_test/common/cxg_mouse_pancreas_atlas/dataset.h5ad` Format: @@ -149,7 +149,7 @@ Arguments: The dataset to pass to a method. Example file: -`resources_test/dimensionality_reduction/pancreas/dataset.h5ad` +`resources_test/task_dimensionality_reduction/cxg_mouse_pancreas_atlas/dataset.h5ad` Format: @@ -181,7 +181,7 @@ Data structure: The data for evaluating a dimensionality reduction. Example file: -`resources_test/dimensionality_reduction/pancreas/solution.h5ad` +`resources_test/task_dimensionality_reduction/cxg_mouse_pancreas_atlas/solution.h5ad` Format: @@ -268,7 +268,7 @@ Arguments: A dataset with dimensionality reduction embedding. Example file: -`resources_test/dimensionality_reduction/pancreas/embedding.h5ad` +`resources_test/task_dimensionality_reduction/cxg_mouse_pancreas_atlas/embedding.h5ad` Format: @@ -298,7 +298,7 @@ Data structure: Metric score file Example file: -`resources_test/dimensionality_reduction/pancreas/score.h5ad` +`resources_test/task_dimensionality_reduction/cxg_mouse_pancreas_atlas/score.h5ad` Format: diff --git a/_viash.yaml b/_viash.yaml index 09c27e5..d3142cf 100644 --- a/_viash.yaml +++ b/_viash.yaml @@ -67,11 +67,11 @@ info: # Step 5: Replace the task_template to the name of the task. test_resources: - type: s3 - path: s3://openproblems-data/resources_test/common/pancreas/ - dest: resources_test/common/pancreas/ + path: s3://openproblems-data/resources_test/common/cxg_mouse_pancreas_atlas/ + dest: resources_test/common/cxg_mouse_pancreas_atlas/ - type: s3 - path: s3://openproblems-data/resources_test/dimensionality_reduction/ - dest: resources_test/dimensionality_reduction + path: s3://openproblems-data/resources_test/task_dimensionality_reduction/ + dest: resources_test/task_dimensionality_reduction # Step 6: Update the authors of the task. 
authors: @@ -121,7 +121,8 @@ config_mods: | .runners[.type == "nextflow"].config.labels := { lowmem : "memory = 20.Gb", midmem : "memory = 50.Gb", highmem : "memory = 100.Gb", lowcpu : "cpus = 5", midcpu : "cpus = 15", highcpu : "cpus = 30", lowtime : "time = 1.h", midtime : "time = 4.h", hightime : "time = 8.h", veryhightime : "time = 24.h" } repositories: - - name: openproblems-v2 + - name: core type: github - repo: openproblems-bio/openproblems-v2 - tag: main_build + repo: openproblems-bio/core + tag: build/main + path: viash/core diff --git a/common b/common index 1660eef..f264283 160000 --- a/common +++ b/common @@ -1 +1 @@ -Subproject commit 1660eef0b1172c1059270fff77f9abc0a5fc1ea4 +Subproject commit f2642835c89264e0a43e87e3f6c588c6be4902e7 diff --git a/scripts/.gitignore b/scripts/.gitignore deleted file mode 100644 index 2f7ffd3..0000000 --- a/scripts/.gitignore +++ /dev/null @@ -1,3 +0,0 @@ -add_a_method.sh -add_a_control_method.sh -add_a_metric.sh \ No newline at end of file diff --git a/scripts/create_component/.gitignore b/scripts/create_component/.gitignore new file mode 100644 index 0000000..09380f9 --- /dev/null +++ b/scripts/create_component/.gitignore @@ -0,0 +1,2 @@ +# if users change the scripts, the changes should not be committed. +/create_*_*.sh \ No newline at end of file diff --git a/scripts/create_component/create_python_method.sh b/scripts/create_component/create_python_method.sh new file mode 100755 index 0000000..b96c05d --- /dev/null +++ b/scripts/create_component/create_python_method.sh @@ -0,0 +1,8 @@ +#!/bin/bash + +set -e + +common/scripts/create_component \ + --name my_python_method \ + --language python \ + --type method diff --git a/scripts/create_component/create_python_metric.sh b/scripts/create_component/create_python_metric.sh new file mode 100755 index 0000000..d36bc7a --- /dev/null +++ b/scripts/create_component/create_python_metric.sh @@ -0,0 +1,8 @@ +#!/bin/bash + +set -e + +common/scripts/create_component \ + --name my_python_metric \ + --language python \ + --type metric diff --git a/scripts/create_component/create_r_method.sh b/scripts/create_component/create_r_method.sh new file mode 100755 index 0000000..0ab0394 --- /dev/null +++ b/scripts/create_component/create_r_method.sh @@ -0,0 +1,8 @@ +#!/bin/bash + +set -e + +common/scripts/create_component \ + --name my_r_method \ + --language r \ + --type method diff --git a/scripts/create_component/create_r_metric.sh b/scripts/create_component/create_r_metric.sh new file mode 100755 index 0000000..1a4794e --- /dev/null +++ b/scripts/create_component/create_r_metric.sh @@ -0,0 +1,8 @@ +#!/bin/bash + +set -e + +common/scripts/create_component \ + --name my_r_metric \ + --language r \ + --type metric diff --git a/scripts/create_readme.sh b/scripts/create_readme.sh index 5a5544a..0ed7aaf 100755 --- a/scripts/create_readme.sh +++ b/scripts/create_readme.sh @@ -1,3 +1,5 @@ #!/bin/bash -common/scripts/create_task_readme \ No newline at end of file +set -e + +common/scripts/create_task_readme --input src/api diff --git a/scripts/create_resources/resources.sh b/scripts/create_resources/resources.sh new file mode 100755 index 0000000..a733e4c --- /dev/null +++ b/scripts/create_resources/resources.sh @@ -0,0 +1,26 @@ +#!/bin/bash + +# get the root of the directory +REPO_ROOT=$(git rev-parse --show-toplevel) + +# ensure that the command below is run from the root of the repository +cd "$REPO_ROOT" + +cat > /tmp/params.yaml << 'HERE' +input_states: s3://openproblems-data/resources/datasets/**/state.yaml 
+rename_keys: 'input:output_dataset' +output_state: '$id/state.yaml' +settings: '{"output_dataset": "$id/dataset.h5ad", "output_solution": "$id/solution.h5ad"}' +publish_dir: s3://openproblems-data/resources/task_dimensionality_reduction/datasets/ +HERE + +tw launch https://github.com/openproblems-bio/task_dimensionality_reduction.git \ + --revision build/main \ + --pull-latest \ + --main-script target/nextflow/workflows/process_datasets/main.nf \ + --workspace 53907369739130 \ + --compute-env 6TeIFgV5OY4pJCk8I0bfOh \ + --params-file /tmp/params.yaml \ + --entry-name auto \ + --config common/nextflow_helpers/labels_tw.config \ + --labels task_dimensionality_reduction,process_datasets diff --git a/scripts/create_resources/test_resources.sh b/scripts/create_resources/test_resources.sh new file mode 100755 index 0000000..287504a --- /dev/null +++ b/scripts/create_resources/test_resources.sh @@ -0,0 +1,44 @@ +#!/bin/bash + +# get the root of the directory +REPO_ROOT=$(git rev-parse --show-toplevel) + +# ensure that the command below is run from the root of the repository +cd "$REPO_ROOT" + +set -e + +RAW_DATA=resources_test/common +DATASET_DIR=resources_test/task_dimensionality_reduction + +mkdir -p $DATASET_DIR + +# process dataset +echo Running process_dataset +viash run src/data_processors/process_dataset/config.vsh.yaml -- \ + --input $RAW_DATA/cxg_mouse_pancreas_atlas/dataset.h5ad \ + --output_dataset $DATASET_DIR/cxg_mouse_pancreas_atlas/dataset.h5ad \ + --output_solution $DATASET_DIR/cxg_mouse_pancreas_atlas/solution.h5ad + +# run one method +viash run src/methods/pca/config.vsh.yaml -- \ + --input $DATASET_DIR/cxg_mouse_pancreas_atlas/dataset.h5ad \ + --output $DATASET_DIR/cxg_mouse_pancreas_atlas/embedding.h5ad + +# run one metric +viash run src/metrics/clustering_performance/config.vsh.yaml -- \ + --input_embedding $DATASET_DIR/cxg_mouse_pancreas_atlas/embedding.h5ad \ + --input_solution $DATASET_DIR/cxg_mouse_pancreas_atlas/solution.h5ad \ + --output $DATASET_DIR/cxg_mouse_pancreas_atlas/score.h5ad + +cat > $DATASET_DIR/cxg_mouse_pancreas_atlas/state.yaml << HERE +id: cxg_mouse_pancreas_atlas +output_dataset: !file dataset.h5ad +output_solution: !file solution.h5ad +HERE + +# only run this if you have access to the openproblems-data bucket +aws s3 sync --profile op \ + "resources_test/task_dimensionality_reduction" \ + s3://openproblems-data/resources_test/task_dimensionality_reduction \ + --delete --dryrun diff --git a/scripts/create_test_resources.sh b/scripts/create_test_resources.sh deleted file mode 100644 index a39f8c4..0000000 --- a/scripts/create_test_resources.sh +++ /dev/null @@ -1,38 +0,0 @@ -#!/bin/bash - -# get the root of the directory -REPO_ROOT=$(git rev-parse --show-toplevel) - -# ensure that the command below is run from the root of the repository -cd "$REPO_ROOT" - -set -e - -RAW_DATA=resources_test/common -DATASET_DIR=resources_test/task_template - -mkdir -p $DATASET_DIR - -# process dataset -echo Running process_dataset -nextflow run . 
\ - -main-script target/nextflow/workflows/process_datasets/main.nf \ - -profile docker \ - -entry auto \ - --input_states "$RAW_DATA/**/state.yaml" \ - --rename_keys 'input:output_dataset' \ - --settings '{"output_train": "$id/train.h5ad", "output_test": "$id/test.h5ad"}' \ - --publish_dir "$DATASET_DIR" \ - --output_state '$id/state.yaml' - -# run one method -viash run src/methods/logistic_regression/config.vsh.yaml -- \ - --input_train $DATASET_DIR/pancreas/train.h5ad \ - --input_test $DATASET_DIR/pancreas/test.h5ad \ - --output $DATASET_DIR/pancreas/denoised.h5ad - -# run one metric -viash run src/metrics/accuracy/config.vsh.yaml -- \ - --input_predicition $DATASET_DIR/pancreas/predicted.h5ad \ - --input_solution $DATASET_DIR/pancreas/solution.h5ad \ - --output $DATASET_DIR/pancreas/score.h5ad \ No newline at end of file diff --git a/scripts/download_resources.sh b/scripts/download_resources.sh deleted file mode 100755 index 74cc033..0000000 --- a/scripts/download_resources.sh +++ /dev/null @@ -1,9 +0,0 @@ -#!/bin/bash - -set -e - -echo ">> Downloading resources" - -# the sync_resources script uses the test_resources S3 URI's in the _viash.yaml to download the resources. -common/scripts/sync_resources \ - --delete diff --git a/scripts/project/build_all_components.sh b/scripts/project/build_all_components.sh new file mode 100755 index 0000000..4e90d91 --- /dev/null +++ b/scripts/project/build_all_components.sh @@ -0,0 +1,6 @@ +#!/bin/bash + +set -e + +# Build all components in a namespace (refer https://viash.io/reference/cli/ns_build.html) +viash ns build --parallel diff --git a/scripts/project/build_all_docker_containers.sh b/scripts/project/build_all_docker_containers.sh new file mode 100755 index 0000000..5d43639 --- /dev/null +++ b/scripts/project/build_all_docker_containers.sh @@ -0,0 +1,7 @@ +#!/bin/bash + +set -e + +# Build all components in a namespace (refer https://viash.io/reference/cli/ns_build.html) +# and set up the container via a cached build +viash ns build --parallel --setup cachedbuild diff --git a/scripts/test_all_components.sh b/scripts/project/test_all_components.sh similarity index 75% rename from scripts/test_all_components.sh rename to scripts/project/test_all_components.sh index cd016e9..8a08afd 100755 --- a/scripts/test_all_components.sh +++ b/scripts/project/test_all_components.sh @@ -1,4 +1,6 @@ #!/bin/bash +set -e + # Test all components in a namespace (refer https://viash.io/reference/cli/ns_test.html) -viash ns test --parallel \ No newline at end of file +viash ns test --parallel diff --git a/scripts/run_benchmark.sh b/scripts/run_benchmark.sh deleted file mode 100644 index cc4275e..0000000 --- a/scripts/run_benchmark.sh +++ /dev/null @@ -1,23 +0,0 @@ -#!/bin/bash - -RUN_ID="run_$(date +%Y-%m-%d_%H-%M-%S)" -publish_dir="s3://openproblems-data/resources/task_template/results/${RUN_ID}" - -# make sure only log_cp10k is used -cat > /tmp/params.yaml << HERE -input_states: s3://openproblems-data/resources/task_template/datasets/**/state.yaml -rename_keys: 'input_train:output_train;input_test:output_test' -output_state: "state.yaml" -publish_dir: "$publish_dir" -HERE - -tw launch https://github.com/openproblems-bio/task_template.git \ - --revision build/main \ - --pull-latest \ - --main-script target/nextflow/workflows/run_benchmark/main.nf \ - --workspace 53907369739130 \ - --compute-env 6TeIFgV5OY4pJCk8I0bfOh \ - --params-file /tmp/params.yaml \ - --entry-name auto \ - --config common/nextflow_helpers/labels_tw.config \ - --labels task_template,full \ No 
newline at end of file diff --git a/scripts/run_benchmark/run_full_local.sh b/scripts/run_benchmark/run_full_local.sh new file mode 100755 index 0000000..dcc2c2f --- /dev/null +++ b/scripts/run_benchmark/run_full_local.sh @@ -0,0 +1,47 @@ +#!/bin/bash + +# get the root of the directory +REPO_ROOT=$(git rev-parse --show-toplevel) + +# ensure that the command below is run from the root of the repository +cd "$REPO_ROOT" + +# NOTE: depending on the datasets and components, you may need to launch this workflow +# on a different compute platform (e.g. an HPC, AWS Cloud, Azure Cloud, Google Cloud). +# please refer to the Nextflow documentation for more details: +# https://www.nextflow.io/docs/latest/ + +# remove this when you have implemented the script +# echo "TODO: once the 'run_benchmark' workflow has been implemented, update this script to use it." +# echo " Step 1: replace 'task_template' with the name of the task in the following command." +# echo " Step 2: replace the rename keys parameters to fit your run_benchmark inputs" +# echo " Step 3: replace the settings parameter to fit your run_benchmark outputs" +# echo " Step 4: remove this message" +# exit 1 + +set -e + +echo "Running benchmark on full data" +echo " Make sure to run 'scripts/project/build_all_docker_containers.sh'!" + +# generate a unique id +RUN_ID="run_$(date +%Y-%m-%d_%H-%M-%S)" +publish_dir="resources/results/${RUN_ID}" + +# write the parameters to file +cat > /tmp/params.yaml << HERE +input_states: resources/datasets/**/state.yaml +rename_keys: 'input_dataset:output_dataset;input_solution:output_solution' +output_state: "state.yaml" +publish_dir: "$publish_dir" +HERE + +# run the benchmark +nextflow run openproblems-bio/task_dimensionality_reduction \ + -revision build/main \ + -main-script target/nextflow/workflows/run_benchmark/main.nf \ + -profile docker \ + -resume \ + -entry auto \ + -c common/nextflow_helpers/labels_ci.config \ + -params-file /tmp/params.yaml diff --git a/scripts/run_benchmark/run_full_seqeracloud.sh b/scripts/run_benchmark/run_full_seqeracloud.sh new file mode 100755 index 0000000..3bae258 --- /dev/null +++ b/scripts/run_benchmark/run_full_seqeracloud.sh @@ -0,0 +1,40 @@ +#!/bin/bash + +# get the root of the directory +REPO_ROOT=$(git rev-parse --show-toplevel) + +# ensure that the command below is run from the root of the repository +cd "$REPO_ROOT" + +# remove this when you have implemented the script +# echo "TODO: once the 'run_benchmark' workflow has been implemented, update this script to use it." +# echo " Step 1: replace 'task_template' with the name of the task in the following command."
+# echo " Step 2: replace the rename keys parameters to fit your run_benchmark inputs" +# echo " Step 3: replace the settings parameter to fit your run_benchmark outputs" +# echo " Step 4: remove this message" +# exit 1 + +set -e + +# generate a unique id +RUN_ID="run_$(date +%Y-%m-%d_%H-%M-%S)" +publish_dir="s3://openproblems-data/resources/task_dimensionality_reduction/results/${RUN_ID}" + +# write the parameters to file +cat > /tmp/params.yaml << HERE +input_states: s3://openproblems-data/resources/task_dimensionality_reduction/datasets/**/state.yaml +rename_keys: 'input_dataset:output_dataset;input_solution:output_solution' +output_state: "state.yaml" +publish_dir: "$publish_dir" +HERE + +tw launch https://github.com/openproblems-bio/task_dimensionality_reduction.git \ + --revision build/main \ + --pull-latest \ + --main-script target/nextflow/workflows/run_benchmark/main.nf \ + --workspace 53907369739130 \ + --compute-env 6TeIFgV5OY4pJCk8I0bfOh \ + --params-file /tmp/params.yaml \ + --entry-name auto \ + --config common/nextflow_helpers/labels_tw.config \ + --labels task_dimensionality_reduction,full diff --git a/scripts/run_benchmark/run_test_local.sh b/scripts/run_benchmark/run_test_local.sh new file mode 100755 index 0000000..f1a3b27 --- /dev/null +++ b/scripts/run_benchmark/run_test_local.sh @@ -0,0 +1,32 @@ +#!/bin/bash + +# get the root of the directory +REPO_ROOT=$(git rev-parse --show-toplevel) + +# ensure that the command below is run from the root of the repository +cd "$REPO_ROOT" + +set -e + +echo "Running benchmark on test data" +echo " Make sure to run 'scripts/project/build_all_docker_containers.sh'!" + +# generate a unique id +RUN_ID="testrun_$(date +%Y-%m-%d_%H-%M-%S)" +publish_dir="temp/results/${RUN_ID}" + +# write the parameters to file +cat > /tmp/params.yaml << HERE +id: cxg_mouse_pancreas_atlas +input_dataset: "resources_test/task_dimensionality_reduction/cxg_mouse_pancreas_atlas/dataset.h5ad" +input_solution: "resources_test/task_dimensionality_reduction/cxg_mouse_pancreas_atlas/solution.h5ad" +output_state: "state.yaml" +publish_dir: "$publish_dir" +HERE + +nextflow run . \ + -main-script target/nextflow/workflows/run_benchmark/main.nf \ + -profile docker \ + -resume \ + -c common/nextflow_helpers/labels_ci.config \ + -params-file /tmp/params.yaml diff --git a/scripts/run_benchmark/run_test_seqeracloud.sh b/scripts/run_benchmark/run_test_seqeracloud.sh new file mode 100755 index 0000000..77e0481 --- /dev/null +++ b/scripts/run_benchmark/run_test_seqeracloud.sh @@ -0,0 +1,35 @@ +#!/bin/bash + +# get the root of the directory +REPO_ROOT=$(git rev-parse --show-toplevel) + +# ensure that the command below is run from the root of the repository +cd "$REPO_ROOT" + +# # remove this when you have implemented the script +# echo "TODO: once the 'run_benchmark' workflow has been implemented, update this script to use it." +# echo " Step 1: replace 'task_template' with the name of the task in the following command." 
+# echo " Step 2: replace the rename keys parameters to fit your run_benchmark inputs" +# echo " Step 3: replace the settings parameter to fit your run_benchmark outputs" +# echo " Step 4: remove this message" +# exit 1 + +set -e + +# write the parameters to file +cat > /tmp/params.yaml << 'HERE' +input_dataset: s3://openproblems-data/resources_test/task_dimensionality_reduction/cxg_mouse_pancreas_atlas/dataset.h5ad +input_solution: s3://openproblems-data/resources_test/task_dimensionality_reduction/cxg_mouse_pancreas_atlas/solution.h5ad +output_state: "state.yaml" +publish_dir: s3://openproblems-nextflow/temp/task_dimensionality_reduction/ +HERE + +tw launch https://github.com/openproblems-bio/task_dimensionality_reduction.git \ + --revision build/main \ + --pull-latest \ + --main-script target/nextflow/workflows/run_benchmark/main.nf \ + --workspace 53907369739130 \ + --compute-env 6TeIFgV5OY4pJCk8I0bfOh \ + --params-file /tmp/params.yaml \ + --config common/nextflow_helpers/labels_tw.config \ + --labels task_dimensionality_reduction,test diff --git a/scripts/run_benchmark_test.sh b/scripts/run_benchmark_test.sh deleted file mode 100644 index 6c03d42..0000000 --- a/scripts/run_benchmark_test.sh +++ /dev/null @@ -1,19 +0,0 @@ -#!/bin/bash - -cat > /tmp/params.yaml << 'HERE' -input_states: s3://openproblems-data/resources_test/task_template/**/state.yaml -rename_keys: 'input_train:output_train;input_test:output_test' -output_state: "state.yaml" -publish_dir: s3://openproblems-nextflow/temp/task_template/ -HERE - -tw launch https://github.com/openproblems-bio/task_template.git \ - --revision build/main \ - --pull-latest \ - --main-script target/nextflow/workflows/run_benchmark/main.nf \ - --workspace 53907369739130 \ - --compute-env 6TeIFgV5OY4pJCk8I0bfOh \ - --params-file /tmp/params.yaml \ - --entry-name auto \ - --config common/nextflow_helpers/labels_tw.config \ - --labels task_template,test \ No newline at end of file diff --git a/scripts/sync_resources.sh b/scripts/sync_resources.sh new file mode 100755 index 0000000..20b87e7 --- /dev/null +++ b/scripts/sync_resources.sh @@ -0,0 +1,5 @@ +#!/bin/bash + +set -e + +common/scripts/sync_resources diff --git a/src/api/comp_control_method.yaml b/src/api/comp_control_method.yaml index 32e36e1..2394ee8 100644 --- a/src/api/comp_control_method.yaml +++ b/src/api/comp_control_method.yaml @@ -24,8 +24,8 @@ arguments: direction: output required: true test_resources: - - path: /resources_test/dimensionality_reduction/pancreas/ - dest: resources_test/dimensionality_reduction/pancreas/ + - path: /resources_test/task_dimensionality_reduction/cxg_mouse_pancreas_atlas/ + dest: resources_test/task_dimensionality_reduction/cxg_mouse_pancreas_atlas/ - type: python_script path: /common/component_tests/check_config.py - type: python_script diff --git a/src/api/comp_method.yaml b/src/api/comp_method.yaml index 9d5a856..8c435c4 100644 --- a/src/api/comp_method.yaml +++ b/src/api/comp_method.yaml @@ -17,8 +17,8 @@ arguments: direction: output required: true test_resources: - - path: /resources_test/dimensionality_reduction/pancreas/ - dest: resources_test/dimensionality_reduction/pancreas/ + - path: /resources_test/task_dimensionality_reduction/cxg_mouse_pancreas_atlas/ + dest: resources_test/task_dimensionality_reduction/cxg_mouse_pancreas_atlas/ - type: python_script path: /common/component_tests/check_config.py - type: python_script diff --git a/src/api/comp_metric.yaml b/src/api/comp_metric.yaml index d8fc778..dc2bdff 100644 --- a/src/api/comp_metric.yaml 
+++ b/src/api/comp_metric.yaml @@ -20,8 +20,8 @@ arguments: direction: output required: true test_resources: - - path: /resources_test/dimensionality_reduction/pancreas/ - dest: resources_test/dimensionality_reduction/pancreas/ + - path: /resources_test/task_dimensionality_reduction/cxg_mouse_pancreas_atlas/ + dest: resources_test/task_dimensionality_reduction/cxg_mouse_pancreas_atlas/ - type: python_script path: /common/component_tests/check_config.py - type: python_script diff --git a/src/api/comp_process_dataset.yaml b/src/api/comp_process_dataset.yaml index a8a3555..8db8527 100644 --- a/src/api/comp_process_dataset.yaml +++ b/src/api/comp_process_dataset.yaml @@ -20,7 +20,7 @@ arguments: direction: output required: true test_resources: - - path: /resources_test/common/pancreas/ - dest: resources_test/common/pancreas/ + - path: /resources_test/common/cxg_mouse_pancreas_atlas/ + dest: resources_test/common/cxg_mouse_pancreas_atlas/ - type: python_script path: /common/component_tests/run_and_check_output.py diff --git a/src/api/file_common_dataset.yaml b/src/api/file_common_dataset.yaml index 05a1d46..eb7462e 100644 --- a/src/api/file_common_dataset.yaml +++ b/src/api/file_common_dataset.yaml @@ -1,5 +1,5 @@ type: file -example: "resources_test/common/pancreas/dataset.h5ad" +example: "resources_test/common/cxg_mouse_pancreas_atlas/dataset.h5ad" label: "Dataset" summary: "The dataset to pass to a method." info: diff --git a/src/api/file_dataset.yaml b/src/api/file_dataset.yaml index fb94287..3f467ed 100644 --- a/src/api/file_dataset.yaml +++ b/src/api/file_dataset.yaml @@ -1,5 +1,5 @@ type: file -example: "resources_test/dimensionality_reduction/pancreas/dataset.h5ad" +example: "resources_test/task_dimensionality_reduction/cxg_mouse_pancreas_atlas/dataset.h5ad" label: "Dataset" summary: "The dataset to pass to a method." info: diff --git a/src/api/file_embedding.yaml b/src/api/file_embedding.yaml index cb6254c..fcaaa41 100644 --- a/src/api/file_embedding.yaml +++ b/src/api/file_embedding.yaml @@ -1,5 +1,5 @@ type: file -example: "resources_test/dimensionality_reduction/pancreas/embedding.h5ad" +example: "resources_test/task_dimensionality_reduction/cxg_mouse_pancreas_atlas/embedding.h5ad" label: "Embedding" summary: "A dataset with dimensionality reduction embedding." info: diff --git a/src/api/file_score.yaml b/src/api/file_score.yaml index 286ab81..c5e139b 100644 --- a/src/api/file_score.yaml +++ b/src/api/file_score.yaml @@ -1,5 +1,5 @@ type: file -example: "resources_test/dimensionality_reduction/pancreas/score.h5ad" +example: "resources_test/task_dimensionality_reduction/cxg_mouse_pancreas_atlas/score.h5ad" label: "Score" summary: "Metric score file" info: diff --git a/src/api/file_solution.yaml b/src/api/file_solution.yaml index 3f69f1c..b8fe4ad 100644 --- a/src/api/file_solution.yaml +++ b/src/api/file_solution.yaml @@ -1,5 +1,5 @@ type: file -example: "resources_test/dimensionality_reduction/pancreas/solution.h5ad" +example: "resources_test/task_dimensionality_reduction/cxg_mouse_pancreas_atlas/solution.h5ad" label: "Test data" summary: "The data for evaluating a dimensionality reduction." 
info: diff --git a/src/control_methods/random_features/script.py b/src/control_methods/random_features/script.py index 0fcac25..22de821 100644 --- a/src/control_methods/random_features/script.py +++ b/src/control_methods/random_features/script.py @@ -3,11 +3,11 @@ ## VIASH START par = { - "input": "resources_test/dimensionality_reduction/pancreas/test.h5ad", + "input": "resources_test/task_dimensionality_reduction/cxg_mouse_pancreas_atlas/test.h5ad", "output": "reduced.h5ad", } meta = { - "functionality_name": "random_features", + "name": "random_features", } ## VIASH END @@ -24,7 +24,7 @@ uns={ "dataset_id": input.uns["dataset_id"], "normalization_id": input.uns["normalization_id"], - "method_id": meta["functionality_name"], + "method_id": meta["name"], }, ) diff --git a/src/control_methods/spectral_features/script.py b/src/control_methods/spectral_features/script.py index a68e40c..6a2f142 100644 --- a/src/control_methods/spectral_features/script.py +++ b/src/control_methods/spectral_features/script.py @@ -3,12 +3,12 @@ ## VIASH START par = { - "input": "resources_test/dimensionality_reduction/pancreas/test.h5ad", + "input": "resources_test/task_dimensionality_reduction/cxg_mouse_pancreas_atlas/test.h5ad", "output": "reduced.h5ad", "n_comps": 2, } meta = { - "functionality_name": "spectral_features", + "name": "spectral_features", } ## VIASH END @@ -64,7 +64,7 @@ def diffusion_map(graph, n_comps, t, n_retries): uns={ "dataset_id": input.uns["dataset_id"], "normalization_id": input.uns["normalization_id"], - "method_id": meta["functionality_name"], + "method_id": meta["name"], }, ) diff --git a/src/control_methods/true_features/script.py b/src/control_methods/true_features/script.py index 52f701d..883b500 100644 --- a/src/control_methods/true_features/script.py +++ b/src/control_methods/true_features/script.py @@ -2,11 +2,11 @@ ## VIASH START par = { - "input": "resources_test/dimensionality_reduction/pancreas/test.h5ad", + "input": "resources_test/task_dimensionality_reduction/cxg_mouse_pancreas_atlas/test.h5ad", "output": "reduced.h5ad", } meta = { - "functionality_name": "true_features", + "name": "true_features", } ## VIASH END @@ -23,7 +23,7 @@ uns={ "dataset_id": input.uns["dataset_id"], "normalization_id": input.uns["normalization_id"], - "method_id": meta["functionality_name"], + "method_id": meta["name"], }, ) diff --git a/src/data_processors/process_dataset/config.vsh.yaml b/src/data_processors/process_dataset/config.vsh.yaml index 9663538..f672e4e 100644 --- a/src/data_processors/process_dataset/config.vsh.yaml +++ b/src/data_processors/process_dataset/config.vsh.yaml @@ -3,18 +3,8 @@ __merge__: /src/api/comp_process_dataset.yaml # Component configuration name: process_dataset -status: disabled # Script configuration -arguments: - - name: "--obs_label" - type: "string" - description: "Which .obs slot to use as label." - default: "cell_type" - - name: "--var_hvg_score" - type: "string" - description: "Which .var slot to use as the hvg score." 
- default: "hvg_score" resources: - type: python_script path: script.py diff --git a/src/data_processors/process_dataset/script.py b/src/data_processors/process_dataset/script.py index ffc4710..531a353 100644 --- a/src/data_processors/process_dataset/script.py +++ b/src/data_processors/process_dataset/script.py @@ -5,9 +5,7 @@ ## VIASH START par = { - "input": "resources_test/common/pancreas/dataset.h5ad", - "obs_label": "cell_type", - "var_hvg_score": "hvg_score", + "input": "resources_test/common/cxg_mouse_pancreas_atlas/dataset.h5ad", "output_dataset": "train.h5ad", "output_solution": "test.h5ad", } @@ -29,23 +27,11 @@ print(adata) -# Subset the different adatas -print(">> Figuring which data needs to be copied to which output file", flush=True) -# Use par arguments to look for values in different slots -slot_mapping = { - "obs": { - "label": par["obs_label"], - }, - "var": { - "hvg_score": par["var_hvg_score"], - }, -} - print(">> Creating input data", flush=True) -output_dataset = subset_h5ad_by_format(adata, config, "output_dataset", slot_mapping) +output_dataset = subset_h5ad_by_format(adata, config, "output_dataset") print(">> Creating solution data", flush=True) -output_solution = subset_h5ad_by_format(adata, config, "output_solution", slot_mapping) +output_solution = subset_h5ad_by_format(adata, config, "output_solution") print(">> Writing data", flush=True) output_dataset.write_h5ad(par["output_dataset"]) diff --git a/src/methods/densmap/script.py b/src/methods/densmap/script.py index 2510507..d331f04 100644 --- a/src/methods/densmap/script.py +++ b/src/methods/densmap/script.py @@ -4,13 +4,13 @@ ## VIASH START par = { - "input": "resources_test/dimensionality_reduction/pancreas/train.h5ad", + "input": "resources_test/task_dimensionality_reduction/cxg_mouse_pancreas_atlas/train.h5ad", "output": "reduced.h5ad", "n_pca_dims": 50, "n_hvg": 1000, } meta = { - "functionality_name": "foo", + "name": "densmap", } ## VIASH END @@ -40,7 +40,7 @@ uns={ "dataset_id": input.uns["dataset_id"], "normalization_id": input.uns["normalization_id"], - "method_id": meta["functionality_name"], + "method_id": meta["name"], }, ) diff --git a/src/methods/diffusion_map/script.R b/src/methods/diffusion_map/script.R index eaa7cba..5108ab2 100644 --- a/src/methods/diffusion_map/script.R +++ b/src/methods/diffusion_map/script.R @@ -1,6 +1,6 @@ ## VIASH START par <- list( - input = "resources_test/dimensionality_reduction/pancreas/dataset.h5ad", + input = "resources_test/task_dimensionality_reduction/cxg_mouse_pancreas_atlas/dataset.h5ad", output = "output.h5ad", n_dim = 2 ) @@ -24,7 +24,7 @@ output <- anndata::AnnData( uns = list( dataset_id = input$uns[["dataset_id"]], normalization_id = input$uns[["normalization_id"]], - method_id = meta$functionality_name + method_id = meta$name ), obsm = list( X_emb = X_emb diff --git a/src/methods/ivis/script.py b/src/methods/ivis/script.py index 6c67efc..9620325 100644 --- a/src/methods/ivis/script.py +++ b/src/methods/ivis/script.py @@ -6,13 +6,13 @@ ## VIASH START par = { - "input": "resources_test/dimensionality_reduction/pancreas/dataset.h5ad", + "input": "resources_test/task_dimensionality_reduction/cxg_mouse_pancreas_atlas/dataset.h5ad", "output": "reduced.h5ad", "n_hvg": 1000, "n_pca_dims": 50, } meta = { - "functionality_name": "foo", + "name": "ivis", } ## VIASH END @@ -47,7 +47,7 @@ uns={ "dataset_id": input.uns["dataset_id"], "normalization_id": input.uns["normalization_id"], - "method_id": meta["functionality_name"], + "method_id": meta["name"], }, ) diff 
--git a/src/methods/lmds/script.R b/src/methods/lmds/script.R index 71167ef..26771f8 100644 --- a/src/methods/lmds/script.R +++ b/src/methods/lmds/script.R @@ -1,6 +1,6 @@ ## VIASH START par <- list( - input = "resources_test/dimensionality_reduction/pancreas/dataset.h5ad", + input = "resources_test/task_dimensionality_reduction/cxg_mouse_pancreas_atlas/dataset.h5ad", output = "output.h5ad", n_dim = 3, n_landmarks = 1000, @@ -25,7 +25,7 @@ message("Write output AnnData to file") output <- anndata::AnnData( uns = list( dataset_id = input$uns[["dataset_id"]], - method_id = meta$functionality_name, + method_id = meta$name, normalization_id = input$uns[["normalization_id"]] ), obsm = list( diff --git a/src/methods/neuralee/script.py b/src/methods/neuralee/script.py index 61e05c4..78d6e61 100644 --- a/src/methods/neuralee/script.py +++ b/src/methods/neuralee/script.py @@ -8,14 +8,14 @@ ## VIASH START par = { - "input": "resources_test/dimensionality_reduction/pancreas/train.h5ad", + "input": "resources_test/task_dimensionality_reduction/cxg_mouse_pancreas_atlas/train.h5ad", "output": "reduced.h5ad", "n_hvg": 1000, "n_iter": 10, "normalize": True, } meta = { - "functionality_name": "foo", + "name": "neuralee", } ## VIASH END @@ -68,7 +68,7 @@ uns={ "dataset_id": input.uns["dataset_id"], "normalization_id": input.uns["normalization_id"], - "method_id": meta["functionality_name"], + "method_id": meta["name"], }, ) diff --git a/src/methods/pca/script.py b/src/methods/pca/script.py index b04ab01..8137a8f 100644 --- a/src/methods/pca/script.py +++ b/src/methods/pca/script.py @@ -3,12 +3,12 @@ ## VIASH START par = { - "input": "resources_test/dimensionality_reduction/pancreas/train.h5ad", + "input": "resources_test/task_dimensionality_reduction/cxg_mouse_pancreas_atlas/train.h5ad", "output": "reduced.h5ad", "n_hvg": 1000, } meta = { - "functionality_name": "foo", + "name": "pca", } ## VIASH END @@ -31,7 +31,7 @@ uns={ "dataset_id": input.uns["dataset_id"], "normalization_id": input.uns["normalization_id"], - "method_id": meta["functionality_name"], + "method_id": meta["name"], }, ) diff --git a/src/methods/phate/config.vsh.yaml b/src/methods/phate/config.vsh.yaml index 038c4e7..853fda6 100644 --- a/src/methods/phate/config.vsh.yaml +++ b/src/methods/phate/config.vsh.yaml @@ -58,6 +58,7 @@ engines: - phate==1.0.* - scprep - "scikit-learn<1.2" + - numpy<2.0.0 # Avoid "numpy.ndarray size changed, may indicate binary incompatibility" error runners: - type: executable - type: nextflow diff --git a/src/methods/phate/script.py b/src/methods/phate/script.py index 003b467..0daa002 100644 --- a/src/methods/phate/script.py +++ b/src/methods/phate/script.py @@ -3,14 +3,14 @@ ## VIASH START par = { - "input": "resources_test/dimensionality_reduction/pancreas/train.h5ad", + "input": "resources_test/task_dimensionality_reduction/cxg_mouse_pancreas_atlas/train.h5ad", "output": "reduced.h5ad", "n_pca_dims": 50, "n_hvg": 1000, "gamma": 1, } meta = { - "functionality_name": "foo", + "name": "phate", } ## VIASH END @@ -35,7 +35,7 @@ uns={ "dataset_id": input.uns["dataset_id"], "normalization_id": input.uns["normalization_id"], - "method_id": meta["functionality_name"], + "method_id": meta["name"], }, ) diff --git a/src/methods/pymde/config.vsh.yaml b/src/methods/pymde/config.vsh.yaml index 2bb7987..7d7ed8f 100644 --- a/src/methods/pymde/config.vsh.yaml +++ b/src/methods/pymde/config.vsh.yaml @@ -46,7 +46,9 @@ engines: image: openproblems/base_python:1.0.0 setup: - type: python - packages: pymde + packages: + - pymde + - 
numpy<2.0.0 # Avoid "numpy.ndarray size changed, may indicate binary incompatibility" error, see https://github.com/cvxgrp/pymde/issues/19 runners: - type: executable - type: nextflow diff --git a/src/methods/pymde/script.py b/src/methods/pymde/script.py index 0483dad..8e42ac2 100644 --- a/src/methods/pymde/script.py +++ b/src/methods/pymde/script.py @@ -4,14 +4,14 @@ ## VIASH START par = { - "input": "resources_test/dimensionality_reduction/pancreas/dataset.h5ad", + "input": "resources_test/task_dimensionality_reduction/cxg_mouse_pancreas_atlas/dataset.h5ad", "output": "reduced.h5ad", "embed_method": "neighbors", "n_hvg": 1000, "n_pca_dims": 50, } meta = { - "functionality_name": "foo", + "name": "pymde", } ## VIASH END @@ -46,7 +46,7 @@ uns={ "dataset_id": input.uns["dataset_id"], "normalization_id": input.uns["normalization_id"], - "method_id": meta["functionality_name"], + "method_id": meta["name"], }, ) diff --git a/src/methods/simlr/script.R b/src/methods/simlr/script.R index 9591690..e194b02 100644 --- a/src/methods/simlr/script.R +++ b/src/methods/simlr/script.R @@ -1,6 +1,6 @@ ## VIASH START par <- list( - input = "resources_test/dimensionality_reduction/pancreas/dataset.h5ad", + input = "resources_test/task_dimensionality_reduction/cxg_mouse_pancreas_atlas/dataset.h5ad", output = "output.h5ad", n_clusters = NULL, n_dim = NA, @@ -10,7 +10,7 @@ par <- list( cores_ratio = 1 ) meta <- list( - functionality_name = "simlr" + name = "simlr" ) ## VIASH END @@ -55,7 +55,7 @@ message("Write output AnnData to file") output <- anndata::AnnData( uns = list( dataset_id = input$uns[["dataset_id"]], - method_id = meta$functionality_name, + method_id = meta$name, normalization_id = input$uns[["normalization_id"]] ), obsm = list( diff --git a/src/methods/tsne/script.py b/src/methods/tsne/script.py index 82e0367..50e5d82 100644 --- a/src/methods/tsne/script.py +++ b/src/methods/tsne/script.py @@ -3,13 +3,13 @@ ## VIASH START par = { - "input": "resources_test/dimensionality_reduction/pancreas/train.h5ad", + "input": "resources_test/task_dimensionality_reduction/cxg_mouse_pancreas_atlas/train.h5ad", "output": "reduced.h5ad", "n_pca_dims": 50, "n_hvg": 1000, } meta = { - "functionality_name": "foo", + "name": "tsne", } ## VIASH END @@ -37,7 +37,7 @@ uns={ "dataset_id": input.uns["dataset_id"], "normalization_id": input.uns["normalization_id"], - "method_id": meta["functionality_name"], + "method_id": meta["name"], }, ) diff --git a/src/methods/umap/script.py b/src/methods/umap/script.py index 69b885d..128222f 100644 --- a/src/methods/umap/script.py +++ b/src/methods/umap/script.py @@ -4,13 +4,13 @@ ## VIASH START par = { - "input": "resources_test/dimensionality_reduction/pancreas/train.h5ad", + "input": "resources_test/task_dimensionality_reduction/cxg_mouse_pancreas_atlas/train.h5ad", "output": "reduced.h5ad", "n_pca_dims": 50, "n_hvg": 1000, } meta = { - "functionality_name": "umap", + "name": "umap", } ## VIASH END @@ -40,7 +40,7 @@ uns={ "dataset_id": input.uns["dataset_id"], "normalization_id": input.uns["normalization_id"], - "method_id": meta["functionality_name"], + "method_id": meta["name"], }, ) diff --git a/src/metrics/clustering_performance/script.py b/src/metrics/clustering_performance/script.py index de66550..5f33886 100644 --- a/src/metrics/clustering_performance/script.py +++ b/src/metrics/clustering_performance/script.py @@ -4,12 +4,12 @@ ## VIASH START par = { - "input_embedding": "resources_test/dimensionality_reduction/pancreas/embedding.h5ad", - "input_solution": 
"resources_test/dimensionality_reduction/pancreas/solution.h5ad", + "input_embedding": "resources_test/task_dimensionality_reduction/cxg_mouse_pancreas_atlas/embedding.h5ad", + "input_solution": "resources_test/task_dimensionality_reduction/cxg_mouse_pancreas_atlas/solution.h5ad", "output": "output.h5ad", "nmi_avg_method": "arithmetic", } -meta = {"functionality_name": "clustering_performance"} +meta = {"name": "clustering_performance"} ## VIASH END print("Reading input files", flush=True) diff --git a/src/metrics/coranking/script.R b/src/metrics/coranking/script.R index 74c1f1b..a835bdb 100644 --- a/src/metrics/coranking/script.R +++ b/src/metrics/coranking/script.R @@ -3,8 +3,8 @@ library(coRanking) ## VIASH START par <- list( - "input_embedding" = "resources_test/dimensionality_reduction/pancreas/reduced.h5ad", - "input_solution" = "resources_test/dimensionality_reduction/pancreas/test.h5ad", + "input_embedding" = "resources_test/task_dimensionality_reduction/cxg_mouse_pancreas_atlas/reduced.h5ad", + "input_solution" = "resources_test/task_dimensionality_reduction/cxg_mouse_pancreas_atlas/test.h5ad", "output" = "score.h5ad" ) ## VIASH END diff --git a/src/metrics/density_preservation/script.py b/src/metrics/density_preservation/script.py index 5635902..37cf27b 100644 --- a/src/metrics/density_preservation/script.py +++ b/src/metrics/density_preservation/script.py @@ -6,8 +6,8 @@ ## VIASH START par = { - "input_embedding": "resources_test/dimensionality_reduction/pancreas/reduced.h5ad", - "input_solution": "resources_test/dimensionality_reduction/pancreas/test.h5ad", + "input_embedding": "resources_test/task_dimensionality_reduction/cxg_mouse_pancreas_atlas/reduced.h5ad", + "input_solution": "resources_test/task_dimensionality_reduction/cxg_mouse_pancreas_atlas/test.h5ad", "output": "score.h5ad", "n_neighbors": 30, "seed": 42, diff --git a/src/metrics/distance_correlation/script.py b/src/metrics/distance_correlation/script.py index a925584..b6a6117 100644 --- a/src/metrics/distance_correlation/script.py +++ b/src/metrics/distance_correlation/script.py @@ -8,8 +8,8 @@ ## VIASH START par = { - "input_embedding": "resources_test/dimensionality_reduction/pancreas/embedding.h5ad", - "input_solution": "resources_test/dimensionality_reduction/pancreas/solution.h5ad", + "input_embedding": "resources_test/task_dimensionality_reduction/cxg_mouse_pancreas_atlas/embedding.h5ad", + "input_solution": "resources_test/task_dimensionality_reduction/cxg_mouse_pancreas_atlas/solution.h5ad", "output": "score.h5ad", } ## VIASH END diff --git a/src/metrics/trustworthiness/script.py b/src/metrics/trustworthiness/script.py index cbd7d39..b731b45 100644 --- a/src/metrics/trustworthiness/script.py +++ b/src/metrics/trustworthiness/script.py @@ -3,8 +3,8 @@ ## VIASH START par = { - "input_embedding": "resources_test/dimensionality_reduction/pancreas/reduced.h5ad", - "input_solution": "resources_test/dimensionality_reduction/pancreas/test.h5ad", + "input_embedding": "resources_test/task_dimensionality_reduction/cxg_mouse_pancreas_atlas/reduced.h5ad", + "input_solution": "resources_test/task_dimensionality_reduction/cxg_mouse_pancreas_atlas/test.h5ad", "output": "score.h5ad", } ## VIASH END diff --git a/src/workflows/process_datasets/config.vsh.yaml b/src/workflows/process_datasets/config.vsh.yaml index 7000eb8..032cc8e 100644 --- a/src/workflows/process_datasets/config.vsh.yaml +++ b/src/workflows/process_datasets/config.vsh.yaml @@ -1,29 +1,23 @@ name: process_datasets namespace: workflows -status: disabled - 
argument_groups: - # - name: Inputs - # arguments: - # - name: "--input" - # __merge__: /src/api/file_common_dataset.yaml - # required: true - # direction: input - # - name: Outputs - # arguments: - # - name: "--output_train" - # __merge__: /src/api/file_train_h5ad.yaml - # required: true - # direction: output - # - name: "--output_test" - # __merge__: /src/api/file_test_h5ad.yaml - # required: true - # direction: output - # - name: "--output_solution" - # __merge__: /src/api/file_solution.yaml - # required: true - # direction: output + - name: Inputs + arguments: + - name: "--input" + __merge__: /src/api/file_common_dataset.yaml + required: true + direction: input + - name: Outputs + arguments: + - name: "--output_dataset" + __merge__: /src/api/file_dataset.yaml + required: true + direction: output + - name: "--output_solution" + __merge__: /src/api/file_solution.yaml + required: true + direction: output resources: - type: nextflow_script @@ -32,10 +26,8 @@ resources: - path: /common/nextflow_helpers/helper.nf dependencies: - - name: common/check_dataset_schema - repository: openproblems-v2 - - name: common/extract_metadata - repository: openproblems-v2 + - name: schema/verify_data_structure + repository: core - name: data_processors/process_dataset runners: diff --git a/src/workflows/process_datasets/main.nf b/src/workflows/process_datasets/main.nf index eae19f7..4459118 100644 --- a/src/workflows/process_datasets/main.nf +++ b/src/workflows/process_datasets/main.nf @@ -1,7 +1,7 @@ include { findArgumentSchema } from "${meta.resources_dir}/helper.nf" workflow auto { - findStatesTemp(params, meta.config) + findStates(params, meta.config) | meta.workflow.run( auto: [publish: "state"] ) @@ -14,7 +14,7 @@ workflow run_wf { main: output_ch = input_ch - | check_dataset_schema.run( + | verify_data_structure.run( fromState: { id, state -> def schema = findArgumentSchema(meta.config, "input") def schemaYaml = tempFile("schema.yaml") @@ -39,135 +39,16 @@ workflow run_wf { } | process_dataset.run( - fromState: [ input: "dataset" ], + fromState: [ input: "input" ], toState: [ - output_train: "output_train", - output_test: "output_test", + output_dataset: "output_dataset", output_solution: "output_solution" ] ) // only output the files for which an output file was specified - | setState(["output_train", "output_test", "output_solution"]) + | setState(["output_dataset", "output_solution"]) emit: output_ch } - - -// temp fix for rename_keys typo - -def findStatesTemp(Map params, Map config) { - def auto_config = deepClone(config) - def auto_params = deepClone(params) - - auto_config = auto_config.clone() - // override arguments - auto_config.argument_groups = [] - auto_config.arguments = [ - [ - type: "string", - name: "--id", - description: "A dummy identifier", - required: false - ], - [ - type: "file", - name: "--input_states", - example: "/path/to/input/directory/**/state.yaml", - description: "Path to input directory containing the datasets to be integrated.", - required: true, - multiple: true, - multiple_sep: ";" - ], - [ - type: "string", - name: "--filter", - example: "foo/.*/state.yaml", - description: "Regex to filter state files by path.", - required: false - ], - // to do: make this a yaml blob? - [ - type: "string", - name: "--rename_keys", - example: ["newKey1:oldKey1", "newKey2:oldKey2"], - description: "Rename keys in the detected input files. 
This is useful if the input files do not match the set of input arguments of the workflow.", - required: false, - multiple: true, - multiple_sep: ";" - ], - [ - type: "string", - name: "--settings", - example: '{"output_dataset": "dataset.h5ad", "k": 10}', - description: "Global arguments as a JSON glob to be passed to all components.", - required: false - ] - ] - if (!(auto_params.containsKey("id"))) { - auto_params["id"] = "auto" - } - - // run auto config through processConfig once more - auto_config = processConfig(auto_config) - - workflow findStatesTempWf { - helpMessage(auto_config) - - output_ch = - channelFromParams(auto_params, auto_config) - | flatMap { autoId, args -> - - def globalSettings = args.settings ? readYamlBlob(args.settings) : [:] - - // look for state files in input dir - def stateFiles = args.input_states - - // filter state files by regex - if (args.filter) { - stateFiles = stateFiles.findAll{ stateFile -> - def stateFileStr = stateFile.toString() - def matcher = stateFileStr =~ args.filter - matcher.matches()} - } - - // read in states - def states = stateFiles.collect { stateFile -> - def state_ = readTaggedYaml(stateFile) - [state_.id, state_] - } - - // construct renameMap - if (args.rename_keys) { - def renameMap = args.rename_keys.collectEntries{renameString -> - def split = renameString.split(":") - assert split.size() == 2: "Argument 'rename_keys' should be of the form 'newKey:oldKey;newKey:oldKey'" - split - } - - // rename keys in state, only let states through which have all keys - // also add global settings - states = states.collectMany{id, state -> - def newState = [:] - - for (key in renameMap.keySet()) { - def origKey = renameMap[key] - if (!(state.containsKey(origKey))) { - return [] - } - newState[key] = state[origKey] - } - - [[id, globalSettings + newState]] - } - } - - states - } - emit: - output_ch - } - - return findStatesTempWf -} \ No newline at end of file diff --git a/src/workflows/run_benchmark/config.vsh.yaml b/src/workflows/run_benchmark/config.vsh.yaml index 1976ed4..25e153a 100644 --- a/src/workflows/run_benchmark/config.vsh.yaml +++ b/src/workflows/run_benchmark/config.vsh.yaml @@ -1,60 +1,53 @@ name: run_benchmark namespace: workflows -status: disabled - argument_groups: - # - name: Inputs - # arguments: - # - name: "--input_train" - # __merge__: /src/api/file_train_h5ad.yaml - # type: file - # direction: input - # required: true - # - name: "--input_test" - # __merge__: /src/api/file_test_h5ad.yaml - # type: file - # direction: input - # required: true - # - name: "--input_solution" - # __merge__: /src/api/file_solution.yaml - # type: file - # direction: input - # required: true - # - name: Outputs - # arguments: - # - name: "--output_scores" - # type: file - # required: true - # direction: output - # description: A yaml file containing the scores of each of the methods - # default: score_uns.yaml - # - name: "--output_method_configs" - # type: file - # required: true - # direction: output - # default: method_configs.yaml - # - name: "--output_metric_configs" - # type: file - # required: true - # direction: output - # default: metric_configs.yaml - # - name: "--output_dataset_info" - # type: file - # required: true - # direction: output - # default: dataset_uns.yaml - # - name: "--output_task_info" - # type: file - # required: true - # direction: output - # default: task_info.yaml - # - name: Methods - # arguments: - # - name: "--method_ids" - # type: string - # multiple: true - # description: A list of method ids to run. 
If not specified, all methods will be run. + - name: Inputs + arguments: + - name: "--input_dataset" + __merge__: /src/api/file_dataset.yaml + type: file + direction: input + required: true + - name: "--input_solution" + __merge__: /src/api/file_solution.yaml + type: file + direction: input + required: true + - name: Outputs + arguments: + - name: "--output_scores" + type: file + required: true + direction: output + description: A yaml file containing the scores of each of the methods + default: score_uns.yaml + - name: "--output_method_configs" + type: file + required: true + direction: output + default: method_configs.yaml + - name: "--output_metric_configs" + type: file + required: true + direction: output + default: metric_configs.yaml + - name: "--output_dataset_info" + type: file + required: true + direction: output + default: dataset_uns.yaml + - name: "--output_task_info" + type: file + required: true + direction: output + default: task_info.yaml + - name: Methods + arguments: + - name: "--method_ids" + type: string + multiple: true + description: A list of method ids to run. If not specified, all methods will be run. resources: - type: nextflow_script @@ -64,13 +57,30 @@ resources: path: /_viash.yaml dependencies: - - name: common/check_dataset_schema - repository: openproblems-v2 - - name: common/extract_metadata - repository: openproblems-v2 - - name: control_methods/true_labels - - name: methods/logistic_regression - - name: metrics/accuracy + - name: h5ad/extract_uns_metadata + repository: core + # Control methods + - name: control_methods/random_features + - name: control_methods/spectral_features + - name: control_methods/true_features + # Methods + - name: methods/densmap + - name: methods/diffusion_map + - name: methods/ivis + - name: methods/lmds + - name: methods/neuralee + - name: methods/pca + - name: methods/phate + - name: methods/pymde + - name: methods/simlr + - name: methods/tsne + - name: methods/umap + # Metrics + - name: metrics/clustering_performance + - name: metrics/coranking + - name: metrics/density_preservation + - name: metrics/distance_correlation + - name: metrics/trustworthiness runners: - type: nextflow diff --git a/src/workflows/run_benchmark/main.nf b/src/workflows/run_benchmark/main.nf index 68e5ecd..631f4ed 100644 --- a/src/workflows/run_benchmark/main.nf +++ b/src/workflows/run_benchmark/main.nf @@ -1,38 +1,56 @@ workflow auto { - findStatesTemp(params, meta.config) + findStates(params, meta.config) | meta.workflow.run( auto: [publish: "state"] ) } +// construct list of methods and control methods +methods = [ + // Control methods + random_features, + spectral_features, + true_features, + // Real methods + densmap, + diffusion_map, + ivis, + lmds, + neuralee, + pca, + phate, + pymde, + simlr, + tsne, + umap +] + +// construct list of metrics +metrics = [ + clustering_performance, + coranking, + density_preservation, + distance_correlation, + trustworthiness +] + workflow run_wf { take: input_ch main: - // construct list of methods - methods = [ - true_labels, - logistic_regression - ] - - // construct list of metrics - metrics = [ - accuracy - ] - /**************************** * EXTRACT DATASET METADATA * ****************************/ dataset_ch = input_ch // store join id - | map{ id, state -> + | map{ id, state -> [id, state + ["_meta": [join_id: id]]] } // extract the dataset metadata - | extract_metadata.run( + | extract_uns_metadata.run( fromState: [input: "input_solution"], toState: { id, output, state -> state + [ @@ -70,8 +88,7 @@ 
workflow run_wf { // use 'fromState' to fetch the arguments the component requires from the overall state fromState: { id, state, comp -> def new_args = [ - input_train: state.input_train, - input_test: state.input_test + input: state.input_dataset, ] if (comp.config.info.type == "control_method") { new_args.input_solution = state.input_solution @@ -96,8 +113,8 @@ workflow run_wf { }, // use 'fromState' to fetch the arguments the component requires from the overall state fromState: [ - input_solution: "input_solution", - input_prediction: "method_output" + input_solution: "input_solution", + input_embedding: "method_output" ], // use 'toState' to publish that component's outputs to the overall state toState: { id, output, state, comp -> @@ -108,6 +125,26 @@ workflow run_wf { } ) + // extract the scores + | extract_uns_metadata.run( + key: "extract_scores", + fromState: [input: "metric_output"], + toState: { id, output, state -> + state + [ + score_uns: readYaml(output.output).uns + ] + } + ) + + | joinStates { ids, states -> + // store the scores in a file + def score_uns = states.collect{it.score_uns} + def score_uns_yaml_blob = toYamlBlob(score_uns) + def score_uns_file = tempFile("score_uns.yaml") + score_uns_file.write(score_uns_yaml_blob) + + ["output", [output_scores: score_uns_file]] + } /****************************** * GENERATE OUTPUT YAML FILES * @@ -115,7 +152,7 @@ workflow run_wf { // TODO: can we store everything below in a separate helper function? // extract the dataset metadata - dataset_meta_ch = dataset_ch + meta_ch = dataset_ch // only keep one of the normalization methods | filter{ id, state -> state.dataset_uns.normalization_id == "log_cp10k" @@ -131,23 +168,6 @@ workflow run_wf { def dataset_uns_file = tempFile("dataset_uns.yaml") dataset_uns_file.write(dataset_uns_yaml_blob) - ["output", [output_dataset_info: dataset_uns_file]] - } - - output_ch = score_ch - - // extract the scores - | extract_metadata.run( - key: "extract_scores", - fromState: [input: "metric_output"], - toState: { id, output, state -> - state + [ - score_uns: readYaml(output.output).uns - ] - } - ) - - | joinStates { ids, states -> // store the method configs in a file def method_configs = methods.collect{it.config} def method_configs_yaml_blob = toYamlBlob(method_configs) @@ -160,30 +180,24 @@ workflow run_wf { def metric_configs_file = tempFile("metric_configs.yaml") metric_configs_file.write(metric_configs_yaml_blob) + // store the task info in a file def viash_file = meta.resources_dir.resolve("_viash.yaml") - def viash_file_content = toYamlBlob(readYaml(viash_file).info) - def task_info_file = tempFile("task_info.yaml") - task_info_file.write(viash_file_content) - - // store the scores in a file - def score_uns = states.collect{it.score_uns} - def score_uns_yaml_blob = toYamlBlob(score_uns) - def score_uns_file = tempFile("score_uns.yaml") - score_uns_file.write(score_uns_yaml_blob) + // create output state def new_state = [ + output_dataset_info: dataset_uns_file, output_method_configs: method_configs_file, output_metric_configs: metric_configs_file, - output_task_info: task_info_file, - output_scores: score_uns_file, + output_task_info: viash_file, _meta: states[0]._meta ] ["output", new_state] } - // merge all of the output data - | mix(dataset_meta_ch) + // merge all of the output data + output_ch = score_ch + | mix(meta_ch) | joinStates{ ids, states -> def mergedStates = states.inject([:]) { acc, m -> acc + m } [ids[0], mergedStates] @@ -192,120 +206,3 @@ workflow run_wf { emit: output_ch 
} - -// temp fix for rename_keys typo - -def findStatesTemp(Map params, Map config) { - def auto_config = deepClone(config) - def auto_params = deepClone(params) - - auto_config = auto_config.clone() - // override arguments - auto_config.argument_groups = [] - auto_config.arguments = [ - [ - type: "string", - name: "--id", - description: "A dummy identifier", - required: false - ], - [ - type: "file", - name: "--input_states", - example: "/path/to/input/directory/**/state.yaml", - description: "Path to input directory containing the datasets to be integrated.", - required: true, - multiple: true, - multiple_sep: ";" - ], - [ - type: "string", - name: "--filter", - example: "foo/.*/state.yaml", - description: "Regex to filter state files by path.", - required: false - ], - // to do: make this a yaml blob? - [ - type: "string", - name: "--rename_keys", - example: ["newKey1:oldKey1", "newKey2:oldKey2"], - description: "Rename keys in the detected input files. This is useful if the input files do not match the set of input arguments of the workflow.", - required: false, - multiple: true, - multiple_sep: ";" - ], - [ - type: "string", - name: "--settings", - example: '{"output_dataset": "dataset.h5ad", "k": 10}', - description: "Global arguments as a JSON glob to be passed to all components.", - required: false - ] - ] - if (!(auto_params.containsKey("id"))) { - auto_params["id"] = "auto" - } - - // run auto config through processConfig once more - auto_config = processConfig(auto_config) - - workflow findStatesTempWf { - helpMessage(auto_config) - - output_ch = - channelFromParams(auto_params, auto_config) - | flatMap { autoId, args -> - - def globalSettings = args.settings ? readYamlBlob(args.settings) : [:] - - // look for state files in input dir - def stateFiles = args.input_states - - // filter state files by regex - if (args.filter) { - stateFiles = stateFiles.findAll{ stateFile -> - def stateFileStr = stateFile.toString() - def matcher = stateFileStr =~ args.filter - matcher.matches()} - } - - // read in states - def states = stateFiles.collect { stateFile -> - def state_ = readTaggedYaml(stateFile) - [state_.id, state_] - } - - // construct renameMap - if (args.rename_keys) { - def renameMap = args.rename_keys.collectEntries{renameString -> - def split = renameString.split(":") - assert split.size() == 2: "Argument 'rename_keys' should be of the form 'newKey:oldKey;newKey:oldKey'" - split - } - - // rename keys in state, only let states through which have all keys - // also add global settings - states = states.collectMany{id, state -> - def newState = [:] - - for (key in renameMap.keySet()) { - def origKey = renameMap[key] - if (!(state.containsKey(origKey))) { - return [] - } - newState[key] = state[origKey] - } - - [[id, globalSettings + newState]] - } - } - - states - } - emit: - output_ch - } - - return findStatesTempWf -} \ No newline at end of file
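A minimal end-to-end smoke test of this patch might look as follows. This is a sketch, not part of the patch itself: it assumes Docker, Viash, Nextflow, and the AWS CLI are installed, and the patch file name is hypothetical (git format-patch derives it from the subject line). The script paths are the ones added by this patch.

    # apply the patch and refresh the 'common' submodule it bumps
    git am 0001-Update-workflows-2.patch
    git submodule update --init

    # download the test resources listed under test_resources in _viash.yaml
    scripts/sync_resources.sh

    # build all components and set up their Docker containers
    scripts/project/build_all_docker_containers.sh

    # run the benchmark workflow on the cxg_mouse_pancreas_atlas test data
    scripts/run_benchmark/run_test_local.sh

If the test run succeeds, scores land in temp/results/testrun_<timestamp>/, matching the publish_dir set in run_test_local.sh.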