update resources

openproblems-bio · Sep 2, 2024 · de9fe31 · de9fe31
1 parent ee8b2ff
commit de9fe31
Show file tree

Hide file tree

Showing 29 changed files with 92 additions and 64 deletions.
diff --git a/CHANGELOG.md b/CHANGELOG.md
@@ -1,4 +1,4 @@
-# task_template x.y.z
+# task_label_projection x.y.z
 
 ## BREAKING CHANGES
 

diff --git a/README.md b/README.md
@@ -140,7 +140,8 @@ Arguments:
 
 The solution for the test data
 
-Example file: `resources_test/label_projection/pancreas/solution.h5ad`
+Example file:
+`resources_test/task_label_projection/pancreas/solution.h5ad`
 
 Format:
 
@@ -183,7 +184,7 @@ Data structure:
 
 The test data (without labels)
 
-Example file: `resources_test/label_projection/pancreas/test.h5ad`
+Example file: `resources_test/task_label_projection/pancreas/test.h5ad`
 
 Format:
 
@@ -219,7 +220,7 @@ Data structure:
 
 The training data
 
-Example file: `resources_test/label_projection/pancreas/train.h5ad`
+Example file: `resources_test/task_label_projection/pancreas/train.h5ad`
 
 Format:
 
@@ -305,7 +306,8 @@ Arguments:
 
 The prediction file
 
-Example file: `resources_test/label_projection/pancreas/prediction.h5ad`
+Example file:
+`resources_test/task_label_projection/pancreas/prediction.h5ad`
 
 Format:
 
@@ -334,7 +336,7 @@ Data structure:
 
 Metric score file
 
-Example file: `resources_test/label_projection/pancreas/score.h5ad`
+Example file: `resources_test/task_label_projection/pancreas/score.h5ad`
 
 Format:
 

diff --git a/_viash.yaml b/_viash.yaml
@@ -58,8 +58,8 @@ info:
       path: s3://openproblems-data/resources_test/common/pancreas/
       dest: resources_test/common/pancreas
     - type: s3
-      path: s3://openproblems-data/resources_test/label_projection/
-      dest: resources_test/label_projection
+      path: s3://openproblems-data/resources_test/task_label_projection/
+      dest: resources_test/task_label_projection
 
 authors:
   - name: "Nikolay Markov"

diff --git a/scripts/create_resources.sh b/scripts/create_resources.sh
@@ -0,0 +1,25 @@
+#!/bin/bash
+
+# Launch the process_datasets workflow with `tw launch` to produce the
+# train/test/solution files for this task under the publish_dir below.
+
+# The quoted 'HERE' delimiter keeps `$id` literal (no shell expansion).
+cat > /tmp/params.yaml << 'HERE'
+input_states: s3://openproblems-data/resources/datasets/**/state.yaml
+rename_keys: 'input:output_dataset'
+output_state: "state.yaml"
+settings: '{"output_train": "$id/train.h5ad", "output_test": "$id/test.h5ad", "output_solution": "$id/solution.h5ad"}'
+publish_dir: s3://openproblems-data/resources/task_label_projection/datasets/
+HERE
+
+# The params above are dataset-processing params, so run process_datasets.
+tw launch https://github.com/openproblems-bio/task_label_projection.git \
+  --revision build/main \
+  --pull-latest \
+  --main-script target/nextflow/workflows/process_datasets/main.nf \
+  --workspace 53907369739130 \
+  --compute-env 6TeIFgV5OY4pJCk8I0bfOh \
+  --params-file /tmp/params.yaml \
+  --entry-name auto \
+  --config common/nextflow_helpers/labels_tw.config \
+  --labels task_label_projection,create_resources
diff --git a/scripts/create_test_resources.sh b/scripts/create_test_resources.sh
@@ -9,7 +9,7 @@ cd "$REPO_ROOT"
 set -e
 
 RAW_DATA=resources_test/common
-DATASET_DIR=resources_test/task_template
+DATASET_DIR=resources_test/task_label_projection
 
 mkdir -p $DATASET_DIR
 
@@ -18,21 +18,27 @@ echo Running process_dataset
 nextflow run . \
   -main-script target/nextflow/workflows/process_datasets/main.nf \
   -profile docker \
-  -entry auto \
-  --input_states "$RAW_DATA/**/state.yaml" \
-  --rename_keys 'input:output_dataset' \
-  --settings '{"output_train": "$id/train.h5ad", "output_test": "$id/test.h5ad"}' \
   --publish_dir "$DATASET_DIR" \
+  --id "pancreas" \
+  --input "$RAW_DATA/pancreas/dataset.h5ad" \
+  --output_train '$id/train.h5ad' \
+  --output_test '$id/test.h5ad' \
+  --output_solution '$id/solution.h5ad' \
   --output_state '$id/state.yaml'
 
 # run one method
-viash run src/methods/logistic_regression/config.vsh.yaml -- \
+viash run src/methods/knn/config.vsh.yaml -- \
     --input_train $DATASET_DIR/pancreas/train.h5ad \
     --input_test $DATASET_DIR/pancreas/test.h5ad \
-    --output $DATASET_DIR/pancreas/denoised.h5ad
+    --output $DATASET_DIR/pancreas/prediction.h5ad
 
 # run one metric
 viash run src/metrics/accuracy/config.vsh.yaml -- \
-    --input_predicition $DATASET_DIR/pancreas/predicted.h5ad \
+    --input_prediction $DATASET_DIR/pancreas/prediction.h5ad \
     --input_solution $DATASET_DIR/pancreas/solution.h5ad \
-    --output $DATASET_DIR/pancreas/score.h5ad
+    --output $DATASET_DIR/pancreas/score.h5ad
+
+# only run with openproblems-data bucket access; --dryrun only previews
+aws s3 sync --profile op \
+  "$DATASET_DIR" s3://openproblems-data/resources_test/task_label_projection \
+  --delete --dryrun
diff --git a/scripts/run_benchmark.sh b/scripts/run_benchmark.sh
@@ -1,17 +1,17 @@
 #!/bin/bash
 
 RUN_ID="run_$(date +%Y-%m-%d_%H-%M-%S)"
-publish_dir="s3://openproblems-data/resources/task_template/results/${RUN_ID}"
+publish_dir="s3://openproblems-data/resources/task_label_projection/results/${RUN_ID}"
 
 # make sure only log_cp10k is used
 cat > /tmp/params.yaml << HERE
-input_states: s3://openproblems-data/resources/task_template/datasets/**/state.yaml
+input_states: s3://openproblems-data/resources/task_label_projection/datasets/**/state.yaml
 rename_keys: 'input_train:output_train;input_test:output_test'
 output_state: "state.yaml"
 publish_dir: "$publish_dir"
 HERE
 
-tw launch https://github.com/openproblems-bio/task_template.git \
+tw launch https://github.com/openproblems-bio/task_label_projection.git \
   --revision build/main \
   --pull-latest \
   --main-script target/nextflow/workflows/run_benchmark/main.nf \
@@ -20,4 +20,4 @@ tw launch https://github.com/openproblems-bio/task_template.git \
   --params-file /tmp/params.yaml \
   --entry-name auto \
   --config common/nextflow_helpers/labels_tw.config \
-  --labels task_template,full
+  --labels task_label_projection,full
diff --git a/scripts/run_benchmark_test.sh b/scripts/run_benchmark_test.sh
@@ -1,13 +1,13 @@
 #!/bin/bash
 
 cat > /tmp/params.yaml << 'HERE'
-input_states: s3://openproblems-data/resources_test/task_template/**/state.yaml
+input_states: s3://openproblems-data/resources_test/task_label_projection/**/state.yaml
 rename_keys: 'input_train:output_train;input_test:output_test'
 output_state: "state.yaml"
-publish_dir: s3://openproblems-nextflow/temp/task_template/
+publish_dir: s3://openproblems-nextflow/temp/task_label_projection/
 HERE
 
-tw launch https://github.com/openproblems-bio/task_template.git \
+tw launch https://github.com/openproblems-bio/task_label_projection.git \
   --revision build/main \
   --pull-latest \
   --main-script target/nextflow/workflows/run_benchmark/main.nf \
@@ -16,4 +16,4 @@ tw launch https://github.com/openproblems-bio/task_template.git \
   --params-file /tmp/params.yaml \
   --entry-name auto \
   --config common/nextflow_helpers/labels_tw.config \
-  --labels task_template,test
+  --labels task_label_projection,test
diff --git a/src/api/comp_control_method.yaml b/src/api/comp_control_method.yaml
@@ -29,8 +29,8 @@ arguments:
     direction: output
     required: true
 test_resources:
-  - path: /resources_test/label_projection/pancreas
-    dest: resources_test/label_projection/pancreas
+  - path: /resources_test/task_label_projection/pancreas
+    dest: resources_test/task_label_projection/pancreas
   - type: python_script
     path: /common/component_tests/check_config.py
   - type: python_script

diff --git a/src/api/comp_method.yaml b/src/api/comp_method.yaml
@@ -21,8 +21,8 @@ arguments:
     direction: output
     required: true
 test_resources:
-  - path: /resources_test/label_projection/pancreas
-    dest: resources_test/label_projection/pancreas
+  - path: /resources_test/task_label_projection/pancreas
+    dest: resources_test/task_label_projection/pancreas
   - type: python_script
     path: /common/component_tests/check_config.py
   - type: python_script

diff --git a/src/api/comp_metric.yaml b/src/api/comp_metric.yaml
@@ -20,8 +20,8 @@ arguments:
     required: true
     direction: output
 test_resources:
-  - path: /resources_test/label_projection/pancreas
-    dest: resources_test/label_projection/pancreas
+  - path: /resources_test/task_label_projection/pancreas
+    dest: resources_test/task_label_projection/pancreas
   - type: python_script
     path: /common/component_tests/check_config.py
   - type: python_script

diff --git a/src/api/file_prediction.yaml b/src/api/file_prediction.yaml
@@ -1,5 +1,5 @@
 type: file
-example: "resources_test/label_projection/pancreas/prediction.h5ad"
+example: "resources_test/task_label_projection/pancreas/prediction.h5ad"
 label: "Prediction"
 summary: "The prediction file"
 info:

diff --git a/src/api/file_score.yaml b/src/api/file_score.yaml
@@ -1,5 +1,5 @@
 type: file
-example: "resources_test/label_projection/pancreas/score.h5ad"
+example: "resources_test/task_label_projection/pancreas/score.h5ad"
 label: "Score"
 summary: "Metric score file"
 info:

diff --git a/src/api/file_solution.yaml b/src/api/file_solution.yaml
@@ -1,5 +1,5 @@
 type: file
-example: "resources_test/label_projection/pancreas/solution.h5ad"
+example: "resources_test/task_label_projection/pancreas/solution.h5ad"
 label: "Solution"
 summary: "The solution for the test data"
 info:

diff --git a/src/api/file_test.yaml b/src/api/file_test.yaml
@@ -1,5 +1,5 @@
 type: file
-example: "resources_test/label_projection/pancreas/test.h5ad"
+example: "resources_test/task_label_projection/pancreas/test.h5ad"
 label: "Test data"
 summary: "The test data (without labels)"
 info:

diff --git a/src/api/file_train.yaml b/src/api/file_train.yaml
@@ -1,5 +1,5 @@
 type: file
-example: "resources_test/label_projection/pancreas/train.h5ad"
+example: "resources_test/task_label_projection/pancreas/train.h5ad"
 label: "Training data"
 summary: "The training data"
 info:

diff --git a/src/control_methods/majority_vote/script.py b/src/control_methods/majority_vote/script.py
@@ -3,8 +3,8 @@
 
 ## VIASH START
 par = {
-    'input_train': 'resources_test/label_projection/pancreas/train.h5ad',
-    'input_test': 'resources_test/label_projection/pancreas/test.h5ad',
+    'input_train': 'resources_test/task_label_projection/pancreas/train.h5ad',
+    'input_test': 'resources_test/task_label_projection/pancreas/test.h5ad',
     'output': 'output.h5ad'
 }
 meta = {

diff --git a/src/control_methods/random_labels/script.py b/src/control_methods/random_labels/script.py
@@ -4,8 +4,8 @@
 
 ## VIASH START
 par = {
-    'input_train': 'resources_test/label_projection/pancreas/train.h5ad',
-    'input_test': 'resources_test/label_projection/pancreas/test.h5ad',
+    'input_train': 'resources_test/task_label_projection/pancreas/train.h5ad',
+    'input_test': 'resources_test/task_label_projection/pancreas/test.h5ad',
     'output': 'output.h5ad'
 }
 meta = {

diff --git a/src/control_methods/true_labels/script.py b/src/control_methods/true_labels/script.py
@@ -3,9 +3,9 @@
 
 ## VIASH START
 par = {
-    'input_train': 'resources_test/label_projection/pancreas/train.h5ad',
-    'input_test': 'resources_test/label_projection/pancreas/test.h5ad',
-    'input_solution': 'resources_test/label_projection/pancreas/test.h5ad',
+    'input_train': 'resources_test/task_label_projection/pancreas/train.h5ad',
+    'input_test': 'resources_test/task_label_projection/pancreas/test.h5ad',
+    'input_solution': 'resources_test/task_label_projection/pancreas/solution.h5ad',  # the solution file, not the unlabelled test data
     'output': 'output.h5ad'
 }
 meta = {

diff --git a/src/methods/knn/script.py b/src/methods/knn/script.py
@@ -4,8 +4,8 @@
 
 ## VIASH START
 par = {
-    'input_train': 'resources_test/label_projection/pancreas/train.h5ad',
-    'input_test': 'resources_test/label_projection/pancreas/test.h5ad',
+    'input_train': 'resources_test/task_label_projection/pancreas/train.h5ad',
+    'input_test': 'resources_test/task_label_projection/pancreas/test.h5ad',
     'output': 'output.h5ad'
 }
 meta = {

diff --git a/src/methods/logistic_regression/script.py b/src/methods/logistic_regression/script.py
@@ -4,8 +4,8 @@
 
 ## VIASH START
 par = {
-    'input_train': 'resources_test/label_projection/pancreas/train.h5ad',
-    'input_test': 'resources_test/label_projection/pancreas/test.h5ad',
+    'input_train': 'resources_test/task_label_projection/pancreas/train.h5ad',
+    'input_test': 'resources_test/task_label_projection/pancreas/test.h5ad',
     'output': 'output.h5ad'
 }
 meta = {

diff --git a/src/methods/mlp/script.py b/src/methods/mlp/script.py
@@ -4,8 +4,8 @@
 
 ## VIASH START
 par = {
-    'input_train': 'resources_test/label_projection/pancreas/train.h5ad',
-    'input_test': 'resources_test/label_projection/pancreas/test.h5ad',
+    'input_train': 'resources_test/task_label_projection/pancreas/train.h5ad',
+    'input_test': 'resources_test/task_label_projection/pancreas/test.h5ad',
     'output': 'output.h5ad'
 }
 meta = {

diff --git a/src/methods/naive_bayes/script.py b/src/methods/naive_bayes/script.py
@@ -4,8 +4,8 @@
 
 ## VIASH START
 par = {
-    'input_train': 'resources_test/label_projection/pancreas/train.h5ad',
-    'input_test': 'resources_test/label_projection/pancreas/test.h5ad',
+    'input_train': 'resources_test/task_label_projection/pancreas/train.h5ad',
+    'input_test': 'resources_test/task_label_projection/pancreas/test.h5ad',
     'output': 'output.h5ad'
 }
 meta = {

diff --git a/src/methods/scanvi/script.py b/src/methods/scanvi/script.py
@@ -7,8 +7,8 @@
 
 ## VIASH START
 par = {
-    'input_train': 'resources_test/label_projection/pancreas/train.h5ad',
-    'input_test': 'resources_test/label_projection/pancreas/test.h5ad',
+    'input_train': 'resources_test/task_label_projection/pancreas/train.h5ad',
+    'input_test': 'resources_test/task_label_projection/pancreas/test.h5ad',
     'output': 'output.h5ad',
     'num_hvg': 2000
 }

diff --git a/src/methods/scanvi_scarches/script.py b/src/methods/scanvi_scarches/script.py
@@ -4,8 +4,8 @@
 
 ## VIASH START
 par = {
-    "input_train": "resources_test/label_projection/pancreas/train.h5ad",
-    "input_test": "resources_test/label_projection/pancreas/test.h5ad",
+    "input_train": "resources_test/task_label_projection/pancreas/train.h5ad",
+    "input_test": "resources_test/task_label_projection/pancreas/test.h5ad",
     "output": "output.h5ad",
     "n_latent": 30,
     "n_layers": 2,

diff --git a/src/methods/seurat_transferdata/script.R b/src/methods/seurat_transferdata/script.R
@@ -6,8 +6,8 @@ library(magrittr, warn.conflicts = FALSE)
 
 ## VIASH START
 par <- list(
-  input_train = "resources_test/label_projection/pancreas/train.h5ad",
-  input_test = "resources_test/label_projection/pancreas/test.h5ad",
+  input_train = "resources_test/task_label_projection/pancreas/train.h5ad",
+  input_test = "resources_test/task_label_projection/pancreas/test.h5ad",
   output = "output.h5ad"
 )
 meta <- list(

diff --git a/src/methods/xgboost/script.py b/src/methods/xgboost/script.py
@@ -4,8 +4,8 @@
 
 ## VIASH START
 par = {
-    'input_train': 'resources_test/label_projection/pancreas/train.h5ad',
-    'input_test': 'resources_test/label_projection/pancreas/test.h5ad',
+    'input_train': 'resources_test/task_label_projection/pancreas/train.h5ad',
+    'input_test': 'resources_test/task_label_projection/pancreas/test.h5ad',
     'output': 'output.h5ad'
 }
 meta = {

diff --git a/src/metrics/accuracy/script.py b/src/metrics/accuracy/script.py
@@ -4,8 +4,8 @@
 
 ## VIASH START
 par = {
-    'input_prediction': 'resources_test/label_projection/pancreas/knn.h5ad',
-    'input_solution': 'resources_test/label_projection/pancreas/solution.h5ad',
+    'input_prediction': 'resources_test/task_label_projection/pancreas/prediction.h5ad',  # file written by scripts/create_test_resources.sh
+    'input_solution': 'resources_test/task_label_projection/pancreas/solution.h5ad',
     'output': 'output.h5ad'
 }
 meta = {

diff --git a/src/metrics/f1/script.py b/src/metrics/f1/script.py
@@ -4,8 +4,8 @@
 
 ## VIASH START
 par = {
-    'input_prediction': 'resources_test/label_projection/pancreas/knn.h5ad',
-    'input_solution': 'resources_test/label_projection/pancreas/solution.h5ad',
+    'input_prediction': 'resources_test/task_label_projection/pancreas/prediction.h5ad',  # file written by scripts/create_test_resources.sh
+    'input_solution': 'resources_test/task_label_projection/pancreas/solution.h5ad',
     'average': 'weighted',
     'output': 'output.h5ad'
 }