diff --git a/CHANGELOG.md b/CHANGELOG.md index 3839744..3806187 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,4 +1,4 @@ -# task_template x.y.z +# task_label_projection x.y.z ## BREAKING CHANGES diff --git a/README.md b/README.md index f046e55..2c6cb7c 100644 --- a/README.md +++ b/README.md @@ -140,7 +140,8 @@ Arguments: The solution for the test data -Example file: `resources_test/label_projection/pancreas/solution.h5ad` +Example file: +`resources_test/task_label_projection/pancreas/solution.h5ad` Format: @@ -183,7 +184,7 @@ Data structure: The test data (without labels) -Example file: `resources_test/label_projection/pancreas/test.h5ad` +Example file: `resources_test/task_label_projection/pancreas/test.h5ad` Format: @@ -219,7 +220,7 @@ Data structure: The training data -Example file: `resources_test/label_projection/pancreas/train.h5ad` +Example file: `resources_test/task_label_projection/pancreas/train.h5ad` Format: @@ -305,7 +306,8 @@ Arguments: The prediction file -Example file: `resources_test/label_projection/pancreas/prediction.h5ad` +Example file: +`resources_test/task_label_projection/pancreas/prediction.h5ad` Format: @@ -334,7 +336,7 @@ Data structure: Metric score file -Example file: `resources_test/label_projection/pancreas/score.h5ad` +Example file: `resources_test/task_label_projection/pancreas/score.h5ad` Format: diff --git a/_viash.yaml b/_viash.yaml index 2df1c62..1bd2ef1 100644 --- a/_viash.yaml +++ b/_viash.yaml @@ -58,8 +58,8 @@ info: path: s3://openproblems-data/resources_test/common/pancreas/ dest: resources_test/common/pancreas - type: s3 - path: s3://openproblems-data/resources_test/label_projection/ - dest: resources_test/label_projection + path: s3://openproblems-data/resources_test/task_label_projection/ + dest: resources_test/task_label_projection authors: - name: "Nikolay Markov" diff --git a/scripts/create_resources.sh b/scripts/create_resources.sh new file mode 100644 index 0000000..aa66c74 --- /dev/null +++ b/scripts/create_resources.sh @@ -0,0 +1,20 @@ +#!/bin/bash + +cat > /tmp/params.yaml << 'HERE' +input_states: s3://openproblems-data/resources/datasets/**/state.yaml +rename_keys: 'input:output_dataset' +output_state: "state.yaml" +settings: '{"output_train": "$id/train.h5ad", "output_test": "$id/test.h5ad"}' +publish_dir: s3://openproblems-data/resources/task_label_projection/datasets/ +HERE + +tw launch https://github.com/openproblems-bio/task_label_projection.git \ + --revision build/main \ + --pull-latest \ + --main-script target/nextflow/workflows/run_benchmark/main.nf \ + --workspace 53907369739130 \ + --compute-env 6TeIFgV5OY4pJCk8I0bfOh \ + --params-file /tmp/params.yaml \ + --entry-name auto \ + --config common/nextflow_helpers/labels_tw.config \ + --labels task_label_projection,create_resources diff --git a/scripts/create_test_resources.sh b/scripts/create_test_resources.sh old mode 100644 new mode 100755 index a39f8c4..34e578b --- a/scripts/create_test_resources.sh +++ b/scripts/create_test_resources.sh @@ -9,7 +9,7 @@ cd "$REPO_ROOT" set -e RAW_DATA=resources_test/common -DATASET_DIR=resources_test/task_template +DATASET_DIR=resources_test/task_label_projection mkdir -p $DATASET_DIR @@ -18,21 +18,27 @@ echo Running process_dataset nextflow run . \ -main-script target/nextflow/workflows/process_datasets/main.nf \ -profile docker \ - -entry auto \ - --input_states "$RAW_DATA/**/state.yaml" \ - --rename_keys 'input:output_dataset' \ - --settings '{"output_train": "$id/train.h5ad", "output_test": "$id/test.h5ad"}' \ --publish_dir "$DATASET_DIR" \ + --id "pancreas" \ + --input "$RAW_DATA/pancreas/dataset.h5ad" \ + --output_train '$id/train.h5ad' \ + --output_test '$id/test.h5ad' \ + --output_solution '$id/solution.h5ad' \ --output_state '$id/state.yaml' # run one method -viash run src/methods/logistic_regression/config.vsh.yaml -- \ +viash run src/methods/knn/config.vsh.yaml -- \ --input_train $DATASET_DIR/pancreas/train.h5ad \ --input_test $DATASET_DIR/pancreas/test.h5ad \ - --output $DATASET_DIR/pancreas/denoised.h5ad + --output $DATASET_DIR/pancreas/prediction.h5ad # run one metric viash run src/metrics/accuracy/config.vsh.yaml -- \ - --input_predicition $DATASET_DIR/pancreas/predicted.h5ad \ + --input_prediction $DATASET_DIR/pancreas/prediction.h5ad \ --input_solution $DATASET_DIR/pancreas/solution.h5ad \ - --output $DATASET_DIR/pancreas/score.h5ad \ No newline at end of file + --output $DATASET_DIR/pancreas/score.h5ad + +# only run this if you have access to the openproblems-data bucket +aws s3 sync --profile op \ + "$DATASET_DIR" s3://openproblems-data/resources/task_label_projection \ + --delete --dryrun diff --git a/scripts/run_benchmark.sh b/scripts/run_benchmark.sh index cc4275e..0ae4a79 100644 --- a/scripts/run_benchmark.sh +++ b/scripts/run_benchmark.sh @@ -1,17 +1,17 @@ #!/bin/bash RUN_ID="run_$(date +%Y-%m-%d_%H-%M-%S)" -publish_dir="s3://openproblems-data/resources/task_template/results/${RUN_ID}" +publish_dir="s3://openproblems-data/resources/task_label_projection/results/${RUN_ID}" # make sure only log_cp10k is used cat > /tmp/params.yaml << HERE -input_states: s3://openproblems-data/resources/task_template/datasets/**/state.yaml +input_states: s3://openproblems-data/resources/task_label_projection/datasets/**/state.yaml rename_keys: 'input_train:output_train;input_test:output_test' output_state: "state.yaml" publish_dir: "$publish_dir" HERE -tw launch https://github.com/openproblems-bio/task_template.git \ +tw launch https://github.com/openproblems-bio/task_label_projection.git \ --revision build/main \ --pull-latest \ --main-script target/nextflow/workflows/run_benchmark/main.nf \ @@ -20,4 +20,4 @@ tw launch https://github.com/openproblems-bio/task_template.git \ --params-file /tmp/params.yaml \ --entry-name auto \ --config common/nextflow_helpers/labels_tw.config \ - --labels task_template,full \ No newline at end of file + --labels task_label_projection,full \ No newline at end of file diff --git a/scripts/run_benchmark_test.sh b/scripts/run_benchmark_test.sh index 6c03d42..931be17 100644 --- a/scripts/run_benchmark_test.sh +++ b/scripts/run_benchmark_test.sh @@ -1,13 +1,13 @@ #!/bin/bash cat > /tmp/params.yaml << 'HERE' -input_states: s3://openproblems-data/resources_test/task_template/**/state.yaml +input_states: s3://openproblems-data/resources_test/task_label_projection/**/state.yaml rename_keys: 'input_train:output_train;input_test:output_test' output_state: "state.yaml" -publish_dir: s3://openproblems-nextflow/temp/task_template/ +publish_dir: s3://openproblems-nextflow/temp/task_label_projection/ HERE -tw launch https://github.com/openproblems-bio/task_template.git \ +tw launch https://github.com/openproblems-bio/task_label_projection.git \ --revision build/main \ --pull-latest \ --main-script target/nextflow/workflows/run_benchmark/main.nf \ @@ -16,4 +16,4 @@ tw launch https://github.com/openproblems-bio/task_template.git \ --params-file /tmp/params.yaml \ --entry-name auto \ --config common/nextflow_helpers/labels_tw.config \ - --labels task_template,test \ No newline at end of file + --labels task_label_projection,test diff --git a/src/api/comp_control_method.yaml b/src/api/comp_control_method.yaml index 32e93e7..651fb7e 100644 --- a/src/api/comp_control_method.yaml +++ b/src/api/comp_control_method.yaml @@ -29,8 +29,8 @@ arguments: direction: output required: true test_resources: - - path: /resources_test/label_projection/pancreas - dest: resources_test/label_projection/pancreas + - path: /resources_test/task_label_projection/pancreas + dest: resources_test/task_label_projection/pancreas - type: python_script path: /common/component_tests/check_config.py - type: python_script diff --git a/src/api/comp_method.yaml b/src/api/comp_method.yaml index ed2d194..55fd3ea 100644 --- a/src/api/comp_method.yaml +++ b/src/api/comp_method.yaml @@ -21,8 +21,8 @@ arguments: direction: output required: true test_resources: - - path: /resources_test/label_projection/pancreas - dest: resources_test/label_projection/pancreas + - path: /resources_test/task_label_projection/pancreas + dest: resources_test/task_label_projection/pancreas - type: python_script path: /common/component_tests/check_config.py - type: python_script diff --git a/src/api/comp_metric.yaml b/src/api/comp_metric.yaml index 291fc15..1dffd0e 100644 --- a/src/api/comp_metric.yaml +++ b/src/api/comp_metric.yaml @@ -20,8 +20,8 @@ arguments: required: true direction: output test_resources: - - path: /resources_test/label_projection/pancreas - dest: resources_test/label_projection/pancreas + - path: /resources_test/task_label_projection/pancreas + dest: resources_test/task_label_projection/pancreas - type: python_script path: /common/component_tests/check_config.py - type: python_script diff --git a/src/api/file_prediction.yaml b/src/api/file_prediction.yaml index 2851976..b53cd4d 100644 --- a/src/api/file_prediction.yaml +++ b/src/api/file_prediction.yaml @@ -1,5 +1,5 @@ type: file -example: "resources_test/label_projection/pancreas/prediction.h5ad" +example: "resources_test/task_label_projection/pancreas/prediction.h5ad" label: "Prediction" summary: "The prediction file" info: diff --git a/src/api/file_score.yaml b/src/api/file_score.yaml index 9633987..3a17fc5 100644 --- a/src/api/file_score.yaml +++ b/src/api/file_score.yaml @@ -1,5 +1,5 @@ type: file -example: "resources_test/label_projection/pancreas/score.h5ad" +example: "resources_test/task_label_projection/pancreas/score.h5ad" label: "Score" summary: "Metric score file" info: diff --git a/src/api/file_solution.yaml b/src/api/file_solution.yaml index 38154f5..9513f1c 100644 --- a/src/api/file_solution.yaml +++ b/src/api/file_solution.yaml @@ -1,5 +1,5 @@ type: file -example: "resources_test/label_projection/pancreas/solution.h5ad" +example: "resources_test/task_label_projection/pancreas/solution.h5ad" label: "Solution" summary: "The solution for the test data" info: diff --git a/src/api/file_test.yaml b/src/api/file_test.yaml index a812da0..0470cbb 100644 --- a/src/api/file_test.yaml +++ b/src/api/file_test.yaml @@ -1,5 +1,5 @@ type: file -example: "resources_test/label_projection/pancreas/test.h5ad" +example: "resources_test/task_label_projection/pancreas/test.h5ad" label: "Test data" summary: "The test data (without labels)" info: diff --git a/src/api/file_train.yaml b/src/api/file_train.yaml index f21e95e..4da40fd 100644 --- a/src/api/file_train.yaml +++ b/src/api/file_train.yaml @@ -1,5 +1,5 @@ type: file -example: "resources_test/label_projection/pancreas/train.h5ad" +example: "resources_test/task_label_projection/pancreas/train.h5ad" label: "Training data" summary: "The training data" info: diff --git a/src/control_methods/majority_vote/script.py b/src/control_methods/majority_vote/script.py index b05f3d3..a29db8a 100644 --- a/src/control_methods/majority_vote/script.py +++ b/src/control_methods/majority_vote/script.py @@ -3,8 +3,8 @@ ## VIASH START par = { - 'input_train': 'resources_test/label_projection/pancreas/train.h5ad', - 'input_test': 'resources_test/label_projection/pancreas/test.h5ad', + 'input_train': 'resources_test/task_label_projection/pancreas/train.h5ad', + 'input_test': 'resources_test/task_label_projection/pancreas/test.h5ad', 'output': 'output.h5ad' } meta = { diff --git a/src/control_methods/random_labels/script.py b/src/control_methods/random_labels/script.py index db1281d..0a99235 100644 --- a/src/control_methods/random_labels/script.py +++ b/src/control_methods/random_labels/script.py @@ -4,8 +4,8 @@ ## VIASH START par = { - 'input_train': 'resources_test/label_projection/pancreas/train.h5ad', - 'input_test': 'resources_test/label_projection/pancreas/test.h5ad', + 'input_train': 'resources_test/task_label_projection/pancreas/train.h5ad', + 'input_test': 'resources_test/task_label_projection/pancreas/test.h5ad', 'output': 'output.h5ad' } meta = { diff --git a/src/control_methods/true_labels/script.py b/src/control_methods/true_labels/script.py index 2de0e87..b6cdf1b 100644 --- a/src/control_methods/true_labels/script.py +++ b/src/control_methods/true_labels/script.py @@ -3,9 +3,9 @@ ## VIASH START par = { - 'input_train': 'resources_test/label_projection/pancreas/train.h5ad', - 'input_test': 'resources_test/label_projection/pancreas/test.h5ad', - 'input_solution': 'resources_test/label_projection/pancreas/test.h5ad', + 'input_train': 'resources_test/task_label_projection/pancreas/train.h5ad', + 'input_test': 'resources_test/task_label_projection/pancreas/test.h5ad', + 'input_solution': 'resources_test/task_label_projection/pancreas/test.h5ad', 'output': 'output.h5ad' } meta = { diff --git a/src/methods/knn/script.py b/src/methods/knn/script.py index f00af2a..ec164a7 100644 --- a/src/methods/knn/script.py +++ b/src/methods/knn/script.py @@ -4,8 +4,8 @@ ## VIASH START par = { - 'input_train': 'resources_test/label_projection/pancreas/train.h5ad', - 'input_test': 'resources_test/label_projection/pancreas/test.h5ad', + 'input_train': 'resources_test/task_label_projection/pancreas/train.h5ad', + 'input_test': 'resources_test/task_label_projection/pancreas/test.h5ad', 'output': 'output.h5ad' } meta = { diff --git a/src/methods/logistic_regression/script.py b/src/methods/logistic_regression/script.py index 815fe42..64cc9e5 100644 --- a/src/methods/logistic_regression/script.py +++ b/src/methods/logistic_regression/script.py @@ -4,8 +4,8 @@ ## VIASH START par = { - 'input_train': 'resources_test/label_projection/pancreas/train.h5ad', - 'input_test': 'resources_test/label_projection/pancreas/test.h5ad', + 'input_train': 'resources_test/task_label_projection/pancreas/train.h5ad', + 'input_test': 'resources_test/task_label_projection/pancreas/test.h5ad', 'output': 'output.h5ad' } meta = { diff --git a/src/methods/mlp/script.py b/src/methods/mlp/script.py index 5c46c8e..7e148dd 100644 --- a/src/methods/mlp/script.py +++ b/src/methods/mlp/script.py @@ -4,8 +4,8 @@ ## VIASH START par = { - 'input_train': 'resources_test/label_projection/pancreas/train.h5ad', - 'input_test': 'resources_test/label_projection/pancreas/test.h5ad', + 'input_train': 'resources_test/task_label_projection/pancreas/train.h5ad', + 'input_test': 'resources_test/task_label_projection/pancreas/test.h5ad', 'output': 'output.h5ad' } meta = { diff --git a/src/methods/naive_bayes/script.py b/src/methods/naive_bayes/script.py index f4108d5..5103674 100644 --- a/src/methods/naive_bayes/script.py +++ b/src/methods/naive_bayes/script.py @@ -4,8 +4,8 @@ ## VIASH START par = { - 'input_train': 'resources_test/label_projection/pancreas/train.h5ad', - 'input_test': 'resources_test/label_projection/pancreas/test.h5ad', + 'input_train': 'resources_test/task_label_projection/pancreas/train.h5ad', + 'input_test': 'resources_test/task_label_projection/pancreas/test.h5ad', 'output': 'output.h5ad' } meta = { diff --git a/src/methods/scanvi/script.py b/src/methods/scanvi/script.py index 5381004..84d6fa4 100644 --- a/src/methods/scanvi/script.py +++ b/src/methods/scanvi/script.py @@ -7,8 +7,8 @@ ## VIASH START par = { - 'input_train': 'resources_test/label_projection/pancreas/train.h5ad', - 'input_test': 'resources_test/label_projection/pancreas/test.h5ad', + 'input_train': 'resources_test/task_label_projection/pancreas/train.h5ad', + 'input_test': 'resources_test/task_label_projection/pancreas/test.h5ad', 'output': 'output.h5ad', 'num_hvg': 2000 } diff --git a/src/methods/scanvi_scarches/script.py b/src/methods/scanvi_scarches/script.py index fb29461..52f4044 100644 --- a/src/methods/scanvi_scarches/script.py +++ b/src/methods/scanvi_scarches/script.py @@ -4,8 +4,8 @@ ## VIASH START par = { - "input_train": "resources_test/label_projection/pancreas/train.h5ad", - "input_test": "resources_test/label_projection/pancreas/test.h5ad", + "input_train": "resources_test/task_label_projection/pancreas/train.h5ad", + "input_test": "resources_test/task_label_projection/pancreas/test.h5ad", "output": "output.h5ad", "n_latent": 30, "n_layers": 2, diff --git a/src/methods/seurat_transferdata/script.R b/src/methods/seurat_transferdata/script.R index cce3269..28212f7 100644 --- a/src/methods/seurat_transferdata/script.R +++ b/src/methods/seurat_transferdata/script.R @@ -6,8 +6,8 @@ library(magrittr, warn.conflicts = FALSE) ## VIASH START par <- list( - input_train = "resources_test/label_projection/pancreas/train.h5ad", - input_test = "resources_test/label_projection/pancreas/test.h5ad", + input_train = "resources_test/task_label_projection/pancreas/train.h5ad", + input_test = "resources_test/task_label_projection/pancreas/test.h5ad", output = "output.h5ad" ) meta <- list( diff --git a/src/methods/xgboost/script.py b/src/methods/xgboost/script.py index 73c4bec..614046e 100644 --- a/src/methods/xgboost/script.py +++ b/src/methods/xgboost/script.py @@ -4,8 +4,8 @@ ## VIASH START par = { - 'input_train': 'resources_test/label_projection/pancreas/train.h5ad', - 'input_test': 'resources_test/label_projection/pancreas/test.h5ad', + 'input_train': 'resources_test/task_label_projection/pancreas/train.h5ad', + 'input_test': 'resources_test/task_label_projection/pancreas/test.h5ad', 'output': 'output.h5ad' } meta = { diff --git a/src/metrics/accuracy/script.py b/src/metrics/accuracy/script.py index a2b6a97..130cf9f 100644 --- a/src/metrics/accuracy/script.py +++ b/src/metrics/accuracy/script.py @@ -4,8 +4,8 @@ ## VIASH START par = { - 'input_prediction': 'resources_test/label_projection/pancreas/knn.h5ad', - 'input_solution': 'resources_test/label_projection/pancreas/solution.h5ad', + 'input_prediction': 'resources_test/task_label_projection/pancreas/knn.h5ad', + 'input_solution': 'resources_test/task_label_projection/pancreas/solution.h5ad', 'output': 'output.h5ad' } meta = { diff --git a/src/metrics/f1/script.py b/src/metrics/f1/script.py index 075a7bb..3e11bc1 100644 --- a/src/metrics/f1/script.py +++ b/src/metrics/f1/script.py @@ -4,8 +4,8 @@ ## VIASH START par = { - 'input_prediction': 'resources_test/label_projection/pancreas/knn.h5ad', - 'input_solution': 'resources_test/label_projection/pancreas/solution.h5ad', + 'input_prediction': 'resources_test/task_label_projection/pancreas/knn.h5ad', + 'input_solution': 'resources_test/task_label_projection/pancreas/solution.h5ad', 'average': 'weighted', 'output': 'output.h5ad' } diff --git a/src/workflows/run_benchmark/test.sh b/src/workflows/run_benchmark/test.sh index 74a855a..56a91e1 100755 --- a/src/workflows/run_benchmark/test.sh +++ b/src/workflows/run_benchmark/test.sh @@ -8,7 +8,7 @@ cd "$REPO_ROOT" set -e -DATASETS_DIR="resources_test/label_projection" +DATASETS_DIR="resources_test/task_label_projection" OUTPUT_DIR="output/temp" if [ ! -d "$OUTPUT_DIR" ]; then