Skip to content

Commit

Permalink
fix scripts
Browse files Browse the repository at this point in the history
  • Loading branch information
rcannood committed Aug 29, 2024
1 parent 497ed55 commit 163f821
Show file tree
Hide file tree
Showing 5 changed files with 25 additions and 23 deletions.
25 changes: 13 additions & 12 deletions scripts/create_test_resources.sh
Original file line number Diff line number Diff line change
Expand Up @@ -77,26 +77,27 @@ viash run src/data_processors/crop_region/config.vsh.yaml -- \
--min_y 10000 \
--max_y 12000

aws s3 sync --profile op \
"resources_test/common/2023_10x_mouse_brain_xenium" \
"s3://openproblems-data/resources_test/common/2023_10x_mouse_brain_xenium" \
--delete --dryrun

###################################################################################
DATASET_ID="allen_brain_cell_atlas/2023_Yao_mouse_brain_scRNAseq_10Xv2"
DATASET_ID="allen_brain_cell_atlas/2023_yao_mouse_brain_scrnaseq_10xv2"
TMP_DIR="temp/datasets/$DATASET_ID"
OUT_DIR="resources_test/common/2023_abca_Yao_mouse_brain_scRNAseq_10Xv2"

OUT_DIR="resources_test/common/2023_yao_mouse_brain_scrnaseq_10xv2"

# generate sc reference
VIASH_TEMP=/tmp/allen_brain_cell_atlas \
viash run src/data_loaders/download_allen_brain_cell_atlas/config.vsh.yaml -- \
--output "$TMP_DIR/tmp_sc_reference.h5ad" --regions "OLF;TH"
viash run src/data_loaders/download_allen_brain_cell_atlas/config.vsh.yaml \
--keep true -- \
--output "$TMP_DIR/tmp_dataset.h5ad" --regions "OLF;TH"

viash run src/data_processors/subset_reference/config.vsh.yaml -- \
--input "$TMP_DIR/tmp_sc_reference.h5ad" \
--output "$OUT_DIR/sc_reference.h5ad"
--input "$TMP_DIR/tmp_dataset.h5ad" \
--output "$OUT_DIR/dataset.h5ad"



###################################################################################
aws s3 sync --profile op \
"resources_test/common/2023_10x_mouse_brain_xenium" \
"s3://openproblems-data/resources_test/common/2023_10x_mouse_brain_xenium" \
"resources_test/common/2023_yao_mouse_brain_scrnaseq_10xv2" \
"s3://openproblems-data/resources_test/common/2023_abca_Yao_mouse_brain_scRNAseq_10Xv2" \
--delete --dryrun
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@ argument_groups:
- name: Inputs
arguments:
- type: string
name: --version
name: --abca_version
required: false
default: "20230630"
description: The version of the Allen Brain Cell Atlas to download data from.
Expand Down
10 changes: 5 additions & 5 deletions src/data_loaders/download_allen_brain_cell_atlas/script.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,17 +7,17 @@

## VIASH START
par = {
"version": "20230630",
"abca_version": "20230630",
"regions": ["OLF", "TH"],
"output": "temp/datasets/allen_brain_cell_atlas/2023_Yao_mouse_brain_scRNAseq_10Xv2/dataset.h5ad",
"output": "tmp_dataset.h5ad",
}
meta = {
"temp_dir": "/tmp/allen_brain_cell_atlas",
}
## VIASH END

# helper variables
VERSION = par["version"]
VERSION = par["abca_version"]
REGIONS = par["regions"]
TMP_DIR = Path(meta["temp_dir"] or "/tmp")

Expand Down Expand Up @@ -116,8 +116,8 @@
adata.var = adata.var.rename(columns={"gene_symbol":"feature_name"})

# Uns
adata.uns["dataset_id"] = "2023_Yao_mouse_brain_scRNAseq_10Xv2"
adata.uns["dataset_name"] = "2023_Yao_mouse_brain_scRNAseq_10Xv2"
adata.uns["dataset_id"] = "allen_brain_cell_atlas/2023_yao_mouse_brain_scrnaseq_10xv2"
adata.uns["dataset_name"] = "ABCA Mouse Brain scRNAseq"
adata.uns["dataset_url"] = "https://www.ncbi.nlm.nih.gov/geo/query/acc.cgi?acc=GSE246717"
adata.uns["dataset_reference"] = "10.1038/s41586-023-06812-z"
adata.uns["dataset_summary"] = "A high-resolution scRNAseq atlas of cell types in the whole mouse brain"
Expand Down
4 changes: 2 additions & 2 deletions src/data_processors/subset_reference/config.vsh.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@ arguments:
direction: output
- type: integer
name: --n_cells
required: true
required: false
description: The number of cells to sample.
default: 500
- type: integer
Expand All @@ -23,7 +23,7 @@ arguments:
default: 50
- type: string
name: --cell_type_key
required: true
required: false
description: The key in the obs dataframe that contains the cell type information.
default: cell_type
- name: "--keep_cell_type_categories"
Expand Down
7 changes: 4 additions & 3 deletions src/data_processors/subset_reference/script.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,8 +4,8 @@

### VIASH START
par = {
"input": "path/to/input.h5ad",
"output": "path/to/output.h5ad",
"input": "temp/datasets/allen_brain_cell_atlas/2023_Yao_mouse_brain_scRNAseq_10Xv2/tmp_dataset.h5ad",
"output": "resources_test/common/2023_abca_Yao_mouse_brain_scRNAseq_10Xv2/dataset.h5ad",
"n_cells": 500,
"min_n_cells_per_cell_type": 50,
"cell_type_key": "cell_type",
Expand Down Expand Up @@ -45,7 +45,8 @@
cell_type_indices = adata.obs_names[adata.obs[par["cell_type_key"]] == ct]
n_cells_in_type = len(cell_type_indices)
n_per_cell_type[ct] = min(n_cells_in_type, par["min_n_cells_per_cell_type"])

selected_indices.extend(np.random.choice(cell_type_indices, n_per_cell_type[ct], replace=False))

# Cap the number of cells to sample per cell type by the total number of cells to sample
# TODO: instead of the random choice below, adjust n_per_cell_type so that sum(n_per_cell_type.values()) <= par["n_cells"]

Expand Down

0 comments on commit 163f821

Please sign in to comment.