Skip to content

Commit

Permalink
fix abca script
Browse files Browse the repository at this point in the history
  • Loading branch information
rcannood committed Oct 1, 2024
1 parent d6ca3be commit 4589ee4
Show file tree
Hide file tree
Showing 2 changed files with 18 additions and 12 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -18,10 +18,7 @@ param_list:
- CTXsp
- HPF
- HY
- Isocortex-1
- Isocortex-2
- Isocortex-3
- Isocortex-4
- Isocortex
- MB
- OLF
- TH
Expand Down
25 changes: 17 additions & 8 deletions src/datasets/loaders/allen_brain_cell_atlas/script.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,11 +4,12 @@
from collections import defaultdict
import anndata as ad
from abc_atlas_access.abc_atlas_cache.abc_project_cache import AbcProjectCache
import re

## VIASH START
par = {
"abca_version": "20230630",
"regions": ["MB", "TF"],
"regions": ["MB", "TH"],
"sample_n_obs": 5000,
"sample_obs_weight": "subclass",
"sample_transform": "sqrt",
Expand Down Expand Up @@ -76,13 +77,24 @@
# TODO: potentially also load other chemistries (currently only 10Xv2)

print("Downloading and reading expression matrices", flush=True)
# Resolve which raw expression files to fetch for the requested regions.
# The pattern accepts an optional dash/digit suffix after the region name, so
# a single requested region can map to several files in the data directory
# (presumably numbered splits such as "<region>-1", "<region>-2" -- confirm
# against the cache manifest).
abca_data_subdir = "WMB-10Xv2"
abca_data_files = abc_cache.list_data_files(abca_data_subdir)
abca_region_files = [
    # Order matters: the download loop unpacks each entry as
    # `region, abca_data_file`, so the region must come first.
    [region, file]
    for file in abca_data_files
    for region in REGIONS
    # re.escape keeps region names literal even if they ever contain regex
    # metacharacters; re.match anchors the pattern at the start of the name.
    if re.match(
        rf"{re.escape(abca_data_subdir)}-{re.escape(region)}[\-0-9]*/raw",
        file,
    )
]

adatas = []
for region in REGIONS:
for region, abca_data_file in abca_region_files:
try:
print(f"Downloading h5ad file for region {region}", flush=True)
adata_path = abc_cache.get_data_path(directory="WMB-10Xv2", file_name=f"WMB-10Xv2-{region}/raw")
print(f"Downloading file {abca_data_file} for region {region}", flush=True)
adata_path = abc_cache.get_data_path(
directory=abca_data_subdir,
file_name=abca_data_file
)

print(f"Reading h5ad for region {region}", flush=True)
print(f"Reading file {adata_path}", flush=True)
adata = ad.read_h5ad(str(adata_path))

if not par["keep_files"]:
Expand All @@ -91,9 +103,6 @@
# filter cells
adata = adata[adata.obs_names.isin(obs.index)].copy()

# add region to obs
adata.obs["region"] = region

# move counts to layer
adata.layers["counts"] = adata.X
del adata.X
Expand Down

0 comments on commit 4589ee4

Please sign in to comment.