Skip to content

Commit

Permalink
merged latest and added iss reader
Browse files Browse the repository at this point in the history
  • Loading branch information
BioinfoTongLI committed Jul 5, 2023
2 parents 8ca602c + 9ebe2ba commit d9202d8
Show file tree
Hide file tree
Showing 11 changed files with 486 additions and 101 deletions.
6 changes: 3 additions & 3 deletions .pre-commit-config.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@ repos:
hooks:
- id: prettier
- repo: https://github.com/asottile/blacken-docs
rev: 1.13.0
rev: 1.14.0
hooks:
- id: blacken-docs
- repo: https://github.com/PyCQA/isort
Expand All @@ -29,7 +29,7 @@ repos:
additional_dependencies: [numpy]
exclude: docs/
- repo: https://github.com/asottile/yesqa
rev: v1.4.0
rev: v1.5.0
hooks:
- id: yesqa
additional_dependencies:
Expand Down Expand Up @@ -70,7 +70,7 @@ repos:
- flake8-bugbear
- flake8-blind-except
- repo: https://github.com/asottile/pyupgrade
rev: v3.4.0
rev: v3.7.0
hooks:
- id: pyupgrade
args: [--py3-plus, --py39-plus, --keep-runtime-typing]
Expand Down
10 changes: 1 addition & 9 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -17,16 +17,8 @@ This package contains reader functions to load common spatial omics formats into
- 10x Genomics Visium
- 10x Genomics Xenium
- Curio Seeker

Coming soon:

- Vizgen MERSCOPE (MERFISH)
- Spatial Genomics seqFISH
- Akoya PhenoCycler (formerly CODEX)

Also coming soon:

- Common image converters: .jpg <> .zarr
- Vizgen MERSCOPE (MERFISH)

## Getting started

Expand Down
2 changes: 2 additions & 0 deletions docs/api.md
Original file line number Diff line number Diff line change
Expand Up @@ -12,10 +12,12 @@ I/O for the `spatialdata` project.
.. autosummary::
:toctree: generated
codex
curio
cosmx
visium
xenium
steinbock
merscope
mcmicro
```
1 change: 1 addition & 0 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,7 @@ dependencies = [
"imagecodecs",
"dask-image",
"pyarrow",
"readfcs",
]

[project.optional-dependencies]
Expand Down
4 changes: 4 additions & 0 deletions src/spatialdata_io/__init__.py
Original file line number Diff line number Diff line change
@@ -1,8 +1,10 @@
from importlib.metadata import version

from spatialdata_io.readers.codex import codex
from spatialdata_io.readers.cosmx import cosmx
from spatialdata_io.readers.curio import curio
from spatialdata_io.readers.mcmicro import mcmicro
from spatialdata_io.readers.merscope import merscope
from spatialdata_io.readers.steinbock import steinbock
from spatialdata_io.readers.visium import visium
from spatialdata_io.readers.xenium import xenium
Expand All @@ -12,10 +14,12 @@
"curio",
"visium",
"xenium",
"codex",
"cosmx",
"mcmicro",
"steinbock",
"iss",
"merscope",
]

__version__ = version("spatialdata-io")
51 changes: 47 additions & 4 deletions src/spatialdata_io/_constants/_constants.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,20 @@


@unique
class CodexKeys(ModeEnum):
    """Keys for *CODEX* formatted dataset.

    File-suffix and column/key-name constants used by the CODEX reader
    (see ``spatialdata_io.readers.codex``).
    """

    # files and directories
    FCS_FILE = ".fcs"  # suffix of the flow-cytometry counts/metadata file
    FCS_FILE_CSV = ".csv"  # suffix of the CSV alternative to the .fcs file
    # metadata
    REGION_KEY = "region"  # obs column holding the region identifier
    INSTANCE_KEY = "cell_id"  # obs column holding the per-cell identifier
    SPATIAL_KEY = "spatial"  # obsm key for the (x, y) cell coordinates
    # images
    IMAGE_TIF = ".tif"  # suffix of the high-resolution image file


class CurioKeys(ModeEnum):
"""Keys for *Curio* formatted dataset."""

Expand Down Expand Up @@ -93,16 +107,15 @@ class VisiumKeys(ModeEnum):
# images
IMAGE_HIRES_FILE = "spatial/tissue_hires_image.png"
IMAGE_LOWRES_FILE = "spatial/tissue_lowres_image.png"
IMAGE_TIF_SUFFIX = "_tissue_image.tif"
IMAGE_TIF_ALTERNATIVE_SUFFIX = "_image.tif"

# scalefactors
SCALEFACTORS_FILE = "spatial/scalefactors_json.json"
SCALEFACTORS_FILE = "scalefactors_json.json"
SCALEFACTORS_HIRES = "tissue_hires_scalef"
SCALEFACTORS_LOWRES = "tissue_lowres_scalef"

# spots
SPOTS_FILE = "spatial/tissue_positions.csv"
SPOTS_FILE_1 = "tissue_positions_list.csv"
SPOTS_FILE_2 = "tissue_positions.csv"
SPOTS_X = "pxl_row_in_fullres"
SPOTS_Y = "pxl_col_in_fullres"

Expand Down Expand Up @@ -139,3 +152,33 @@ class McmicroKeys(ModeEnum):
COORDS_X = "X_centroid"
COORDS_Y = "Y_centroid"
INSTANCE_KEY = "CellID"


@unique
class MerscopeKeys(ModeEnum):
    """Keys for *MERSCOPE* data (Vizgen platform).

    File names and column-name constants used by the MERSCOPE reader.
    """

    # files and directories
    IMAGES_DIR = "images"  # directory containing the mosaic images
    TRANSFORMATION_FILE = "micron_to_mosaic_pixel_transform.csv"  # micron -> mosaic-pixel affine
    TRANSCRIPTS_FILE = "detected_transcripts.csv"  # per-transcript coordinates table
    BOUNDARIES_FILE = "cell_boundaries.parquet"  # cell boundary polygons
    COUNTS_FILE = "cell_by_gene.csv"  # cell-by-gene counts matrix
    CELL_METADATA_FILE = "cell_metadata.csv"  # per-cell metadata (centroids, etc.)

    # VPT default outputs (alternative file names produced by the Vizgen
    # post-processing tool — presumably; confirm against upstream docs)
    CELLPOSE_BOUNDARIES = "cellpose_micron_space.parquet"
    WATERSHED_BOUNDARIES = "watershed_micron_space.parquet"
    VPT_NAME_COUNTS = "cell_by_gene"
    VPT_NAME_OBS = "cell_metadata"
    VPT_NAME_BOUNDARIES = "cell_boundaries"

    # metadata
    INSTANCE_KEY = "EntityID"  # per-cell identifier column
    COUNTS_CELL_KEY = "cell"  # cell-id column of the counts file
    CELL_X = "center_x"  # cell centroid x
    CELL_Y = "center_y"  # cell centroid y
    GLOBAL_X = "global_x"  # transcript x in global (micron) space
    GLOBAL_Y = "global_y"  # transcript y in global (micron) space
    GLOBAL_Z = "global_z"  # transcript z plane
    Z_INDEX = "ZIndex"  # integer z-slice index
55 changes: 29 additions & 26 deletions src/spatialdata_io/readers/_utils/_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@
from typing import Any, Optional, Union

import numpy as np
from anndata import AnnData, read_mtx, read_text
from anndata import AnnData, read_text
from h5py import File

from spatialdata_io.readers._utils._read_10x_h5 import _read_10x_h5
Expand All @@ -23,45 +23,48 @@

def _read_counts(
path: str | Path,
count_file: str,
counts_file: str,
library_id: Optional[str] = None,
**kwargs: Any,
) -> tuple[AnnData, str]:
path = Path(path)
library_id_: Optional[str] = None
if count_file.endswith(".h5"):
adata: AnnData = _read_10x_h5(path / count_file, **kwargs)
with File(path / count_file, mode="r") as f:
if counts_file.endswith(".h5"):
print(counts_file)
adata: AnnData = _read_10x_h5(path / counts_file, **kwargs)
with File(path / counts_file, mode="r") as f:
attrs = dict(f.attrs)
try:
lid = attrs.pop("library_ids")[0]
library_id_ = lid.decode("utf-8") if isinstance(lid, bytes) else str(lid)
except ValueError:
raise KeyError("Unable to extract library id from attributes. Please specify one explicitly.") from None
if library_id is not None:
if library_id != library_id_:
raise ValueError(
f"library_id {library_id} does not match library_id {library_id_} in the file. Check the output file."
)
if library_id is None:
try:
lid = attrs.pop("library_ids")[0]
library_id = lid.decode("utf-8") if isinstance(lid, bytes) else str(lid)
except ValueError:
raise KeyError(
"Unable to extract library id from attributes. Please specify one explicitly."
) from None

adata.uns["spatial"] = {library_id_: {"metadata": {}}} # can overwrite
adata.uns["spatial"] = {library_id: {"metadata": {}}} # can overwrite
for key in ["chemistry_description", "software_version"]:
if key not in attrs:
continue
metadata = attrs[key].decode("utf-8") if isinstance(attrs[key], bytes) else attrs[key]
adata.uns["spatial"][library_id_]["metadata"][key] = metadata
adata.uns["spatial"][library_id]["metadata"][key] = metadata

return adata, library_id_
return adata, library_id

if library_id_ is None:
if library_id is None:
raise ValueError("Please explicitly specify library id.")

if count_file.endswith((".csv", ".txt")):
adata = read_text(path / count_file, **kwargs)
elif count_file.endswith(".mtx"):
adata = read_mtx(path / count_file, **kwargs)
if counts_file.endswith((".csv", ".txt")):
adata = read_text(path / counts_file, **kwargs)
elif counts_file.endswith(".mtx.gz"):
try:
from scanpy.readwrite import read_10x_mtx
except ImportError:
raise ImportError("Please install scanpy to read 10x mtx files, `pip install scanpy`.")
prefix = counts_file.replace("matrix.mtx.gz", "")
adata = read_10x_mtx(path, prefix=prefix, **kwargs)
else:
raise NotImplementedError("TODO")

adata.uns["spatial"] = {library_id_: {"metadata": {}}} # can overwrite
return adata, library_id_
adata.uns["spatial"] = {library_id: {"metadata": {}}} # can overwrite
return adata, library_id
99 changes: 99 additions & 0 deletions src/spatialdata_io/readers/codex.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,99 @@
from __future__ import annotations

import os
import re
from collections.abc import Mapping
from pathlib import Path
from types import MappingProxyType
from typing import Any

import anndata as ad
import pandas as pd
import readfcs
from dask_image.imread import imread
from spatialdata import SpatialData
from spatialdata._logging import logger
from spatialdata.models import Image2DModel, ShapesModel, TableModel

from spatialdata_io._constants._constants import CodexKeys
from spatialdata_io._docs import inject_docs

__all__ = ["codex"]


@inject_docs(vx=CodexKeys)
def codex(
    path: str | Path,
    fcs: bool = True,
    imread_kwargs: Mapping[str, Any] = MappingProxyType({}),
) -> SpatialData:
    """
    Read *CODEX* formatted dataset.

    This function reads the following files:

        - ``<dataset_id>_`{vx.FCS_FILE!r}```: Counts and metadata file.
        - ``<dataset_id>_`{vx.IMAGE_TIF!r}```: High resolution tif image.

    .. seealso::

        - `CODEX output <https://help.codex.bio/codex/processor/technical-notes/expected-output>`_.

    Parameters
    ----------
    path
        Path to the directory containing the data.
    fcs
        Whether a .fcs file is provided. If False, a .csv file is expected.
    imread_kwargs
        Keyword arguments passed to :func:`dask_image.imread.imread`.

    Returns
    -------
    :class:`spatialdata.SpatialData`
    """
    path = Path(path)
    # Escape the dot and use a raw string: the original pattern ".*.fcs" treated
    # the dot as "any character" and would match e.g. "data_fcs.csv".
    patt = re.compile(r".*\.fcs") if fcs else re.compile(r".*\.csv")
    path_files = [i for i in os.listdir(path) if patt.match(i)]
    # BUG FIX: the original guard `path_files and CodexKeys.FCS_FILE or
    # CodexKeys.FCS_FILE_CSV in patt.pattern` parsed as
    # `(path_files and FCS_FILE) or (".csv" in pattern)` and so passed even when
    # no matching file existed (whenever fcs=False).
    if not path_files:
        raise ValueError("Cannot determine data set. Expecting a file with format .fcs or .csv")

    # BUG FIX: read relative to `path`, not the current working directory, and do
    # not shadow the `fcs` parameter with the resulting DataFrame.
    counts_path = path / path_files[0]
    if fcs:
        counts_df = readfcs.ReadFCS(counts_path).data
    else:
        counts_df = pd.read_csv(counts_path, header=0, index_col=None)

    adata = _codex_df_to_anndata(counts_df)

    # One circle per cell at its (x, y) centroid; all cells belong to one region.
    xy = adata.obsm[CodexKeys.SPATIAL_KEY]
    shapes = ShapesModel.parse(xy, geometry=0, radius=1, index=adata.obs[CodexKeys.INSTANCE_KEY])
    region = adata.obs[CodexKeys.REGION_KEY].unique()[0]
    adata.obs[CodexKeys.REGION_KEY] = adata.obs[CodexKeys.REGION_KEY].astype("category")
    table = TableModel.parse(adata, region=region, region_key=CodexKeys.REGION_KEY, instance_key=CodexKeys.INSTANCE_KEY)

    im_patt = re.compile(r".*\.tif")  # matches .tif and .tiff (no end anchor)
    path_files = [i for i in os.listdir(path) if im_patt.match(i)]
    if path_files and CodexKeys.IMAGE_TIF in path_files[0]:
        # BUG FIX: resolve the image file against `path` as well.
        image = imread(path / path_files[0], **imread_kwargs)
        images = {
            "images": Image2DModel.parse(
                image,
                scale_factors=[2, 2],
            )
        }
        sdata = SpatialData(images=images, shapes={str(region): shapes}, table=table)
    else:
        logger.warning("Cannot find .tif file. Will build spatialdata with shapes and table only.")
        sdata = SpatialData(shapes={str(region): shapes}, table=table)

    return sdata


def _codex_df_to_anndata(df: pd.DataFrame) -> ad.AnnData:
    """Convert a codex formatted .fcs dataframe or .csv file to anndata.

    Columns whose names match ``cyc.*`` become the expression matrix ``X``;
    all remaining columns become ``obs``, and the ``x``/``y`` columns are
    additionally stored as the spatial coordinates in ``obsm``.
    """
    marker_columns = df.filter(regex="cyc.*")
    adata = ad.AnnData(marker_columns)
    adata.obs = df[df.columns.drop(list(marker_columns))]
    adata.obsm[CodexKeys.SPATIAL_KEY] = df[["x", "y"]].values
    adata.var_names_make_unique()
    return adata
Loading

0 comments on commit d9202d8

Please sign in to comment.