Skip to content

Commit

Permalink
Merge pull request #56 from janelia-cosem/use_pydantic_zarr
Browse files Browse the repository at this point in the history
use pydantic zarr
  • Loading branch information
d-v-b authored Aug 22, 2023
2 parents 0da50de + 3b4487b commit 0b237e0
Show file tree
Hide file tree
Showing 9 changed files with 1,008 additions and 1,180 deletions.
1,820 changes: 792 additions & 1,028 deletions poetry.lock

Large diffs are not rendered by default.

2 changes: 1 addition & 1 deletion pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -26,8 +26,8 @@ click = "^8.1.3"
dask = "^2023.3.2"
textual = "^0.16.0"
aiohttp = "^3.8.4"
httpx = {extras = ["http2"], version = "^0.23.3"}
xarray-datatree = "^0.0.12"
pydantic-zarr = "^0.5.0"


[tool.poetry.group.dev.dependencies]
Expand Down
122 changes: 41 additions & 81 deletions src/fibsem_tools/io/multiscale.py
Original file line number Diff line number Diff line change
@@ -1,20 +1,21 @@
from __future__ import annotations
from typing import Any, Dict, Literal, Optional, Sequence, Tuple, Union, List
from typing import Any, Literal, Optional, Sequence, Tuple, Union, List

from xarray import DataArray

import zarr
from fibsem_tools.io.core import AccessMode, create_group
from fibsem_tools.metadata.cosem import COSEMGroupMetadataV1, COSEMGroupMetadataV2
from fibsem_tools.metadata.neuroglancer import NeuroglancerN5GroupMetadata
from fibsem_tools.metadata.transform import STTransform
from zarr.errors import ContainsGroupError
from fibsem_tools.metadata.cosem import (
CosemMultiscaleGroupV1,
CosemMultiscaleGroupV2,
)
from fibsem_tools.metadata.neuroglancer import (
NeuroglancerN5Group,
)
from numcodecs.abc import Codec
from xarray_ome_ngff.registry import get_adapters
from pydantic_zarr import GroupSpec, ArraySpec


from fibsem_tools.io.util import Attrs, JSON

NGFF_DEFAULT_VERSION = "0.4"
multiscale_metadata_types = ["neuroglancer", "cellmap", "cosem", "ome-ngff"]

Expand All @@ -33,7 +34,8 @@ def _normalize_chunks(
if all_ints:
result = (chunks,) * len(arrays)
else:
raise ValueError(f"All values in chunks must be ints. Got {chunks}")
msg = f"All values in chunks must be ints. Got {chunks}"
raise ValueError(msg)
except TypeError as e:
raise e

Expand All @@ -44,56 +46,56 @@ def _normalize_chunks(
return result


def multiscale_metadata(
def multiscale_group(
arrays: Sequence[DataArray],
metadata_types: List[str],
array_paths: Optional[List[str]] = None,
) -> Tuple[Dict[str, JSON], List[Dict[str, JSON]]]:
array_paths: Union[List[str], Literal["auto"]] = "auto",
name: Optional[str] = None,
**kwargs,
) -> GroupSpec:
"""
Generate multiscale metadata of the desired flavor from a list of DataArrays
Returns
-------
A tuple of dicts with string keys and JSON-serializable values
A GroupSpec instance representing the multiscale group
"""
if array_paths == "auto":
array_paths = [f"s{idx}" for idx in range(len(arrays))]
group_attrs = {}
array_attrs: List[Dict[str, Any]] = [{}] * len(arrays)
array_attrs = {path: {} for path in array_paths}

if any(f.startswith("ome-ngff") for f in metadata_types) and any(
f.startswith("cosem") for f in metadata_types
):
raise ValueError(
f"""
msg = f"""
You requested {metadata_types}, but ome-ngff metadata and cosem metadata are
incompatible. Use just ome-ngff metadata instead.
"""
)
raise ValueError(msg)

for flavor in metadata_types:
flave, _, version = flavor.partition("@")

if flave == "neuroglancer":
g_meta = NeuroglancerN5GroupMetadata.fromDataArrays(arrays)
group_attrs.update(g_meta.dict())
g_spec = NeuroglancerN5Group.from_xarrays(arrays, **kwargs)
group_attrs.update(g_spec.attrs.dict())
elif flave == "cosem":
if version == "2":
g_meta = COSEMGroupMetadataV2.fromDataArrays(arrays, array_paths)
g_spec = CosemMultiscaleGroupV2.from_xarrays(
arrays, name=name, **kwargs
)
else:
g_meta = COSEMGroupMetadataV1.fromDataArrays(arrays, array_paths)
group_attrs.update(g_meta.dict())
for idx in range(len(array_attrs)):
array_attrs[idx] = {
"transform": STTransform.fromDataArray(arrays[idx]).dict(),
**array_attrs[idx],
}
elif flave == "ome-ngff":
if array_paths is None:
raise ValueError(
f"""
You requested {flave}-type metadata, but array_paths was set to None.
array_paths must be set to a list of strings to use this metadata.
"""
g_spec = CosemMultiscaleGroupV1.from_xarrays(
arrays, name=name, **kwargs
)
group_attrs.update(g_spec.attrs.dict())

for key, value in g_spec.items.items():
array_attrs[key].update(**value.attrs.dict())
elif flave == "ome-ngff":
if version == "":
version = NGFF_DEFAULT_VERSION
adapters = get_adapters(version)
Expand All @@ -109,54 +111,12 @@ def multiscale_metadata(
{multiscale_metadata_types}
"""
)
return group_attrs, array_attrs

members = {
path: ArraySpec.from_array(arr, attrs=array_attrs[path], **kwargs)
for arr, path in zip(arrays, array_paths)
}

def multiscale_group(
url: str,
arrays: List[DataArray],
array_paths: List[str],
chunks: Tuple[Tuple[int, ...], ...] | Tuple[int, ...] | None,
metadata_types: List[str],
group_mode: AccessMode = "w-",
array_mode: AccessMode = "w-",
group_attrs: Attrs | None = None,
array_attrs: Sequence[Attrs] | None = None,
**kwargs: Any,
) -> zarr.Group:

if array_attrs is None:
array_attrs = [{}] * len(arrays)
if group_attrs is None:
group_attrs = {}

mgroup_attrs, marray_attrs = multiscale_metadata(
arrays, metadata_types, array_paths=array_paths
)
_group_attrs = {**group_attrs, **mgroup_attrs}
_arr_attrs = [{**a, **m} for a, m in zip(array_attrs, marray_attrs)]

_chunks = _normalize_chunks(arrays, chunks)
try:
group = create_group(
url,
arrays,
array_paths=array_paths,
chunks=_chunks,
group_attrs=_group_attrs,
array_attrs=_arr_attrs,
group_mode=group_mode,
array_mode=array_mode,
**kwargs,
)
return group
except ContainsGroupError:
raise FileExistsError(
f"""
The resource at {url} resolves to an existing group. Use 'w' or 'a'
access modes to enable writable / appendable access to this group.
"""
)
return GroupSpec(attrs=group_attrs, members=members)


def prepare_multiscale(
Expand Down
99 changes: 84 additions & 15 deletions src/fibsem_tools/metadata/cosem.py
Original file line number Diff line number Diff line change
@@ -1,8 +1,8 @@
from typing import Optional, Sequence
from typing import Iterable, Literal, Optional, Sequence, Union

from pydantic import BaseModel
from xarray import DataArray

from pydantic_zarr import GroupSpec, ArraySpec
from fibsem_tools.metadata.transform import STTransform


Expand All @@ -13,26 +13,26 @@ class ScaleMetaV1(BaseModel):

class MultiscaleMetaV1(BaseModel):
name: Optional[str]
datasets: Sequence[ScaleMetaV1]
datasets: list[ScaleMetaV1]


class MultiscaleMetaV2(BaseModel):
name: Optional[str]
datasets: Sequence[str]
datasets: list[str]


class COSEMGroupMetadataV1(BaseModel):
"""
Multiscale metadata used by COSEM for multiscale datasets saved in N5/Zarr groups.
"""

multiscales: Sequence[MultiscaleMetaV1]
multiscales: list[MultiscaleMetaV1]

@classmethod
def fromDataArrays(
def from_xarrays(
cls,
arrays: Sequence[DataArray],
paths: Sequence[str],
paths: Union[Sequence[str], Literal["auto"]],
name: Optional[str] = None,
):
"""
Expand All @@ -46,9 +46,10 @@ def fromDataArrays(
arrays are assumed to share the same `dims` attributes, albeit with varying
`coords`.
paths : list or tuple of str or None, default=None
paths : Sequence of str or the string literal 'auto', default='auto'
The name on the storage backend for each of the arrays in the multiscale
collection.
collection. If 'auto', then names will be automatically generated using the
format s0, s1, s2, etc
name : str, optional
The name for the multiresolution collection
Expand All @@ -60,13 +61,14 @@ def fromDataArrays(
COSEMGroupMetadata
"""

if paths == "auto":
paths = [f"s{idx}" for idx in range(len(arrays))]

multiscales = [
MultiscaleMetaV1(
name=name,
datasets=[
ScaleMetaV1(
path=path, transform=STTransform.fromDataArray(array=arr)
)
ScaleMetaV1(path=path, transform=STTransform.from_xarray(array=arr))
for path, arr in zip(paths, arrays)
],
)
Expand All @@ -79,13 +81,13 @@ class COSEMGroupMetadataV2(BaseModel):
Multiscale metadata used by COSEM for multiscale datasets saved in N5/Zarr groups.
"""

multiscales: Sequence[MultiscaleMetaV2]
multiscales: list[MultiscaleMetaV2]

@classmethod
def fromDataArrays(
def from_xarrays(
cls,
arrays: Sequence[DataArray],
paths: Sequence[str],
paths: Union[Sequence[str], Literal["auto"]] = "auto",
name: Optional[str] = None,
):
"""
Expand All @@ -111,6 +113,8 @@ def fromDataArrays(
COSEMGroupMetadata
"""
if paths == "auto":
paths = [f"s{idx}" for idx in enumerate(arrays)]

multiscales = [
MultiscaleMetaV2(
Expand All @@ -119,3 +123,68 @@ def fromDataArrays(
)
]
return cls(name=name, multiscales=multiscales, paths=paths)


class CosemArrayAttrs(BaseModel):
    """
    Attributes model for a single array in a COSEM multiscale group.
    """

    # Transform metadata for the array, stored under the `transform` key.
    transform: STTransform


class CosemMultiscaleArray(ArraySpec):
    """
    An ArraySpec whose attributes are constrained to `CosemArrayAttrs`.
    """

    attrs: CosemArrayAttrs

    @classmethod
    def from_xarray(cls, array: DataArray, **kwargs):
        """
        Build an array spec from a DataArray.

        Parameters
        ----------
        array : DataArray
            The array to model. Its transform is obtained via
            `STTransform.from_xarray`.
        **kwargs
            Forwarded unchanged to `ArraySpec.from_array`.

        Returns
        -------
        The value returned by `ArraySpec.from_array`, called with attributes
        that contain the transform derived from `array`.
        """
        transform = STTransform.from_xarray(array)
        array_attrs = CosemArrayAttrs(transform=transform)
        return super().from_array(array, attrs=array_attrs, **kwargs)


class CosemMultiscaleGroupV1(GroupSpec):
    """
    A GroupSpec for a multiscale group that carries COSEM V1 group metadata
    and a collection of `CosemMultiscaleArray` specs keyed by path.
    """

    attrs: COSEMGroupMetadataV1
    items: dict[str, CosemMultiscaleArray]

    @classmethod
    def from_xarrays(
        cls,
        arrays: Iterable[DataArray],
        paths: Union[Sequence[str], Literal["auto"]] = "auto",
        name: Optional[str] = None,
        **kwargs,
    ):
        """
        Build a multiscale group spec from a collection of DataArrays.

        Parameters
        ----------
        arrays : Iterable of DataArray
            The arrays that make up the multiscale collection. Any iterable is
            accepted, including one-shot iterators such as generators.
        paths : Sequence of str or the string literal 'auto', default='auto'
            The name for each array. If 'auto', names are generated using the
            format s0, s1, s2, etc.
        name : str, optional
            Passed through to `COSEMGroupMetadataV1.from_xarrays`.
        **kwargs
            Forwarded to `CosemMultiscaleArray.from_xarray` for every array.

        Returns
        -------
        CosemMultiscaleGroupV1
        """
        # The arrays are measured with len() and then iterated twice (once for
        # the group metadata, once for the array specs), so a one-shot iterable
        # must be materialized before use.
        arrays = tuple(arrays)

        if paths == "auto":
            paths = [f"s{idx}" for idx in range(len(arrays))]

        attrs = COSEMGroupMetadataV1.from_xarrays(arrays, paths, name)

        array_specs = {
            path: CosemMultiscaleArray.from_xarray(arr, **kwargs)
            for path, arr in zip(paths, arrays)
        }

        return cls(attrs=attrs, items=array_specs)


class CosemMultiscaleGroupV2(GroupSpec):
    """
    A GroupSpec for a multiscale group that carries COSEM V2 group metadata
    and a collection of array specs, keyed by path, whose attributes are
    `CosemArrayAttrs`.
    """

    attrs: COSEMGroupMetadataV2
    items: dict[str, ArraySpec[CosemArrayAttrs]]

    @classmethod
    def from_xarrays(
        cls,
        arrays: Iterable[DataArray],
        paths: Union[Sequence[str], Literal["auto"]] = "auto",
        name: Optional[str] = None,
        **kwargs,
    ):
        """
        Build a multiscale group spec from a collection of DataArrays.

        Parameters
        ----------
        arrays : Iterable of DataArray
            The arrays that make up the multiscale collection. Any iterable is
            accepted, including one-shot iterators such as generators.
        paths : Sequence of str or the string literal 'auto', default='auto'
            The name for each array. If 'auto', names are generated using the
            format s0, s1, s2, etc.
        name : str, optional
            Passed through to `COSEMGroupMetadataV2.from_xarrays`.
        **kwargs
            Forwarded to `CosemMultiscaleArray.from_xarray` for every array.

        Returns
        -------
        CosemMultiscaleGroupV2
        """
        # The arrays are measured with len() and then iterated twice (once for
        # the group metadata, once for the array specs), so a one-shot iterable
        # must be materialized before use.
        arrays = tuple(arrays)

        if paths == "auto":
            paths = [f"s{idx}" for idx in range(len(arrays))]

        attrs = COSEMGroupMetadataV2.from_xarrays(arrays, paths, name)

        array_specs = {
            path: CosemMultiscaleArray.from_xarray(arr, **kwargs)
            for path, arr in zip(paths, arrays)
        }

        return cls(attrs=attrs, items=array_specs)
Loading

0 comments on commit 0b237e0

Please sign in to comment.