Skip to content

Commit

Permalink
Merge pull request #13 from janelia-cellmap/chunks_bugfix
Browse files Browse the repository at this point in the history
chunks bugfix
  • Loading branch information
d-v-b authored Jun 14, 2024
2 parents 352feac + 641d298 commit c0677a3
Show file tree
Hide file tree
Showing 10 changed files with 203 additions and 58 deletions.
25 changes: 17 additions & 8 deletions .github/workflows/test.yml
Original file line number Diff line number Diff line change
Expand Up @@ -7,18 +7,27 @@ on:
branches: [ main ]

jobs:
build:
test:
name: py=${{ matrix.python-version }}
runs-on: ubuntu-latest
strategy:
matrix:
python-version: ['3.9', '3.10', '3.11', '3.12']
python-version: ['3.9', '3.10', '3.11']
steps:
- uses: actions/checkout@v4
- name: Install dependencies
shell: "bash -l {0}"
- name: Set up Python
uses: actions/setup-python@v5
with:
python-version: ${{ matrix.python-version }}
cache: 'pip'
- name: Install Hatch
run: |
pip install poetry
poetry install
- name: Test
python -m pip install --upgrade pip
pip install hatch
- name: Set Up Hatch Env
run: |
poetry run pytest
hatch env create test.py${{ matrix.python-version }}
hatch env run -e test.py${{ matrix.python-version }} pip list
- name: Run Tests
run: |
hatch env run --env test.py${{ matrix.python-version }} run
2 changes: 1 addition & 1 deletion docs/index.md
Original file line number Diff line number Diff line change
Expand Up @@ -72,7 +72,7 @@ print(group_model.model_dump(exclude='attributes'))
'pixelResolution': {'dimensions': (10.0, 8.0), 'unit': 'nm'}
},
'shape': (2, 2),
'chunks': (2, 2),
'chunks': (4, 4),
'dtype': '<i8',
'fill_value': 0,
'order': 'C',
Expand Down
100 changes: 75 additions & 25 deletions pyproject.toml
Original file line number Diff line number Diff line change
@@ -1,38 +1,88 @@
[tool.poetry]
[build-system]
requires = ["hatchling"]
build-backend = "hatchling.build"

[project]
name = "cellmap-schemas"
version = "0.7.0"
description = "Schemas for data used by the Cellmap project team at Janelia Research Campus."
authors = ["Davis Vann Bennett <[email protected]>"]
dynamic = ["version"]
description = 'Schemas for data used by the Cellmap project team at Janelia Research Campus.'
readme = "README.md"
packages = [{include = "cellmap_schemas", from = "src"}]
requires-python = ">=3.9"
license = "MIT"
keywords = ["cellmap", "ngff", "n5", "zarr"]
authors = [
{ name = "Davis Vann Bennett", email = "[email protected]" },
]
classifiers = [
"Development Status :: 4 - Beta",
"Programming Language :: Python",
"Programming Language :: Python :: 3.9",
"Programming Language :: Python :: 3.10",
"Programming Language :: Python :: 3.11",
"Programming Language :: Python :: 3.12",
"Programming Language :: Python :: Implementation :: CPython",
"Programming Language :: Python :: Implementation :: PyPy",
]
dependencies = [
"pydantic-zarr >= 0.7.0",
"s3fs >= 2023.10.0",
"rich >= 13.7.0"
]

[tool.poetry.dependencies]
python = "^3.9"
pydantic-zarr = "^0.7.0"
s3fs = "^2023.10.0"
rich = "^13.7.0"
[project.urls]
Documentation = "https://janelia-cellmap.github.io/cellmap-schemas"
Issues = "https://github.com/janelia-cellmap/cellmap-schemas/issues"
Source = "https://github.com/janelia-cellmap/cellmap-schemas"

[tool.hatch.version]
path = "src/cellmap_schemas/__about__.py"

[tool.hatch.envs.test]
dependencies = [
"pytest",
"pytest-cov==5.0.0",
"pytest-examples == 0.0.10"
]

[tool.poetry.group.docs.dependencies]
mkdocs-material = "^9.4.2"
mkdocstrings = {extras = ["python"], version = "^0.23.0"}
[[tool.hatch.envs.test.matrix]]
python = ["3.9", "3.10", "3.11"]

[tool.poetry.group.test.dependencies]
pytest = "^7.4.2"
coverage = "^7.4.0"
[tool.hatch.envs.test.scripts]
run-coverage = "pytest --cov-config=pyproject.toml --cov=pkg --cov=tests"
run = "run-coverage --no-cov"

[tool.poetry.group.dev.dependencies]
mypy = "^1.7.1"
pytest-examples = "^0.0.10"

[tool.hatch.envs.docs]
dependencies = [
"mkdocs-material == 9.4.2",
"mkdocstrings[python] == 0.23.0"
]

[build-system]
requires = ["poetry-core"]
build-backend = "poetry.core.masonry.api"
[tool.hatch.envs.types]
extra-dependencies = [
"mypy>=1.0.0",
]
[tool.hatch.envs.types.scripts]
check = "mypy --install-types --non-interactive {args:src/cellmap_schemas tests}"

[tool.coverage.run]
source_pkgs = ["cellmap_schemas", "tests"]
branch = true
parallel = true
omit = [
"src/cellmap_schemas/__about__.py",
]

[tool.coverage.paths]
cellmap_schemas = ["src/cellmap_schemas", "*/cellmap-schemas/src/cellmap_schemas"]
tests = ["tests", "*/cellmap-schemas/tests"]

[tool.poetry.scripts]
cellmap-schemas = 'cellmap_schemas.cli:cli'
[tool.coverage.report]
exclude_lines = [
"no cov",
"if __name__ == .__main__.:",
"if TYPE_CHECKING:",
]

[tool.ruff]
line-length = 100
Expand All @@ -42,4 +92,4 @@ convention = "numpy"

[tool.ruff.format]
quote-style = "double"
indent-style = "space"
indent-style = "space"
4 changes: 4 additions & 0 deletions src/cellmap_schemas/__about__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
# SPDX-FileCopyrightText: 2022-present Howard Hughes Medical Institute
#
# SPDX-License-Identifier: MIT
__version__ = "0.7.0"
41 changes: 41 additions & 0 deletions src/cellmap_schemas/base.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,8 @@
from __future__ import annotations
import numpy.typing as npt
from typing import Any, Iterable, Literal
from pydantic_zarr.v2 import ArraySpec, GroupSpec
from pydantic_zarr.v2 import auto_chunks


def structure_group_equal(spec_a: GroupSpec, spec_b: GroupSpec) -> bool:
Expand Down Expand Up @@ -33,3 +36,41 @@ def structure_equal(spec_a: ArraySpec | GroupSpec, spec_b: ArraySpec | GroupSpec
return structure_group_equal(spec_a, spec_b)
else:
return False


def normalize_chunks(
	chunks: Literal["auto"] | tuple[int, ...] | tuple[tuple[int, ...]],
	arrays: Iterable[npt.NDArray[Any]],
) -> tuple[tuple[int, ...], ...]:
	"""
	Normalize a specification of chunks against a collection of arrays.

	Parameters
	----------
	chunks: Literal["auto"] | tuple[int, ...] | tuple[tuple[int, ...]]
		Either the string "auto", a single chunk shape (tuple of ints), or one
		chunk shape per array (tuple of tuples of ints).
	arrays: Iterable[npt.NDArray[Any]]
		The arrays the chunks are normalized against. May be any iterable,
		including a one-shot generator.

	Returns
	-------
	tuple[tuple[int, ...], ...]
		One chunk shape per array.

	Raises
	------
	ValueError
		If `chunks` is none of the accepted forms, or if a per-array chunks
		specification has a different length than `arrays`.

	Notes
	-----
	This handles 3 cases:
	- If `chunks` is the string "auto", then the `auto_chunks` routine from
	  `pydantic-zarr` estimates a chunk size for the first array, and that
	  chunk size is used for all arrays.
	- If `chunks` is a tuple of integers, that is used as the chunk size for
	  all arrays.
	- If `chunks` is a tuple of tuples of integers, it is returned (coerced to
	  tuples) after checking its length matches the number of arrays.
	"""
	arrays_tuple = tuple(arrays)

	if chunks == "auto":
		# Bug fix: use len(arrays_tuple), not len(arrays) — `arrays` may be a
		# generator with no len(), and it was already consumed above.
		return (auto_chunks(arrays_tuple[0]),) * len(arrays_tuple)
	elif all(isinstance(x, int) for x in chunks):
		return (chunks,) * len(arrays_tuple)
	elif all(all(isinstance(x, int) for x in t) for t in chunks):
		result = tuple(map(tuple, chunks))
		if len(result) != len(arrays_tuple):
			msg = (
				f"The number of chunks ({len(chunks)}) does not match the number of "
				f"arrays ({len(arrays_tuple)})"
			)
			raise ValueError(msg)
		return result
	else:
		# Bug fix: the original literals concatenated to '…"auto"a tuple…';
		# add the missing ", " separator.
		msg = (
			f'Invalid chunks: {chunks}. Expected the string "auto", '
			"a tuple of ints, or a tuple of tuples of ints."
		)
		raise ValueError(msg)
9 changes: 7 additions & 2 deletions src/cellmap_schemas/multiscale/cosem.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,8 @@
from __future__ import annotations
from typing import Annotated, Any, Literal, Optional, Sequence, TYPE_CHECKING

from cellmap_schemas.base import normalize_chunks

if TYPE_CHECKING:
from typing import Type
from typing_extensions import Self
Expand Down Expand Up @@ -394,15 +396,18 @@ def from_arrays(
compressor: Codec | None | Literal["auto"] = "auto"
The compressor to use for the Zarr arrays.
"""

chunks = normalize_chunks(chunks, arrays)

members = {
path: Array.from_array(
array=array,
chunks=chunks,
chunks=chunk,
compressor=compressor,
dimension_separator="/",
attributes=ArrayMetadata.from_transform(transform),
)
for path, array, transform in zip(paths, arrays, transforms)
for path, array, transform, chunk in zip(paths, arrays, transforms, chunks)
}

metadata = GroupMetadata.from_transforms(
Expand Down
8 changes: 6 additions & 2 deletions src/cellmap_schemas/multiscale/neuroglancer_n5.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,8 @@
from __future__ import annotations
from typing import TYPE_CHECKING, Any, Sequence

from cellmap_schemas.base import normalize_chunks

if TYPE_CHECKING:
from typing_extensions import Self, Type, Literal
from numcodecs.abc import Codec
Expand Down Expand Up @@ -212,6 +214,8 @@ def from_arrays(
entirely by Zarr.
"""

chunks = normalize_chunks(chunks, arrays)

if dimension_order == "C":
# this will reverse the order of axes, units, and scales before writing metadata
indexer = slice(-1, None, -1)
Expand Down Expand Up @@ -239,10 +243,10 @@ def from_arrays(
pixelResolution=PixelResolution(dimensions=scale, unit=units_parsed[0])
),
array=array,
chunks=chunks,
chunks=chunk,
compressor=compressor,
)
for path, array, scale in zip(paths, arrays, scales_parsed)
for path, array, chunk, scale in zip(paths, arrays, chunks, scales_parsed)
}
return cls(members=members, attributes=attributes)

Expand Down
32 changes: 30 additions & 2 deletions tests/test_base.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,11 @@
from __future__ import annotations
from cellmap_schemas.base import structure_equal

import re
from typing import Literal

import numpy as np
import pytest
from cellmap_schemas.base import normalize_chunks, structure_equal
from pydantic_zarr.v2 import auto_chunks
from pydantic_zarr.v2 import ArraySpec, GroupSpec


Expand Down Expand Up @@ -33,3 +37,27 @@ def test_structure_equal():

group_e = group_a.model_copy(deep=True, update={"members": {"array": array_b}})
assert not structure_equal(group_a, group_e)


@pytest.mark.parametrize("chunks", ("auto", (1, 2, 3), ((1, 2, 3), (1, 2, 3), (2, 3, 4))))
def test_normalize_chunks(chunks: Literal["auto"] | tuple[int, ...] | tuple[tuple[int, ...], ...]):
    """Check that each accepted chunks form normalizes to one chunk shape per array."""
    arrays = tuple(np.zeros((10, 10, 10)) for _ in range(3))
    # Derive the expected normalization for the given input form.
    if chunks == "auto":
        expected = (auto_chunks(arrays[0]),) * len(arrays)
    elif isinstance(chunks[0], tuple):
        expected = chunks
    else:
        expected = (chunks,) * len(arrays)
    assert normalize_chunks(chunks, arrays) == expected


def test_normalize_chunks_wrong_length():
    """A per-array chunks tuple whose length differs from the number of arrays
    must raise a ValueError with an informative message."""
    num_arrays = 2
    arrays = tuple(np.zeros((1, 1, 1)) for _ in range(num_arrays))
    chunks = ((1, 1, 1),)
    expected_msg = re.escape(
        f"The number of chunks ({len(chunks)}) does not match the number of "
        f"arrays ({num_arrays})"
    )
    with pytest.raises(ValueError, match=expected_msg):
        normalize_chunks(chunks, arrays)
19 changes: 9 additions & 10 deletions tests/test_multiscale/test_cosem.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,8 @@
from __future__ import annotations
from typing import TYPE_CHECKING

from cellmap_schemas.base import normalize_chunks

if TYPE_CHECKING:
from typing import Literal

Expand All @@ -21,7 +23,6 @@
change_coordinates,
)
from cellmap_schemas.multiscale.neuroglancer_n5 import PixelResolution
from zarr.util import guess_chunks


@pytest.mark.parametrize("ndim", (2, 3, 4))
Expand Down Expand Up @@ -281,7 +282,7 @@ def test_multiscale_group_from_arrays(order: Literal["C", "F"], name: str | None
}

arrays = {"s0": np.zeros((10, 10, 10)), "s1": np.zeros((5, 5, 5))}

chunks_expected = normalize_chunks("auto", (arrays["s0"],))[0]
groupMeta = GroupMetadata(
axes=axes[reorder],
scales=[[1, 1, 1], [2, 2, 2]],
Expand All @@ -294,7 +295,9 @@ def test_multiscale_group_from_arrays(order: Literal["C", "F"], name: str | None
attributes=groupMeta,
members={
key: Array.from_array(
array=array, attributes=ArrayMetadata.from_transform(transform=transforms[key])
array=array,
chunks=chunks_expected,
attributes=ArrayMetadata.from_transform(transform=transforms[key]),
)
for key, array in arrays.items()
},
Expand Down Expand Up @@ -409,7 +412,7 @@ def test_change_coordinates_3d(


@pytest.mark.parametrize("dimension_order", ("C", "F"))
@pytest.mark.parametrize("chunks", ("auto", ((2, 2, 2))))
@pytest.mark.parametrize("chunks", ("auto", (2, 2, 2), ((1, 1, 1), (2, 2, 2), (3, 3, 3))))
@pytest.mark.parametrize("compressor", (Zstd(3), GZip(-1)))
def test_from_arrays(
dimension_order: Literal["C", "F"],
Expand Down Expand Up @@ -441,14 +444,10 @@ def test_from_arrays(
assert group.attributes.pixelResolution == PixelResolution(
dimensions=scales[0][indexer], unit=units[indexer][0]
)
chunks_expected = normalize_chunks(chunks, arrays)
for idx in range(len(arrays)):
obs = group.members[paths[idx]]
exp = arrays[idx]
if chunks == "auto":
chunks_expected = guess_chunks(exp.shape, exp.dtype.itemsize)
else:
chunks_expected = chunks
assert obs.chunks == chunks_expected
assert obs.chunks == chunks_expected[idx]

assert obs.attributes.pixelResolution == PixelResolution(
dimensions=scales[idx][indexer], unit=units[indexer][0]
Expand Down
Loading

0 comments on commit c0677a3

Please sign in to comment.