From defa58802ac1177590d5a8b9450469c30e3a0552 Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Tue, 23 May 2023 06:30:31 +0000 Subject: [PATCH 01/11] Bump requests from 2.28.2 to 2.31.0 Bumps [requests](https://github.com/psf/requests) from 2.28.2 to 2.31.0. - [Release notes](https://github.com/psf/requests/releases) - [Changelog](https://github.com/psf/requests/blob/main/HISTORY.md) - [Commits](https://github.com/psf/requests/compare/v2.28.2...v2.31.0) --- updated-dependencies: - dependency-name: requests dependency-type: direct:development ... Signed-off-by: dependabot[bot] --- poetry.lock | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/poetry.lock b/poetry.lock index 0279b36..f0e3b57 100644 --- a/poetry.lock +++ b/poetry.lock @@ -1,4 +1,4 @@ -# This file is automatically @generated by Poetry 1.4.1 and should not be changed by hand. +# This file is automatically @generated by Poetry and should not be changed by hand. [[package]] name = "aiobotocore" @@ -1930,21 +1930,21 @@ pyyaml = "*" [[package]] name = "requests" -version = "2.28.2" +version = "2.31.0" description = "Python HTTP for Humans." category = "dev" optional = false -python-versions = ">=3.7, <4" +python-versions = ">=3.7" files = [ - {file = "requests-2.28.2-py3-none-any.whl", hash = "sha256:64299f4909223da747622c030b781c0d7811e359c37124b4bd368fb8c6518baa"}, - {file = "requests-2.28.2.tar.gz", hash = "sha256:98b1b2782e3c6c4904938b84c0eb932721069dfdb9134313beff7c83c2df24bf"}, + {file = "requests-2.31.0-py3-none-any.whl", hash = "sha256:58cd2187c01e70e6e26505bca751777aa9f2ee0b7f4300988b709f44e013003f"}, + {file = "requests-2.31.0.tar.gz", hash = "sha256:942c5a758f98d790eaed1a29cb6eefc7ffb0d1cf7af05c3d2791656dbd6ad1e1"}, ] [package.dependencies] certifi = ">=2017.4.17" charset-normalizer = ">=2,<4" idna = ">=2.5,<4" -urllib3 = ">=1.21.1,<1.27" +urllib3 = ">=1.21.1,<3" [package.extras] socks = ["PySocks (>=1.5.6,!=1.5.7)"] From 893340a7d2b40e00b31bbbe2edd3cdca49ca48ec Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Thu, 25 May 2023 18:58:56 +0000 Subject: [PATCH 02/11] Bump tornado from 6.3.1 to 6.3.2 Bumps [tornado](https://github.com/tornadoweb/tornado) from 6.3.1 to 6.3.2. - [Changelog](https://github.com/tornadoweb/tornado/blob/master/docs/releases.rst) - [Commits](https://github.com/tornadoweb/tornado/compare/v6.3.1...v6.3.2) --- updated-dependencies: - dependency-name: tornado dependency-type: indirect ... Signed-off-by: dependabot[bot] --- poetry.lock | 26 +++++++++++++------------- 1 file changed, 13 insertions(+), 13 deletions(-) diff --git a/poetry.lock b/poetry.lock index 0279b36..a104305 100644 --- a/poetry.lock +++ b/poetry.lock @@ -1,4 +1,4 @@ -# This file is automatically @generated by Poetry 1.4.1 and should not be changed by hand. +# This file is automatically @generated by Poetry and should not be changed by hand. [[package]] name = "aiobotocore" @@ -2190,23 +2190,23 @@ files = [ [[package]] name = "tornado" -version = "6.3.1" +version = "6.3.2" description = "Tornado is a Python web framework and asynchronous networking library, originally developed at FriendFeed." 
category = "main" optional = false python-versions = ">= 3.8" files = [ - {file = "tornado-6.3.1-cp38-abi3-macosx_10_9_universal2.whl", hash = "sha256:db181eb3df8738613ff0a26f49e1b394aade05034b01200a63e9662f347d4415"}, - {file = "tornado-6.3.1-cp38-abi3-macosx_10_9_x86_64.whl", hash = "sha256:b4e7b956f9b5e6f9feb643ea04f07e7c6b49301e03e0023eedb01fa8cf52f579"}, - {file = "tornado-6.3.1-cp38-abi3-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:9661aa8bc0e9d83d757cd95b6f6d1ece8ca9fd1ccdd34db2de381e25bf818233"}, - {file = "tornado-6.3.1-cp38-abi3-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:81c17e0cc396908a5e25dc8e9c5e4936e6dfd544c9290be48bd054c79bcad51e"}, - {file = "tornado-6.3.1-cp38-abi3-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:a27a1cfa9997923f80bdd962b3aab048ac486ad8cfb2f237964f8ab7f7eb824b"}, - {file = "tornado-6.3.1-cp38-abi3-musllinux_1_1_aarch64.whl", hash = "sha256:d7117f3c7ba5d05813b17a1f04efc8e108a1b811ccfddd9134cc68553c414864"}, - {file = "tornado-6.3.1-cp38-abi3-musllinux_1_1_i686.whl", hash = "sha256:ffdce65a281fd708da5a9def3bfb8f364766847fa7ed806821a69094c9629e8a"}, - {file = "tornado-6.3.1-cp38-abi3-musllinux_1_1_x86_64.whl", hash = "sha256:90f569a35a8ec19bde53aa596952071f445da678ec8596af763b9b9ce07605e6"}, - {file = "tornado-6.3.1-cp38-abi3-win32.whl", hash = "sha256:3455133b9ff262fd0a75630af0a8ee13564f25fb4fd3d9ce239b8a7d3d027bf8"}, - {file = "tornado-6.3.1-cp38-abi3-win_amd64.whl", hash = "sha256:1285f0691143f7ab97150831455d4db17a267b59649f7bd9700282cba3d5e771"}, - {file = "tornado-6.3.1.tar.gz", hash = "sha256:5e2f49ad371595957c50e42dd7e5c14d64a6843a3cf27352b69c706d1b5918af"}, + {file = "tornado-6.3.2-cp38-abi3-macosx_10_9_universal2.whl", hash = "sha256:c367ab6c0393d71171123ca5515c61ff62fe09024fa6bf299cd1339dc9456829"}, + {file = "tornado-6.3.2-cp38-abi3-macosx_10_9_x86_64.whl", hash = "sha256:b46a6ab20f5c7c1cb949c72c1994a4585d2eaa0be4853f50a03b5031e964fc7c"}, + {file = "tornado-6.3.2-cp38-abi3-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:c2de14066c4a38b4ecbbcd55c5cc4b5340eb04f1c5e81da7451ef555859c833f"}, + {file = "tornado-6.3.2-cp38-abi3-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:05615096845cf50a895026f749195bf0b10b8909f9be672f50b0fe69cba368e4"}, + {file = "tornado-6.3.2-cp38-abi3-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:5b17b1cf5f8354efa3d37c6e28fdfd9c1c1e5122f2cb56dac121ac61baa47cbe"}, + {file = "tornado-6.3.2-cp38-abi3-musllinux_1_1_aarch64.whl", hash = "sha256:29e71c847a35f6e10ca3b5c2990a52ce38b233019d8e858b755ea6ce4dcdd19d"}, + {file = "tornado-6.3.2-cp38-abi3-musllinux_1_1_i686.whl", hash = "sha256:834ae7540ad3a83199a8da8f9f2d383e3c3d5130a328889e4cc991acc81e87a0"}, + {file = "tornado-6.3.2-cp38-abi3-musllinux_1_1_x86_64.whl", hash = "sha256:6a0848f1aea0d196a7c4f6772197cbe2abc4266f836b0aac76947872cd29b411"}, + {file = "tornado-6.3.2-cp38-abi3-win32.whl", hash = "sha256:7efcbcc30b7c654eb6a8c9c9da787a851c18f8ccd4a5a3a95b05c7accfa068d2"}, + {file = "tornado-6.3.2-cp38-abi3-win_amd64.whl", hash = "sha256:0c325e66c8123c606eea33084976c832aa4e766b7dff8aedd7587ea44a604cdf"}, + {file = "tornado-6.3.2.tar.gz", hash = "sha256:4b927c4f19b71e627b13f3db2324e4ae660527143f9e1f2e2fb404f3a187e2ba"}, ] [[package]] From 27f171c80ccb1ea9324dc6549999c764d4f59ceb Mon Sep 17 00:00:00 2001 From: Davis Vann Bennett Date: Tue, 20 
Jun 2023 15:19:13 -0400 Subject: [PATCH 03/11] feat: use generic models --- ground_truth_test.py | 34 ++++++++----- src/fibsem_tools/metadata/groundtruth.py | 65 +++++++++++++++--------- 2 files changed, 64 insertions(+), 35 deletions(-) diff --git a/ground_truth_test.py b/ground_truth_test.py index ce76432..b0f4f63 100755 --- a/ground_truth_test.py +++ b/ground_truth_test.py @@ -2,16 +2,17 @@ from fibsem_tools import read_xarray import json from fibsem_tools.metadata.groundtruth import ( - AnnotationEncoding, + AnnotationProtocol, + MultiscaleGroupAttrs, + SemanticAnnotation, classNameDict, AnnotationArrayAttrs, - AnnotationClassAttrs, AnnotationCropAttrs, ) from rich import print_json import numpy as np import datetime -from typing import Dict, TypedDict, List, TypeVar +from typing import Dict, Literal, TypedDict, List, TypeVar Key = TypeVar("Key", bound=str) @@ -32,6 +33,9 @@ class CropMeta(TypedDict): out_dtype = "uint8" out_dtype_max = np.iinfo(out_dtype).max +tnamesT = Literal["ERES membrane"] +tnames = ["ERES membrane"] + crop_key: Key = "Crop13" group = read_xarray(uri) arr = group["s0"].data @@ -48,7 +52,10 @@ class CropMeta(TypedDict): crop = arr.sel(selecter, method="nearest") crop_attrs = AnnotationCropAttrs( - name=crop_key, description="A crop", protocol=None, doi=None + name=crop_key, + description="A crop", + protocol=AnnotationProtocol[tnamesT](url="www.google.com", classNames=tnames), + doi=None, ) out_attrs = {} @@ -56,17 +63,21 @@ class CropMeta(TypedDict): # partition the subvolume into separate integer classes vals = np.unique(crop) - for v in vals: + name, description = classNameDict[v].short, classNameDict[v].long + if name != "ERES membrane": + continue subvol = (crop == v).astype(out_dtype) - census = {k: np.sum(subvol == k) for k in np.unique(subvol)} - encoding: AnnotationEncoding = {"absent": 0, "unknown": 255} - array_attrs = AnnotationArrayAttrs(census=census, encoding=encoding, object=name) + type = SemanticAnnotation(encoding={"absent": 0, "unknown": 255}) + histogram = {key: np.sum(subvol == value) for key, value in type.encoding.items()} + array_attrs = AnnotationArrayAttrs[tnamesT]( + specialValuesHist=histogram, type=type, className=name + ) - group_attrs = AnnotationClassAttrs( - name=name, + group_attrs = MultiscaleGroupAttrs[tnamesT]( + className=name, description=description, created_by=[ "Cellmap annotators", @@ -74,8 +85,7 @@ class CropMeta(TypedDict): created_with=["Amira", "Paintera"], start_date=datetime.datetime.now().isoformat(), duration_days=10, - encoding=encoding, - type="instance", + type=type, ) out_attrs[f"/{crop_key}/{name}"] = {"annotation": group_attrs.dict()} diff --git a/src/fibsem_tools/metadata/groundtruth.py b/src/fibsem_tools/metadata/groundtruth.py index ed5fe1a..acc47ef 100644 --- a/src/fibsem_tools/metadata/groundtruth.py +++ b/src/fibsem_tools/metadata/groundtruth.py @@ -1,11 +1,14 @@ from __future__ import annotations from enum import Enum -from typing import Dict, List, Literal, Optional, Union +from typing import Dict, Generic, List, Literal, Optional, TypeVar, Union -from pydantic import BaseModel +from pydantic import BaseModel, root_validator +from pydantic.generics import GenericModel -AnnotationType = Union[Literal["semantic"], Literal["instance"]] +class StrictBase(BaseModel): + class Config: + extra = "forbid" class InstanceName(BaseModel): @@ -83,36 +86,48 @@ class LabelList(BaseModel): 39: InstanceName(short="Glycogen", long="Glycogen"), } +Possibility = Literal["unknown", "absent"] -class 
SemanticAnnotation(BaseModel):
-    type: Literal["semantic"]
-    encoding: Dict[int, str]
+class SemanticSegmentation(BaseModel):
+    type: Literal["semantic_segmentation"] = "semantic_segmentation"
+    encoding: Dict[Union[Possibility, Literal["present"]], int]
 
 
-class InstanceAnnotation(BaseModel):
-    type: Literal["instance"]
-    encoding: Dict[int, Possibility]
+class InstanceSegmentation(BaseModel):
+    type: Literal["instance_segmentation"] = "instance_segmentation"
+    encoding: Dict[Possibility, int]
 
 
-Possibility = Union[Literal["unknown"], Literal["absent"], Literal["present"]]
-AnnotationEncoding = Dict[Possibility, int]
+AnnotationType = Union[SemanticSegmentation, InstanceSegmentation]
+TName = TypeVar("TName", bound=str)
 
 
-class AnnotationArrayAttrs(BaseModel):
+
+class AnnotationArrayAttrs(GenericModel, Generic[TName]):
     """
     The metadata for an array of annotated values.
     """
 
-    objects: str
+    className: TName
     # a mapping from values to frequencies
-    census: Dict[int, int]
+    histogram: Optional[Dict[Possibility, int]]
     # a mapping from class names to values
     # this is array metadata because labels might disappear during downsampling
-    encoding: AnnotationEncoding
+    annotation_type: AnnotationType
+
+    @root_validator()
+    def check_encoding(cls, values):
+        if (typ := values.get("annotation_type", False)) and (
+            hist := values.get("histogram", False)
+        ):
+            # check that everything in the histogram is encoded
+            assert set(typ.encoding.keys()).issuperset(hist.keys()), "Histogram keys missing from encoding"
+
+        return values
 
 
-class AnnotationClassAttrs(BaseModel):
+class MultiscaleGroupAttrs(GenericModel, Generic[TName]):
     """
     The metadata for an individual annotated semantic class.
     In a storage hierarchy like zarr or hdf5, this metadata is associated with a
@@ -120,23 +135,27 @@
     annotation data in a multiscale representation.
     """
 
-    name: str
+    class_name: TName
     description: str
-    created_by: List[str]
-    created_with: List[str]
+    created_by: list[str]
+    created_with: list[str]
     start_date: str | None
     end_date: str | None
     duration_days: int | None
-    type: AnnotationType
-    encoding: AnnotationEncoding
+    annotation_type: AnnotationType
+
+
+class AnnotationProtocol(GenericModel, Generic[TName]):
+    url: str
+    class_names: list[TName]
 
 
-class AnnotationCropAttrs(BaseModel):
+class AnnotationCropAttrs(GenericModel, Generic[TName]):
    """
    The metadata for all annotations in a single crop.
""" name: Optional[str] description: Optional[str] - protocol: Optional[str] + protocol: AnnotationProtocol[TName] doi: Optional[str] From a84158c4b46ca6e1191b480a8f74e67b6449b3a5 Mon Sep 17 00:00:00 2001 From: Davis Vann Bennett Date: Tue, 20 Jun 2023 15:39:16 -0400 Subject: [PATCH 04/11] fix: better ome-ngff version handling --- src/fibsem_tools/io/zarr.py | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/src/fibsem_tools/io/zarr.py b/src/fibsem_tools/io/zarr.py index 359f2e6..958df21 100644 --- a/src/fibsem_tools/io/zarr.py +++ b/src/fibsem_tools/io/zarr.py @@ -329,9 +329,10 @@ def infer_coords(array: zarr.Array) -> List[DataArray]: elif (multiscales := group.attrs.get("multiscales", None)) is not None: if len(multiscales) > 0: multiscale = multiscales[0] - if (ngff_version := multiscale.get("version", None)) == "0.4": + ngff_version = multiscale.get("version", None) + if ngff_version == "0.4": from pydantic_ome_ngff.v04 import Multiscale - elif multiscale["version"] == "0.5-dev": + elif ngff_version == "0.5-dev": from pydantic_ome_ngff.latest import Multiscale else: raise ValueError( @@ -341,7 +342,7 @@ def infer_coords(array: zarr.Array) -> List[DataArray]: """ ) else: - raise ValueError("Multiscales attribute was empty") + raise ValueError("Multiscales attribute was empty.") xarray_adapters = get_adapters(ngff_version) multiscales_meta = [Multiscale(**entry) for entry in multiscales] transforms = [] From 761eb34bbfc6dede9e0dd821d4cca95c30c71dab Mon Sep 17 00:00:00 2001 From: Davis Vann Bennett Date: Wed, 21 Jun 2023 15:50:54 -0400 Subject: [PATCH 05/11] fix: normalize field names --- ground_truth_test.py | 16 +++++++++------- src/fibsem_tools/metadata/groundtruth.py | 5 ++++- 2 files changed, 13 insertions(+), 8 deletions(-) diff --git a/ground_truth_test.py b/ground_truth_test.py index b0f4f63..9d0fc1f 100755 --- a/ground_truth_test.py +++ b/ground_truth_test.py @@ -4,7 +4,7 @@ from fibsem_tools.metadata.groundtruth import ( AnnotationProtocol, MultiscaleGroupAttrs, - SemanticAnnotation, + SemanticSegmentation, classNameDict, AnnotationArrayAttrs, AnnotationCropAttrs, @@ -54,7 +54,7 @@ class CropMeta(TypedDict): crop_attrs = AnnotationCropAttrs( name=crop_key, description="A crop", - protocol=AnnotationProtocol[tnamesT](url="www.google.com", classNames=tnames), + protocol=AnnotationProtocol[tnamesT](url="www.google.com", class_names=tnames), doi=None, ) @@ -70,14 +70,14 @@ class CropMeta(TypedDict): continue subvol = (crop == v).astype(out_dtype) - type = SemanticAnnotation(encoding={"absent": 0, "unknown": 255}) + type = SemanticSegmentation(encoding={"absent": 0, "unknown": 255}) histogram = {key: np.sum(subvol == value) for key, value in type.encoding.items()} array_attrs = AnnotationArrayAttrs[tnamesT]( - specialValuesHist=histogram, type=type, className=name + histogram=histogram, annotation_type=type, class_name=name ) group_attrs = MultiscaleGroupAttrs[tnamesT]( - className=name, + class_name=name, description=description, created_by=[ "Cellmap annotators", @@ -85,10 +85,12 @@ class CropMeta(TypedDict): created_with=["Amira", "Paintera"], start_date=datetime.datetime.now().isoformat(), duration_days=10, - type=type, + annotation_type=type, ) - out_attrs[f"/{crop_key}/{name}"] = {"annotation": group_attrs.dict()} + out_attrs[f"/{crop_key}/{name.lower().replace(' ', '_')}"] = { + "annotation": group_attrs.dict() + } out_attrs[f"/{crop_key}/{name}/s0"] = {"annotation": array_attrs.dict()} diff --git a/src/fibsem_tools/metadata/groundtruth.py 
b/src/fibsem_tools/metadata/groundtruth.py
index acc47ef..4f4c708 100644
--- a/src/fibsem_tools/metadata/groundtruth.py
+++ b/src/fibsem_tools/metadata/groundtruth.py
@@ -109,7 +109,7 @@ class AnnotationArrayAttrs(GenericModel, Generic[TName]):
     The metadata for an array of annotated values.
     """
 
-    className: TName
+    class_name: TName
     # a mapping from values to frequencies
     histogram: Optional[Dict[Possibility, int]]
     # a mapping from class names to values
@@ -149,6 +149,9 @@ class AnnotationProtocol(GenericModel, Generic[TName]):
     url: str
     class_names: list[TName]
 
+    class Config:
+        extra = "forbid"
+
 
 class AnnotationCropAttrs(GenericModel, Generic[TName]):
     """

From 9f6b6a833c3ce164b2d2dc669d66b1afc2731eee Mon Sep 17 00:00:00 2001
From: mzouink
Date: Thu, 29 Jun 2023 18:07:03 -0400
Subject: [PATCH 06/11] add new organelle classes

---
 src/fibsem_tools/metadata/groundtruth.py | 18 +++++++++++-------
 1 file changed, 11 insertions(+), 7 deletions(-)

diff --git a/src/fibsem_tools/metadata/groundtruth.py b/src/fibsem_tools/metadata/groundtruth.py
index 47a8b7f..49af792 100644
--- a/src/fibsem_tools/metadata/groundtruth.py
+++ b/src/fibsem_tools/metadata/groundtruth.py
@@ -41,10 +41,7 @@ class LabelList(BaseModel):
     1: InstanceName(short="ECS", long="Extracellular Space"),
     2: InstanceName(short="Plasma membrane", long="Plasma membrane"),
     3: InstanceName(short="Mito membrane", long="Mitochondrial membrane"),
-    4: InstanceName(
-        short="Mito lumen",
-        long="Mitochondrial lumen",
-    ),
+    4: InstanceName(short="Mito lumen", long="Mitochondrial lumen"),
     5: InstanceName(short="Mito DNA", long="Mitochondrial DNA"),
     6: InstanceName(short="Golgi Membrane", long="Golgi apparatus membrane"),
     7: InstanceName(short="Golgi lumen", long="Golgi apparatus lumen"),
@@ -58,9 +55,7 @@
     15: InstanceName(short="LD lumen", long="Lipid droplet lumen"),
     16: InstanceName(short="ER membrane", long="Endoplasmic reticulum membrane"),
     17: InstanceName(short="ER lumen", long="Endoplasmic reticulum lumen"),
-    18: InstanceName(
-        short="ERES membrane", long="Endoplasmic reticulum exit site membrane"
-    ),
+    18: InstanceName(short="ERES membrane", long="Endoplasmic reticulum exit site membrane"),
     19: InstanceName(short="ERES lumen", long="Endoplasmic reticulum exit site lumen"),
     20: InstanceName(short="NE membrane", long="Nuclear envelope membrane"),
     21: InstanceName(short="NE lumen", long="Nuclear envelope lumen"),
@@ -82,4 +77,13 @@
     37: InstanceName(short="Nucleus combined", long="Nucleus combined"),
     38: InstanceName(short="Vimentin", long="Vimentin"),
     39: InstanceName(short="Glycogen", long="Glycogen"),
+    40: InstanceName(short="Cardiac neurons", long="Cardiac neurons"),
+    41: InstanceName(short="Endothelial cells", long="Endothelial cells"),
+    42: InstanceName(short="Cardiomyocytes", long="Cardiomyocytes"),
+    43: InstanceName(short="Epicardial cells", long="Epicardial cells"),
+    44: InstanceName(short="Parietal pericardial cells", long="Parietal pericardial cells"),
+    45: InstanceName(short="Red blood cells", long="Red blood cells"),
+    46: InstanceName(short="White blood cells", long="White blood cells"),
+    47: InstanceName(short="Peroxisome membrane", long="Peroxisome membrane"),
+    48: InstanceName(short="Peroxisome lumen", long="Peroxisome lumen"),
 }

From 64efdce4598dc810fd23768be862dc689b486713 Mon Sep 17 00:00:00 2001
From: Davis Vann Bennett
Date: Thu, 29 Jun 2023 23:01:17 -0400
Subject: [PATCH 07/11] chore: patch release

---
 pyproject.toml | 2 +-
 1 file changed, 1
insertion(+), 1 deletion(-)

diff --git a/pyproject.toml b/pyproject.toml
index 6954b4b..ed6c482 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -1,6 +1,6 @@
 [tool.poetry]
 name = "fibsem-tools"
-version = "4.0.2"
+version = "4.0.3"
 description = "Tools for processing FIBSEM datasets"
 authors = ["Davis Vann Bennett "]
 license = "MIT"

From 3a9d6c7c9473073fd3c7d95c5cb783ce005068bf Mon Sep 17 00:00:00 2001
From: Davis Vann Bennett
Date: Fri, 30 Jun 2023 21:59:16 -0400
Subject: [PATCH 08/11] feat: gha for publishing to pypi

---
 .github/workflows/upload_pypi.yml | 24 ++++++++++++++++++++++++
 1 file changed, 24 insertions(+)
 create mode 100644 .github/workflows/upload_pypi.yml

diff --git a/.github/workflows/upload_pypi.yml b/.github/workflows/upload_pypi.yml
new file mode 100644
index 0000000..1780ce9
--- /dev/null
+++ b/.github/workflows/upload_pypi.yml
@@ -0,0 +1,24 @@
+name: Upload Python Package
+
+on:
+  release:
+    types: [created]
+
+jobs:
+  deploy:
+    runs-on: ubuntu-20.04
+
+    steps:
+      - uses: actions/checkout@v2
+      - uses: actions/setup-python@v2
+      - name: Install dependencies
+        run: |
+          python -m pip install --upgrade pip
+          pip install poetry==1.4.1
+      - name: Build and publish
+        env:
+          POETRY_HTTP_BASIC_PYPI_USERNAME: __token__
+          POETRY_HTTP_BASIC_PYPI_PASSWORD: ${{ secrets.PYPI_API_TOKEN }}
+        run: |
+          poetry build
+          poetry publish
\ No newline at end of file

From 9fdc9bb512bc663df922068de779d38d371c6a9a Mon Sep 17 00:00:00 2001
From: Davis Vann Bennett
Date: Sat, 1 Jul 2023 12:38:20 -0400
Subject: [PATCH 09/11] chore: remove demo file

---
 ground_truth_test.py | 97 --------------------------------------------
 1 file changed, 97 deletions(-)
 delete mode 100755 ground_truth_test.py

diff --git a/ground_truth_test.py b/ground_truth_test.py
deleted file mode 100755
index 9d0fc1f..0000000
--- a/ground_truth_test.py
+++ /dev/null
@@ -1,97 +0,0 @@
-# from fibsem_tools.metadata.groundtruth import GroupMetadata
-from fibsem_tools import read_xarray
-import json
-from fibsem_tools.metadata.groundtruth import (
-    AnnotationProtocol,
-    MultiscaleGroupAttrs,
-    SemanticSegmentation,
-    classNameDict,
-    AnnotationArrayAttrs,
-    AnnotationCropAttrs,
-)
-from rich import print_json
-import numpy as np
-import datetime
-from typing import Dict, Literal, TypedDict, List, TypeVar
-
-Key = TypeVar("Key", bound=str)
-
-
-class CropMeta(TypedDict):
-    maxId: int
-    name: str
-    offset: List[float]
-    offset_unit: str
-    resolution: List[float]
-    resulution_unit: str
-    type: str
-
-
-dataset = "jrc_hela-2"
-bucket = "janelia-cosem-datasets"
-uri = f"s3://{bucket}/{dataset}/{dataset}.n5/labels/gt/"
-out_dtype = "uint8"
-out_dtype_max = np.iinfo(out_dtype).max
-
-tnamesT = Literal["ERES membrane"]
-tnames = ["ERES membrane"]
-
-crop_key: Key = "Crop13"
-group = read_xarray(uri)
-arr = group["s0"].data
-subvolumeMeta: Dict[Key, CropMeta] = arr.attrs["subvolumes"]
-sMeta = subvolumeMeta[crop_key]
-dims = ("x", "y", "z")
-
-scales = arr.attrs["transform"]["scale"][::-1]
-offsets = np.multiply(sMeta["offset"], np.divide(scales, sMeta["resolution"]))
-selecter = {
-    d: (np.arange(100) * scale) + offset
-    for d, offset, scale in zip(dims, offsets, scales)
-}
-
-crop = arr.sel(selecter, method="nearest")
-crop_attrs = AnnotationCropAttrs(
-    name=crop_key,
-    description="A crop",
-    protocol=AnnotationProtocol[tnamesT](url="www.google.com", class_names=tnames),
-    doi=None,
-)
-
-out_attrs = {}
-out_attrs[f"/{crop_key}"] = {"annotation": crop_attrs.dict()}
-# partition the subvolume into separate integer classes
-vals = np.unique(crop)
-
-for v in vals:
-
-    name, 
description = classNameDict[v].short, classNameDict[v].long - if name != "ERES membrane": - continue - - subvol = (crop == v).astype(out_dtype) - type = SemanticSegmentation(encoding={"absent": 0, "unknown": 255}) - histogram = {key: np.sum(subvol == value) for key, value in type.encoding.items()} - array_attrs = AnnotationArrayAttrs[tnamesT]( - histogram=histogram, annotation_type=type, class_name=name - ) - - group_attrs = MultiscaleGroupAttrs[tnamesT]( - class_name=name, - description=description, - created_by=[ - "Cellmap annotators", - ], - created_with=["Amira", "Paintera"], - start_date=datetime.datetime.now().isoformat(), - duration_days=10, - annotation_type=type, - ) - - out_attrs[f"/{crop_key}/{name.lower().replace(' ', '_')}"] = { - "annotation": group_attrs.dict() - } - out_attrs[f"/{crop_key}/{name}/s0"] = {"annotation": array_attrs.dict()} - - -print_json(json.dumps(out_attrs)) From aa529d60c1f32173727a4fa35e9db22394a45dee Mon Sep 17 00:00:00 2001 From: Davis Vann Bennett Date: Tue, 22 Aug 2023 14:51:30 -0400 Subject: [PATCH 10/11] feat: add zarr scanning cli tool to check for invalid chunks and potentially delete them --- src/fibsem_tools/cli/zarr_scan.py | 106 ++++++++++++++++++++++++++++++ src/fibsem_tools/io/zarr.py | 2 +- tests/test_zarr.py | 23 +++++++ 3 files changed, 130 insertions(+), 1 deletion(-) create mode 100644 src/fibsem_tools/cli/zarr_scan.py diff --git a/src/fibsem_tools/cli/zarr_scan.py b/src/fibsem_tools/cli/zarr_scan.py new file mode 100644 index 0000000..53cf3eb --- /dev/null +++ b/src/fibsem_tools/cli/zarr_scan.py @@ -0,0 +1,106 @@ +from typing import Literal, Union +import click +import zarr +from fibsem_tools import access +from rich import print +from fibsem_tools.io.zarr import get_chunk_keys +from rich.progress import track +import time +from dataclasses import dataclass + +ChunkState = Literal["valid", "missing", "invalid"] + + +@dataclass +class Missing: + variant = "missing" + + +@dataclass +class Invalid: + variant = "invalid" + exception: BaseException + + +@dataclass +class Valid: + variant = "valid" + + +class ChunkSetResults(dict[ChunkState, dict[str, Union[Missing, Valid, Invalid]]]): + pass + + +def check_zarray(array: zarr.Array) -> dict[str, Union[Missing, Invalid, Valid]]: + ckeys = tuple(get_chunk_keys(array)) + results = {} + for ckey in track(ckeys, description="Checking chunks..."): + try: + array._decode_chunk(array.store[ckey]) + results[ckey] = Valid() + except OSError as e: + results[ckey] = Invalid(exception=e) + except KeyError: + results[ckey] = Missing() + + return results + + +@click.command() +@click.argument("array_path", type=click.STRING) +@click.option( + "--valid", + is_flag=True, + show_default=True, + default=False, + help="report valid chunks", +) +@click.option( + "--missing", + is_flag=True, + show_default=True, + default=False, + help="report missing chunks", +) +@click.option( + "--invalid", + is_flag=True, + show_default=True, + default=False, + help="report invalid chunks", +) +@click.option( + "--delete-invalid", + is_flag=True, + show_default=True, + default=False, + help="delete invalid chunks", +) +def cli(array_path, valid, missing, invalid, delete_invalid): + start = time.time() + array = access(array_path, mode="r") + all_results = check_zarray(array) + # categorize + results_categorized: ChunkSetResults = {"valid": {}, "missing": {}, "invalid": {}} + for key, value in all_results.items(): + results_categorized[value.variant][key] = value + + to_show = {} + + for flag, opt in zip((valid, missing, 
invalid), ("valid", "missing", "invalid")): + if flag: + to_show[opt] = results_categorized[opt] + print(to_show) + if delete_invalid: + array_a = access(array_path, mode="a") + num_invalid = len(results_categorized["invalid"]) + for res in track( + results_categorized["invalid"], + description=f"Deleting {num_invalid} invalid chunks...", + ): + del array_a.store[res] + print(f"Completed after {time.time() - start}s") + + +if __name__ == "__main__": + cli() diff --git a/src/fibsem_tools/io/zarr.py b/src/fibsem_tools/io/zarr.py index 958df21..5c5ba13 100644 --- a/src/fibsem_tools/io/zarr.py +++ b/src/fibsem_tools/io/zarr.py @@ -216,7 +216,7 @@ def access_zarr( array_or_group = zarr.open(store, path=path, **kwargs, mode=access_mode) - if access_mode != "r": + if access_mode != "r" and len(attrs) > 0: array_or_group.attrs.update(attrs) return array_or_group diff --git a/tests/test_zarr.py b/tests/test_zarr.py index 876dab4..95129c7 100644 --- a/tests/test_zarr.py +++ b/tests/test_zarr.py @@ -3,8 +3,10 @@ import pytest from xarray import DataArray from zarr.storage import FSStore +from pathlib import Path import zarr import numpy as np +import itertools from fibsem_tools.io.core import read_dask, read_xarray from fibsem_tools.io.multiscale import multiscale_group from fibsem_tools.io.xr import stt_from_array @@ -15,6 +17,7 @@ access_zarr, create_dataarray, create_datatree, + get_chunk_keys, get_url, to_dask, to_xarray, @@ -247,3 +250,23 @@ def test_dask(temp_zarr, chunks): assert np.array_equal(observed, data) assert np.array_equal(read_dask(get_url(zarray), chunks).compute(), data) + + +@pytest.mark.parametrize( + "store_class", (zarr.N5Store, zarr.DirectoryStore, zarr.NestedDirectoryStore) +) +@pytest.mark.parametrize("shape", ((10,), (10, 11, 12))) +def test_chunk_keys(tmp_path: Path, store_class, shape): + store: zarr.storage.BaseStore = store_class(tmp_path) + arr_path = "test" + arr = zarr.create( + shape=shape, store=store, path=arr_path, chunks=(2,) * len(shape), dtype="uint8" + ) + + dim_sep = arr._dimension_separator + chunk_idcs = itertools.product(*(range(c_s) for c_s in arr.cdata_shape)) + expected = tuple( + os.path.join(arr.path, dim_sep.join(map(str, idx))) for idx in chunk_idcs + ) + observed = tuple(get_chunk_keys(arr)) + assert observed == expected From 0da50de1d3108e66e21bd017228aaf687f8eb18c Mon Sep 17 00:00:00 2001 From: Davis Vann Bennett Date: Tue, 22 Aug 2023 16:46:47 -0400 Subject: [PATCH 11/11] chore: version bump --- pyproject.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pyproject.toml b/pyproject.toml index ed6c482..e9ad210 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,6 +1,6 @@ [tool.poetry] name = "fibsem-tools" -version = "4.0.3" +version = "4.0.4" description = "Tools for processing FIBSEM datasets" authors = ["Davis Vann Bennett "] license = "MIT"
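
A usage note for the chunk scanner introduced in PATCH 10: the click command takes an array path plus report flags (--valid, --missing, --invalid) and a destructive --delete-invalid flag that reopens the array in append mode and removes every chunk whose decoding raised an OSError. The snippet below is a minimal sketch of driving the same check from Python rather than the command line; the array path is a placeholder, and it assumes fibsem_tools.cli is importable as a package.

    # Hypothetical usage sketch; "path/to/array.zarr/s0" is a placeholder path.
    from fibsem_tools import access
    from fibsem_tools.cli.zarr_scan import check_zarray

    # Open read-only: scanning only decodes chunks, it never mutates the store.
    arr = access("path/to/array.zarr/s0", mode="r")
    results = check_zarray(arr)  # maps each chunk key to Valid, Missing, or Invalid
    invalid = [key for key, state in results.items() if state.variant == "invalid"]
    print(f"{len(invalid)} of {len(results)} chunks failed to decode")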