From defa58802ac1177590d5a8b9450469c30e3a0552 Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Tue, 23 May 2023 06:30:31 +0000 Subject: [PATCH 01/11] Bump requests from 2.28.2 to 2.31.0 Bumps [requests](https://github.com/psf/requests) from 2.28.2 to 2.31.0. - [Release notes](https://github.com/psf/requests/releases) - [Changelog](https://github.com/psf/requests/blob/main/HISTORY.md) - [Commits](https://github.com/psf/requests/compare/v2.28.2...v2.31.0) --- updated-dependencies: - dependency-name: requests dependency-type: direct:development ... Signed-off-by: dependabot[bot] --- poetry.lock | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/poetry.lock b/poetry.lock index 0279b36..f0e3b57 100644 --- a/poetry.lock +++ b/poetry.lock @@ -1,4 +1,4 @@ -# This file is automatically @generated by Poetry 1.4.1 and should not be changed by hand. +# This file is automatically @generated by Poetry and should not be changed by hand. [[package]] name = "aiobotocore" @@ -1930,21 +1930,21 @@ pyyaml = "*" [[package]] name = "requests" -version = "2.28.2" +version = "2.31.0" description = "Python HTTP for Humans." category = "dev" optional = false -python-versions = ">=3.7, <4" +python-versions = ">=3.7" files = [ - {file = "requests-2.28.2-py3-none-any.whl", hash = "sha256:64299f4909223da747622c030b781c0d7811e359c37124b4bd368fb8c6518baa"}, - {file = "requests-2.28.2.tar.gz", hash = "sha256:98b1b2782e3c6c4904938b84c0eb932721069dfdb9134313beff7c83c2df24bf"}, + {file = "requests-2.31.0-py3-none-any.whl", hash = "sha256:58cd2187c01e70e6e26505bca751777aa9f2ee0b7f4300988b709f44e013003f"}, + {file = "requests-2.31.0.tar.gz", hash = "sha256:942c5a758f98d790eaed1a29cb6eefc7ffb0d1cf7af05c3d2791656dbd6ad1e1"}, ] [package.dependencies] certifi = ">=2017.4.17" charset-normalizer = ">=2,<4" idna = ">=2.5,<4" -urllib3 = ">=1.21.1,<1.27" +urllib3 = ">=1.21.1,<3" [package.extras] socks = ["PySocks (>=1.5.6,!=1.5.7)"] From 893340a7d2b40e00b31bbbe2edd3cdca49ca48ec Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Thu, 25 May 2023 18:58:56 +0000 Subject: [PATCH 02/11] Bump tornado from 6.3.1 to 6.3.2 Bumps [tornado](https://github.com/tornadoweb/tornado) from 6.3.1 to 6.3.2. - [Changelog](https://github.com/tornadoweb/tornado/blob/master/docs/releases.rst) - [Commits](https://github.com/tornadoweb/tornado/compare/v6.3.1...v6.3.2) --- updated-dependencies: - dependency-name: tornado dependency-type: indirect ... Signed-off-by: dependabot[bot] --- poetry.lock | 26 +++++++++++++------------- 1 file changed, 13 insertions(+), 13 deletions(-) diff --git a/poetry.lock b/poetry.lock index 0279b36..a104305 100644 --- a/poetry.lock +++ b/poetry.lock @@ -1,4 +1,4 @@ -# This file is automatically @generated by Poetry 1.4.1 and should not be changed by hand. +# This file is automatically @generated by Poetry and should not be changed by hand. [[package]] name = "aiobotocore" @@ -2190,23 +2190,23 @@ files = [ [[package]] name = "tornado" -version = "6.3.1" +version = "6.3.2" description = "Tornado is a Python web framework and asynchronous networking library, originally developed at FriendFeed." 
category = "main" optional = false python-versions = ">= 3.8" files = [ - {file = "tornado-6.3.1-cp38-abi3-macosx_10_9_universal2.whl", hash = "sha256:db181eb3df8738613ff0a26f49e1b394aade05034b01200a63e9662f347d4415"}, - {file = "tornado-6.3.1-cp38-abi3-macosx_10_9_x86_64.whl", hash = "sha256:b4e7b956f9b5e6f9feb643ea04f07e7c6b49301e03e0023eedb01fa8cf52f579"}, - {file = "tornado-6.3.1-cp38-abi3-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:9661aa8bc0e9d83d757cd95b6f6d1ece8ca9fd1ccdd34db2de381e25bf818233"}, - {file = "tornado-6.3.1-cp38-abi3-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:81c17e0cc396908a5e25dc8e9c5e4936e6dfd544c9290be48bd054c79bcad51e"}, - {file = "tornado-6.3.1-cp38-abi3-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:a27a1cfa9997923f80bdd962b3aab048ac486ad8cfb2f237964f8ab7f7eb824b"}, - {file = "tornado-6.3.1-cp38-abi3-musllinux_1_1_aarch64.whl", hash = "sha256:d7117f3c7ba5d05813b17a1f04efc8e108a1b811ccfddd9134cc68553c414864"}, - {file = "tornado-6.3.1-cp38-abi3-musllinux_1_1_i686.whl", hash = "sha256:ffdce65a281fd708da5a9def3bfb8f364766847fa7ed806821a69094c9629e8a"}, - {file = "tornado-6.3.1-cp38-abi3-musllinux_1_1_x86_64.whl", hash = "sha256:90f569a35a8ec19bde53aa596952071f445da678ec8596af763b9b9ce07605e6"}, - {file = "tornado-6.3.1-cp38-abi3-win32.whl", hash = "sha256:3455133b9ff262fd0a75630af0a8ee13564f25fb4fd3d9ce239b8a7d3d027bf8"}, - {file = "tornado-6.3.1-cp38-abi3-win_amd64.whl", hash = "sha256:1285f0691143f7ab97150831455d4db17a267b59649f7bd9700282cba3d5e771"}, - {file = "tornado-6.3.1.tar.gz", hash = "sha256:5e2f49ad371595957c50e42dd7e5c14d64a6843a3cf27352b69c706d1b5918af"}, + {file = "tornado-6.3.2-cp38-abi3-macosx_10_9_universal2.whl", hash = "sha256:c367ab6c0393d71171123ca5515c61ff62fe09024fa6bf299cd1339dc9456829"}, + {file = "tornado-6.3.2-cp38-abi3-macosx_10_9_x86_64.whl", hash = "sha256:b46a6ab20f5c7c1cb949c72c1994a4585d2eaa0be4853f50a03b5031e964fc7c"}, + {file = "tornado-6.3.2-cp38-abi3-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:c2de14066c4a38b4ecbbcd55c5cc4b5340eb04f1c5e81da7451ef555859c833f"}, + {file = "tornado-6.3.2-cp38-abi3-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:05615096845cf50a895026f749195bf0b10b8909f9be672f50b0fe69cba368e4"}, + {file = "tornado-6.3.2-cp38-abi3-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:5b17b1cf5f8354efa3d37c6e28fdfd9c1c1e5122f2cb56dac121ac61baa47cbe"}, + {file = "tornado-6.3.2-cp38-abi3-musllinux_1_1_aarch64.whl", hash = "sha256:29e71c847a35f6e10ca3b5c2990a52ce38b233019d8e858b755ea6ce4dcdd19d"}, + {file = "tornado-6.3.2-cp38-abi3-musllinux_1_1_i686.whl", hash = "sha256:834ae7540ad3a83199a8da8f9f2d383e3c3d5130a328889e4cc991acc81e87a0"}, + {file = "tornado-6.3.2-cp38-abi3-musllinux_1_1_x86_64.whl", hash = "sha256:6a0848f1aea0d196a7c4f6772197cbe2abc4266f836b0aac76947872cd29b411"}, + {file = "tornado-6.3.2-cp38-abi3-win32.whl", hash = "sha256:7efcbcc30b7c654eb6a8c9c9da787a851c18f8ccd4a5a3a95b05c7accfa068d2"}, + {file = "tornado-6.3.2-cp38-abi3-win_amd64.whl", hash = "sha256:0c325e66c8123c606eea33084976c832aa4e766b7dff8aedd7587ea44a604cdf"}, + {file = "tornado-6.3.2.tar.gz", hash = "sha256:4b927c4f19b71e627b13f3db2324e4ae660527143f9e1f2e2fb404f3a187e2ba"}, ] [[package]] From 27f171c80ccb1ea9324dc6549999c764d4f59ceb Mon Sep 17 00:00:00 2001 From: Davis Vann Bennett Date: Tue, 20 
Jun 2023 15:19:13 -0400 Subject: [PATCH 03/11] feat: use generic models --- ground_truth_test.py | 34 ++++++++----- src/fibsem_tools/metadata/groundtruth.py | 65 +++++++++++++++--------- 2 files changed, 64 insertions(+), 35 deletions(-) diff --git a/ground_truth_test.py b/ground_truth_test.py index ce76432..b0f4f63 100755 --- a/ground_truth_test.py +++ b/ground_truth_test.py @@ -2,16 +2,17 @@ from fibsem_tools import read_xarray import json from fibsem_tools.metadata.groundtruth import ( - AnnotationEncoding, + AnnotationProtocol, + MultiscaleGroupAttrs, + SemanticAnnotation, classNameDict, AnnotationArrayAttrs, - AnnotationClassAttrs, AnnotationCropAttrs, ) from rich import print_json import numpy as np import datetime -from typing import Dict, TypedDict, List, TypeVar +from typing import Dict, Literal, TypedDict, List, TypeVar Key = TypeVar("Key", bound=str) @@ -32,6 +33,9 @@ class CropMeta(TypedDict): out_dtype = "uint8" out_dtype_max = np.iinfo(out_dtype).max +tnamesT = Literal["ERES membrane"] +tnames = ["ERES membrane"] + crop_key: Key = "Crop13" group = read_xarray(uri) arr = group["s0"].data @@ -48,7 +52,10 @@ class CropMeta(TypedDict): crop = arr.sel(selecter, method="nearest") crop_attrs = AnnotationCropAttrs( - name=crop_key, description="A crop", protocol=None, doi=None + name=crop_key, + description="A crop", + protocol=AnnotationProtocol[tnamesT](url="www.google.com", classNames=tnames), + doi=None, ) out_attrs = {} @@ -56,17 +63,21 @@ class CropMeta(TypedDict): # partition the subvolume into separate integer classes vals = np.unique(crop) - for v in vals: + name, description = classNameDict[v].short, classNameDict[v].long + if name != "ERES membrane": + continue subvol = (crop == v).astype(out_dtype) - census = {k: np.sum(subvol == k) for k in np.unique(subvol)} - encoding: AnnotationEncoding = {"absent": 0, "unknown": 255} - array_attrs = AnnotationArrayAttrs(census=census, encoding=encoding, object=name) + type = SemanticAnnotation(encoding={"absent": 0, "unknown": 255}) + histogram = {key: np.sum(subvol == value) for key, value in type.encoding.items()} + array_attrs = AnnotationArrayAttrs[tnamesT]( + specialValuesHist=histogram, type=type, className=name + ) - group_attrs = AnnotationClassAttrs( - name=name, + group_attrs = MultiscaleGroupAttrs[tnamesT]( + className=name, description=description, created_by=[ "Cellmap annotators", @@ -74,8 +85,7 @@ class CropMeta(TypedDict): created_with=["Amira", "Paintera"], start_date=datetime.datetime.now().isoformat(), duration_days=10, - encoding=encoding, - type="instance", + type=type, ) out_attrs[f"/{crop_key}/{name}"] = {"annotation": group_attrs.dict()} diff --git a/src/fibsem_tools/metadata/groundtruth.py b/src/fibsem_tools/metadata/groundtruth.py index ed5fe1a..acc47ef 100644 --- a/src/fibsem_tools/metadata/groundtruth.py +++ b/src/fibsem_tools/metadata/groundtruth.py @@ -1,11 +1,14 @@ from __future__ import annotations from enum import Enum -from typing import Dict, List, Literal, Optional, Union +from typing import Dict, Generic, List, Literal, Optional, TypeVar, Union -from pydantic import BaseModel +from pydantic import BaseModel, root_validator +from pydantic.generics import GenericModel -AnnotationType = Union[Literal["semantic"], Literal["instance"]] +class StrictBase(BaseModel): + class Config: + extra = "forbid" class InstanceName(BaseModel): @@ -83,36 +86,48 @@ class LabelList(BaseModel): 39: InstanceName(short="Glycogen", long="Glycogen"), } +Possibility = Literal["unknown", "absent"] -class 
SemanticAnnotation(BaseModel):
-    type: Literal["semantic"]
-    encoding: Dict[int, str]
+class SemanticSegmentation(BaseModel):
+    type: Literal["semantic_segmentation"] = "semantic_segmentation"
+    encoding: Dict[Union[Possibility, Literal["present"]], int]
 
 
-class InstanceAnnotation(BaseModel):
-    type: Literal["instance"]
-    encoding: Dict[int, Possibility]
+class InstanceSegmentation(BaseModel):
+    type: Literal["instance_segmentation"] = "instance_segmentation"
+    encoding: Dict[Possibility, int]
 
 
-Possibility = Union[Literal["unknown"], Literal["absent"], Literal["present"]]
-AnnotationEncoding = Dict[Possibility, int]
+AnnotationType = Union[SemanticSegmentation, InstanceSegmentation]
+TName = TypeVar("TName", bound=str)
 
 
-class AnnotationArrayAttrs(BaseModel):
+
+class AnnotationArrayAttrs(GenericModel, Generic[TName]):
     """
     The metadata for an array of annotated values.
     """
 
-    objects: str
+    className: TName
     # a mapping from values to frequencies
-    census: Dict[int, int]
+    histogram: Optional[Dict[Possibility, int]]
     # a mapping from class names to values
     # this is array metadata because labels might disappear during downsampling
-    encoding: AnnotationEncoding
+    annotation_type: AnnotationType
+
+    @root_validator()
+    def check_encoding(cls, values):
+        if (typ := values.get("annotation_type", False)) and (
+            hist := values.get("histogram", False)
+        ):
+            # check that everything in the histogram is encoded
+            assert set(typ.encoding.keys()).issuperset(hist.keys()), "Histogram keys missing from encoding"
+
+        return values
 
 
-class AnnotationClassAttrs(BaseModel):
+class MultiscaleGroupAttrs(GenericModel, Generic[TName]):
     """
     The metadata for an individual annotated semantic class.
     In a storage hierarchy like zarr or hdf5, this metadata is associated with a
@@ -120,23 +135,27 @@
     annotation data in a multiscale representation.
     """
 
-    name: str
+    class_name: TName
     description: str
-    created_by: List[str]
-    created_with: List[str]
+    created_by: list[str]
+    created_with: list[str]
     start_date: str | None
     end_date: str | None
     duration_days: int | None
-    type: AnnotationType
-    encoding: AnnotationEncoding
+    annotation_type: AnnotationType
+
+
+class AnnotationProtocol(GenericModel, Generic[TName]):
+    url: str
+    class_names: list[TName]
 
 
-class AnnotationCropAttrs(BaseModel):
+class AnnotationCropAttrs(GenericModel, Generic[TName]):
    """
    The metadata for all annotations in a single crop.
""" name: Optional[str] description: Optional[str] - protocol: Optional[str] + protocol: AnnotationProtocol[TName] doi: Optional[str] From a84158c4b46ca6e1191b480a8f74e67b6449b3a5 Mon Sep 17 00:00:00 2001 From: Davis Vann Bennett Date: Tue, 20 Jun 2023 15:39:16 -0400 Subject: [PATCH 04/11] fix: better ome-ngff version handling --- src/fibsem_tools/io/zarr.py | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/src/fibsem_tools/io/zarr.py b/src/fibsem_tools/io/zarr.py index 359f2e6..958df21 100644 --- a/src/fibsem_tools/io/zarr.py +++ b/src/fibsem_tools/io/zarr.py @@ -329,9 +329,10 @@ def infer_coords(array: zarr.Array) -> List[DataArray]: elif (multiscales := group.attrs.get("multiscales", None)) is not None: if len(multiscales) > 0: multiscale = multiscales[0] - if (ngff_version := multiscale.get("version", None)) == "0.4": + ngff_version = multiscale.get("version", None) + if ngff_version == "0.4": from pydantic_ome_ngff.v04 import Multiscale - elif multiscale["version"] == "0.5-dev": + elif ngff_version == "0.5-dev": from pydantic_ome_ngff.latest import Multiscale else: raise ValueError( @@ -341,7 +342,7 @@ def infer_coords(array: zarr.Array) -> List[DataArray]: """ ) else: - raise ValueError("Multiscales attribute was empty") + raise ValueError("Multiscales attribute was empty.") xarray_adapters = get_adapters(ngff_version) multiscales_meta = [Multiscale(**entry) for entry in multiscales] transforms = [] From 761eb34bbfc6dede9e0dd821d4cca95c30c71dab Mon Sep 17 00:00:00 2001 From: Davis Vann Bennett Date: Wed, 21 Jun 2023 15:50:54 -0400 Subject: [PATCH 05/11] fix: normalize field names --- ground_truth_test.py | 16 +++++++++------- src/fibsem_tools/metadata/groundtruth.py | 5 ++++- 2 files changed, 13 insertions(+), 8 deletions(-) diff --git a/ground_truth_test.py b/ground_truth_test.py index b0f4f63..9d0fc1f 100755 --- a/ground_truth_test.py +++ b/ground_truth_test.py @@ -4,7 +4,7 @@ from fibsem_tools.metadata.groundtruth import ( AnnotationProtocol, MultiscaleGroupAttrs, - SemanticAnnotation, + SemanticSegmentation, classNameDict, AnnotationArrayAttrs, AnnotationCropAttrs, @@ -54,7 +54,7 @@ class CropMeta(TypedDict): crop_attrs = AnnotationCropAttrs( name=crop_key, description="A crop", - protocol=AnnotationProtocol[tnamesT](url="www.google.com", classNames=tnames), + protocol=AnnotationProtocol[tnamesT](url="www.google.com", class_names=tnames), doi=None, ) @@ -70,14 +70,14 @@ class CropMeta(TypedDict): continue subvol = (crop == v).astype(out_dtype) - type = SemanticAnnotation(encoding={"absent": 0, "unknown": 255}) + type = SemanticSegmentation(encoding={"absent": 0, "unknown": 255}) histogram = {key: np.sum(subvol == value) for key, value in type.encoding.items()} array_attrs = AnnotationArrayAttrs[tnamesT]( - specialValuesHist=histogram, type=type, className=name + histogram=histogram, annotation_type=type, class_name=name ) group_attrs = MultiscaleGroupAttrs[tnamesT]( - className=name, + class_name=name, description=description, created_by=[ "Cellmap annotators", @@ -85,10 +85,12 @@ class CropMeta(TypedDict): created_with=["Amira", "Paintera"], start_date=datetime.datetime.now().isoformat(), duration_days=10, - type=type, + annotation_type=type, ) - out_attrs[f"/{crop_key}/{name}"] = {"annotation": group_attrs.dict()} + out_attrs[f"/{crop_key}/{name.lower().replace(' ', '_')}"] = { + "annotation": group_attrs.dict() + } out_attrs[f"/{crop_key}/{name}/s0"] = {"annotation": array_attrs.dict()} diff --git a/src/fibsem_tools/metadata/groundtruth.py 
b/src/fibsem_tools/metadata/groundtruth.py
index acc47ef..4f4c708 100644
--- a/src/fibsem_tools/metadata/groundtruth.py
+++ b/src/fibsem_tools/metadata/groundtruth.py
@@ -109,7 +109,7 @@ class AnnotationArrayAttrs(GenericModel, Generic[TName]):
     The metadata for an array of annotated values.
     """
 
-    className: TName
+    class_name: TName
     # a mapping from values to frequencies
     histogram: Optional[Dict[Possibility, int]]
     # a mapping from class names to values
@@ -149,6 +149,9 @@ class AnnotationProtocol(GenericModel, Generic[TName]):
     url: str
     class_names: list[TName]
 
+    class Config:
+        extra = "forbid"
+
 
 class AnnotationCropAttrs(GenericModel, Generic[TName]):
     """

From 9f6b6a833c3ce164b2d2dc669d66b1afc2731eee Mon Sep 17 00:00:00 2001
From: mzouink
Date: Thu, 29 Jun 2023 18:07:03 -0400
Subject: [PATCH 06/11] add new organelle classes

---
 src/fibsem_tools/metadata/groundtruth.py | 18 +++++++++++-------
 1 file changed, 11 insertions(+), 7 deletions(-)

diff --git a/src/fibsem_tools/metadata/groundtruth.py b/src/fibsem_tools/metadata/groundtruth.py
index 47a8b7f..49af792 100644
--- a/src/fibsem_tools/metadata/groundtruth.py
+++ b/src/fibsem_tools/metadata/groundtruth.py
@@ -41,10 +41,7 @@ class LabelList(BaseModel):
     1: InstanceName(short="ECS", long="Extracellular Space"),
     2: InstanceName(short="Plasma membrane", long="Plasma membrane"),
     3: InstanceName(short="Mito membrane", long="Mitochondrial membrane"),
-    4: InstanceName(
-        short="Mito lumen",
-        long="Mitochondrial lumen",
-    ),
+    4: InstanceName(short="Mito lumen", long="Mitochondrial lumen"),
     5: InstanceName(short="Mito DNA", long="Mitochondrial DNA"),
     6: InstanceName(short="Golgi Membrane", long="Golgi apparatus membrane"),
     7: InstanceName(short="Golgi lumen", long="Golgi apparatus lumen"),
@@ -58,9 +55,7 @@
     15: InstanceName(short="LD lumen", long="Lipid droplet lumen"),
     16: InstanceName(short="ER membrane", long="Endoplasmic reticulum membrane"),
     17: InstanceName(short="ER lumen", long="Endoplasmic reticulum lumen"),
-    18: InstanceName(
-        short="ERES membrane", long="Endoplasmic reticulum exit site membrane"
-    ),
+    18: InstanceName(short="ERES membrane", long="Endoplasmic reticulum exit site membrane"),
     19: InstanceName(short="ERES lumen", long="Endoplasmic reticulum exit site lumen"),
     20: InstanceName(short="NE membrane", long="Nuclear envelope membrane"),
     21: InstanceName(short="NE lumen", long="Nuclear envelope lumen"),
@@ -82,4 +77,13 @@
     37: InstanceName(short="Nucleus combined", long="Nucleus combined"),
     38: InstanceName(short="Vimentin", long="Vimentin"),
     39: InstanceName(short="Glycogen", long="Glycogen"),
+    40: InstanceName(short="Cardiac neurons", long="Cardiac neurons"),
+    41: InstanceName(short="Endothelial cells", long="Endothelial cells"),
+    42: InstanceName(short="Cardiomyocytes", long="Cardiomyocytes"),
+    43: InstanceName(short="Epicardial cells", long="Epicardial cells"),
+    44: InstanceName(short="Parietal pericardial cells", long="Parietal pericardial cells"),
+    45: InstanceName(short="Red blood cells", long="Red blood cells"),
+    46: InstanceName(short="White blood cells", long="White blood cells"),
+    47: InstanceName(short="Peroxisome membrane", long="Peroxisome membrane"),
+    48: InstanceName(short="Peroxisome lumen", long="Peroxisome lumen"),
 }

From 64efdce4598dc810fd23768be862dc689b486713 Mon Sep 17 00:00:00 2001
From: Davis Vann Bennett
Date: Thu, 29 Jun 2023 23:01:17 -0400
Subject: [PATCH 07/11] chore: patch release

---
 pyproject.toml | 2 +-
 1 file changed, 1
insertion(+), 1 deletion(-)

diff --git a/pyproject.toml b/pyproject.toml
index 6954b4b..ed6c482 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -1,6 +1,6 @@
 [tool.poetry]
 name = "fibsem-tools"
-version = "4.0.2"
+version = "4.0.3"
 description = "Tools for processing FIBSEM datasets"
 authors = ["Davis Vann Bennett "]
 license = "MIT"

From 3a9d6c7c9473073fd3c7d95c5cb783ce005068bf Mon Sep 17 00:00:00 2001
From: Davis Vann Bennett
Date: Fri, 30 Jun 2023 21:59:16 -0400
Subject: [PATCH 08/11] feat: gha for publishing to pypi

---
 .github/workflows/upload_pypi.yml | 24 ++++++++++++++++++++++++
 1 file changed, 24 insertions(+)
 create mode 100644 .github/workflows/upload_pypi.yml

diff --git a/.github/workflows/upload_pypi.yml b/.github/workflows/upload_pypi.yml
new file mode 100644
index 0000000..1780ce9
--- /dev/null
+++ b/.github/workflows/upload_pypi.yml
@@ -0,0 +1,24 @@
+name: Upload Python Package
+
+on:
+  release:
+    types: [created]
+
+jobs:
+  deploy:
+    runs-on: ubuntu-20.04
+
+    steps:
+      - uses: actions/checkout@v2
+      - uses: actions/setup-python@v2
+      - name: Install dependencies
+        run: |
+          python -m pip install --upgrade pip
+          pip install poetry==1.4.1
+      - name: Build and publish
+        env:
+          POETRY_HTTP_BASIC_PYPI_USERNAME: __token__
+          POETRY_HTTP_BASIC_PYPI_PASSWORD: ${{ secrets.PYPI_API_TOKEN }}
+        run: |
+          poetry build
+          poetry publish
\ No newline at end of file

From 9fdc9bb512bc663df922068de779d38d371c6a9a Mon Sep 17 00:00:00 2001
From: Davis Vann Bennett
Date: Sat, 1 Jul 2023 12:38:20 -0400
Subject: [PATCH 09/11] chore: remove demo file

---
 ground_truth_test.py | 97 --------------------------------------------
 1 file changed, 97 deletions(-)
 delete mode 100755 ground_truth_test.py

diff --git a/ground_truth_test.py b/ground_truth_test.py
deleted file mode 100755
index 9d0fc1f..0000000
--- a/ground_truth_test.py
+++ /dev/null
@@ -1,97 +0,0 @@
-# from fibsem_tools.metadata.groundtruth import GroupMetadata
-from fibsem_tools import read_xarray
-import json
-from fibsem_tools.metadata.groundtruth import (
-    AnnotationProtocol,
-    MultiscaleGroupAttrs,
-    SemanticSegmentation,
-    classNameDict,
-    AnnotationArrayAttrs,
-    AnnotationCropAttrs,
-)
-from rich import print_json
-import numpy as np
-import datetime
-from typing import Dict, Literal, TypedDict, List, TypeVar
-
-Key = TypeVar("Key", bound=str)
-
-
-class CropMeta(TypedDict):
-    maxId: int
-    name: str
-    offset: List[float]
-    offset_unit: str
-    resolution: List[float]
-    resulution_unit: str
-    type: str
-
-
-dataset = "jrc_hela-2"
-bucket = "janelia-cosem-datasets"
-uri = f"s3://{bucket}/{dataset}/{dataset}.n5/labels/gt/"
-out_dtype = "uint8"
-out_dtype_max = np.iinfo(out_dtype).max
-
-tnamesT = Literal["ERES membrane"]
-tnames = ["ERES membrane"]
-
-crop_key: Key = "Crop13"
-group = read_xarray(uri)
-arr = group["s0"].data
-subvolumeMeta: Dict[Key, CropMeta] = arr.attrs["subvolumes"]
-sMeta = subvolumeMeta[crop_key]
-dims = ("x", "y", "z")
-
-scales = arr.attrs["transform"]["scale"][::-1]
-offsets = np.multiply(sMeta["offset"], np.divide(scales, sMeta["resolution"]))
-selecter = {
-    d: (np.arange(100) * scale) + offset
-    for d, offset, scale in zip(dims, offsets, scales)
-}
-
-crop = arr.sel(selecter, method="nearest")
-crop_attrs = AnnotationCropAttrs(
-    name=crop_key,
-    description="A crop",
-    protocol=AnnotationProtocol[tnamesT](url="www.google.com", class_names=tnames),
-    doi=None,
-)
-
-out_attrs = {}
-out_attrs[f"/{crop_key}"] = {"annotation": crop_attrs.dict()}
-# partition the subvolume into separate integer classes
-vals = np.unique(crop)
-
-for v in vals:
-
-    name, 
description = classNameDict[v].short, classNameDict[v].long - if name != "ERES membrane": - continue - - subvol = (crop == v).astype(out_dtype) - type = SemanticSegmentation(encoding={"absent": 0, "unknown": 255}) - histogram = {key: np.sum(subvol == value) for key, value in type.encoding.items()} - array_attrs = AnnotationArrayAttrs[tnamesT]( - histogram=histogram, annotation_type=type, class_name=name - ) - - group_attrs = MultiscaleGroupAttrs[tnamesT]( - class_name=name, - description=description, - created_by=[ - "Cellmap annotators", - ], - created_with=["Amira", "Paintera"], - start_date=datetime.datetime.now().isoformat(), - duration_days=10, - annotation_type=type, - ) - - out_attrs[f"/{crop_key}/{name.lower().replace(' ', '_')}"] = { - "annotation": group_attrs.dict() - } - out_attrs[f"/{crop_key}/{name}/s0"] = {"annotation": array_attrs.dict()} - - -print_json(json.dumps(out_attrs)) From aa529d60c1f32173727a4fa35e9db22394a45dee Mon Sep 17 00:00:00 2001 From: Davis Vann Bennett Date: Tue, 22 Aug 2023 14:51:30 -0400 Subject: [PATCH 10/11] feat: add zarr scanning cli tool to check for invalid chunks and potentially delete them --- src/fibsem_tools/cli/zarr_scan.py | 106 ++++++++++++++++++++++++++++++ src/fibsem_tools/io/zarr.py | 2 +- tests/test_zarr.py | 23 +++++++ 3 files changed, 130 insertions(+), 1 deletion(-) create mode 100644 src/fibsem_tools/cli/zarr_scan.py diff --git a/src/fibsem_tools/cli/zarr_scan.py b/src/fibsem_tools/cli/zarr_scan.py new file mode 100644 index 0000000..53cf3eb --- /dev/null +++ b/src/fibsem_tools/cli/zarr_scan.py @@ -0,0 +1,106 @@ +from typing import Literal, Union +import click +import zarr +from fibsem_tools import access +from rich import print +from fibsem_tools.io.zarr import get_chunk_keys +from rich.progress import track +import time +from dataclasses import dataclass + +ChunkState = Literal["valid", "missing", "invalid"] + + +@dataclass +class Missing: + variant = "missing" + + +@dataclass +class Invalid: + variant = "invalid" + exception: BaseException + + +@dataclass +class Valid: + variant = "valid" + + +class ChunkSetResults(dict[ChunkState, dict[str, Union[Missing, Valid, Invalid]]]): + pass + + +def check_zarray(array: zarr.Array) -> dict[str, Union[Missing, Invalid, Valid]]: + ckeys = tuple(get_chunk_keys(array)) + results = {} + for ckey in track(ckeys, description="Checking chunks..."): + try: + array._decode_chunk(array.store[ckey]) + results[ckey] = Valid() + except OSError as e: + results[ckey] = Invalid(exception=e) + except KeyError: + results[ckey] = Missing() + + return results + + +@click.command() +@click.argument("array_path", type=click.STRING) +@click.option( + "--valid", + is_flag=True, + show_default=True, + default=False, + help="report valid chunks", +) +@click.option( + "--missing", + is_flag=True, + show_default=True, + default=False, + help="report missing chunks", +) +@click.option( + "--invalid", + is_flag=True, + show_default=True, + default=False, + help="report invalid chunks", +) +@click.option( + "--delete-invalid", + is_flag=True, + show_default=True, + default=False, + help="delete invalid chunks", +) +def cli(array_path, valid, missing, invalid, delete_invalid): + start = time.time() + array = access(array_path, mode="r") + all_results = check_zarray(array) + # categorize + results_categorized: ChunkSetResults = {"valid": {}, "missing": {}, "invalid": {}} + for key, value in all_results.items(): + results_categorized[value.variant][key] = value + + to_show = {} + + for flag, opt in zip((valid, missing, 
invalid), ("valid", "missing", "invalid")): + if flag: + to_show[opt] = results_categorized[opt] + print(to_show) + if delete_invalid: + array_a = access(array_path, mode="a") + num_invalid = len(results_categorized["invalid"]) + for res in track( + results_categorized["invalid"], + description=f"Deleting {num_invalid} invalid chunks...", + ): + del array_a.store[res] + print(f"Completed after {time.time() - start}s") + + +if __name__ == "__main__": + cli() diff --git a/src/fibsem_tools/io/zarr.py b/src/fibsem_tools/io/zarr.py index 958df21..5c5ba13 100644 --- a/src/fibsem_tools/io/zarr.py +++ b/src/fibsem_tools/io/zarr.py @@ -216,7 +216,7 @@ def access_zarr( array_or_group = zarr.open(store, path=path, **kwargs, mode=access_mode) - if access_mode != "r": + if access_mode != "r" and len(attrs) > 0: array_or_group.attrs.update(attrs) return array_or_group diff --git a/tests/test_zarr.py b/tests/test_zarr.py index 876dab4..95129c7 100644 --- a/tests/test_zarr.py +++ b/tests/test_zarr.py @@ -3,8 +3,10 @@ import pytest from xarray import DataArray from zarr.storage import FSStore +from pathlib import Path import zarr import numpy as np +import itertools from fibsem_tools.io.core import read_dask, read_xarray from fibsem_tools.io.multiscale import multiscale_group from fibsem_tools.io.xr import stt_from_array @@ -15,6 +17,7 @@ access_zarr, create_dataarray, create_datatree, + get_chunk_keys, get_url, to_dask, to_xarray, @@ -247,3 +250,23 @@ def test_dask(temp_zarr, chunks): assert np.array_equal(observed, data) assert np.array_equal(read_dask(get_url(zarray), chunks).compute(), data) + + +@pytest.mark.parametrize( + "store_class", (zarr.N5Store, zarr.DirectoryStore, zarr.NestedDirectoryStore) +) +@pytest.mark.parametrize("shape", ((10,), (10, 11, 12))) +def test_chunk_keys(tmp_path: Path, store_class, shape): + store: zarr.storage.BaseStore = store_class(tmp_path) + arr_path = "test" + arr = zarr.create( + shape=shape, store=store, path=arr_path, chunks=(2,) * len(shape), dtype="uint8" + ) + + dim_sep = arr._dimension_separator + chunk_idcs = itertools.product(*(range(c_s) for c_s in arr.cdata_shape)) + expected = tuple( + os.path.join(arr.path, dim_sep.join(map(str, idx))) for idx in chunk_idcs + ) + observed = tuple(get_chunk_keys(arr)) + assert observed == expected From 0da50de1d3108e66e21bd017228aaf687f8eb18c Mon Sep 17 00:00:00 2001 From: Davis Vann Bennett Date: Tue, 22 Aug 2023 16:46:47 -0400 Subject: [PATCH 11/11] chore: version bump --- pyproject.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pyproject.toml b/pyproject.toml index ed6c482..e9ad210 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,6 +1,6 @@ [tool.poetry] name = "fibsem-tools" -version = "4.0.3" +version = "4.0.4" description = "Tools for processing FIBSEM datasets" authors = ["Davis Vann Bennett "] license = "MIT"
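
A usage note for the chunk scanner introduced in PATCH 10: the click command takes an array path plus report flags (--valid, --missing, --invalid) and a destructive --delete-invalid flag that reopens the array in append mode and removes every chunk whose decoding raised an OSError. The snippet below is a minimal sketch of driving the same check from Python rather than the command line; the array path is a placeholder, and it assumes fibsem_tools.cli is importable as a package.

    # Hypothetical usage sketch; "path/to/array.zarr/s0" is a placeholder path.
    from fibsem_tools import access
    from fibsem_tools.cli.zarr_scan import check_zarray

    # Open read-only: scanning only decodes chunks, it never mutates the store.
    arr = access("path/to/array.zarr/s0", mode="r")
    results = check_zarray(arr)  # maps each chunk key to Valid, Missing, or Invalid
    invalid = [key for key, state in results.items() if state.variant == "invalid"]
    print(f"{len(invalid)} of {len(results)} chunks failed to decode")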