Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

investigate dropping ome-zarr dependency #123

Draft
wants to merge 6 commits into
base: main
Choose a base branch
from
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion .isort.cfg
Original file line number Diff line number Diff line change
@@ -1,2 +1,2 @@
[settings]
known_third_party = numpy,ome_zarr,pytest,setuptools,vispy
known_third_party = dask,numpy,ome_zarr,pytest,setuptools,vispy,zarr
169 changes: 3 additions & 166 deletions napari_ome_zarr/_reader.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,26 +4,12 @@
"""


import logging
import warnings
from importlib.metadata import version
from typing import Any, Dict, Iterator, List, Optional

import numpy as np
from ome_zarr.io import parse_url
from ome_zarr.reader import Label, Node, Reader
from ome_zarr.types import LayerData, PathLike, ReaderFunction
from vispy.color import Colormap
from .ome_zarr_reader import read_ome_zarr

LOGGER = logging.getLogger("napari_ome_zarr.reader")

METADATA_KEYS = ("name", "visible", "contrast_limits", "colormap", "metadata")

# major and minor versions as int
napari_version = tuple(map(int, list(version("napari").split(".")[:2])))


def napari_get_reader(path: PathLike) -> Optional[ReaderFunction]:
def napari_get_reader(path):
"""Returns a reader for supported paths that include IDR ID.

- URL of the form: https://uk1s3.embassy.ebi.ac.uk/idr/zarr/v0.1/ID.zarr/
Expand All @@ -32,153 +18,4 @@ def napari_get_reader(path: PathLike) -> Optional[ReaderFunction]:
if len(path) > 1:
warnings.warn("more than one path is not currently supported")
path = path[0]
zarr = parse_url(path)
if zarr:
reader = Reader(zarr)
return transform(reader())
# Ignoring this path
return None


def transform_properties(
    props: Optional[Dict[str, Dict]] = None
) -> Optional[Dict[str, List]]:
    """
    Transform properties

    Transform a dict of {label_id : {key: value, key2: value2}}
    with a key for every LABEL
    into a dict of a key for every VALUE, with a list of values for each
    .. code::

        {
            "index": [1381342, 1381343...]
            "omero:roiId": [1381342, 1381343...],
            "omero:shapeId": [1682567, 1682567...]
        }

    Every returned list has one entry per label, in the iteration order of
    ``props``; labels that lack a key get ``None`` in that position.

    The "index" column always holds the label ids.  A per-label property
    that is itself called "index" is ignored, because sharing one list
    between both would make that column twice as long as the others.

    Returns ``None`` when ``props`` is ``None``.
    """
    if props is None:
        return None

    # Union of all property names in first-seen order (a dict keeps insertion
    # order and de-duplicates), in case some objects don't have all the keys.
    keys: Dict[str, None] = {}
    for props_dict in props.values():
        keys.update(dict.fromkeys(props_dict))
    # "index" is reserved for the label ids collected below.
    keys.pop("index", None)

    properties: Dict[str, List] = {key: [] for key in keys}
    properties["index"] = []
    for label_id, props_dict in props.items():
        properties["index"].append(label_id)
        for key in keys:
            properties[key].append(props_dict.get(key))
    return properties


def transform_scale(
    node_metadata: Dict, metadata: Dict, channel_axis: Optional[int]
) -> None:
    """
    Copy the level-0 scale / translation into the napari layer metadata.

    e.g. transformation is {"scale": [0.2, 0.06, 0.06]}
    Only the transforms of the first (highest resolution) level are used.

    ``node_metadata["coordinateTransformations"]`` is expected to hold one
    list of transform dicts per resolution level.  A "scale" entry is written
    to ``metadata["scale"]`` and a "translation" entry to
    ``metadata["translate"]``, both as tuples.  When ``channel_axis`` is not
    None that axis is dropped from each, since the channel axis is split into
    separate layers and must not carry a scale of its own.

    ``node_metadata`` is never modified: the values are copied before the
    channel entry is removed, so calling this repeatedly gives the same
    result.  Nothing is written when there are no transforms.
    """
    transforms = node_metadata.get("coordinateTransformations")
    if not transforms:
        return

    # (key in the OME-Zarr transform, key expected by napari)
    key_pairs = (("scale", "scale"), ("translation", "translate"))
    for transf in transforms[0]:
        for source_key, target_key in key_pairs:
            if source_key not in transf:
                continue
            # Work on a copy so the node's own metadata keeps every axis.
            values = list(transf[source_key])
            if channel_axis is not None:
                values.pop(channel_axis)
            metadata[target_key] = tuple(values)


def transform(nodes: Iterator[Node]) -> Optional[ReaderFunction]:
    """
    Wrap an iterable of ome-zarr nodes in a napari reader function.

    The returned callable ignores its arguments, walks ``nodes`` and returns
    one ``(data, metadata, layer_type)`` tuple for every node that has data.
    ``layer_type`` is "labels" when ``node.load(Label)`` is truthy and
    "image" otherwise.
    """

    def f(*args: Any, **kwargs: Any) -> List[LayerData]:
        # NOTE(review): ``nodes`` is annotated as an Iterator; if it is a
        # one-shot iterator, a second call of ``f`` would return an empty
        # list -- confirm against the callers.
        results: List[LayerData] = list()

        for node in nodes:
            data: List[Any] = node.data
            metadata: Dict[str, Any] = {}
            if data is None or len(data) < 1:
                LOGGER.debug("skipping non-data %s", node)
            else:
                LOGGER.debug("transforming %s", node)
                LOGGER.debug("node.metadata: %s", node.metadata)

                layer_type: str = "image"
                # Position of the axis whose type is "channel", if any.
                channel_axis = None
                try:
                    ch_types = [axis["type"] for axis in node.metadata["axes"]]
                    if "channel" in ch_types:
                        channel_axis = ch_types.index("channel")
                except Exception:
                    # Logged for the user, then re-raised unchanged.
                    LOGGER.error("Error reading axes: Please update ome-zarr")
                    raise

                # Fills metadata["scale"] / metadata["translate"] in place.
                transform_scale(node.metadata, metadata, channel_axis)

                if node.load(Label):
                    layer_type = "labels"
                    # Labels: copy the known keys through unchanged.
                    for x in METADATA_KEYS:
                        if x in node.metadata:
                            metadata[x] = node.metadata[x]
                        elif x == "colormap" and node.metadata["color"]:
                            # key changed 'color' -> 'colormap' in napari 0.5
                            if napari_version >= (0, 5):
                                metadata["colormap"] = node.metadata["color"]
                            else:
                                metadata["color"] = node.metadata["color"]
                    if channel_axis is not None:
                        # Drop the channel axis from every resolution level.
                        data = [
                            np.squeeze(level, axis=channel_axis) for level in node.data
                        ]
                else:
                    # Handle the removal of vispy requirement from ome-zarr-py
                    # (entries are converted in place, inside node.metadata)
                    cms = node.metadata.get("colormap", [])
                    for idx, cm in enumerate(cms):
                        if not isinstance(cm, Colormap):
                            cms[idx] = Colormap(cm)

                    if channel_axis is not None:
                        # multi-channel; Copy known metadata values
                        metadata["channel_axis"] = channel_axis
                        for x in METADATA_KEYS:
                            if x in node.metadata:
                                metadata[x] = node.metadata[x]
                        # overwrite 'name' if we have 'channel_names'
                        if "channel_names" in node.metadata:
                            metadata["name"] = node.metadata["channel_names"]
                    else:
                        # single channel image, so metadata just needs
                        # single items (not lists)
                        for x in METADATA_KEYS:
                            if x in node.metadata:
                                try:
                                    metadata[x] = node.metadata[x][0]
                                except Exception:
                                    # Best effort: a value that cannot be
                                    # indexed with [0] is silently left out.
                                    pass
                        # overwrite 'name' if we have 'channel_names'
                        if "channel_names" in node.metadata:
                            if len(node.metadata["channel_names"]) > 0:
                                metadata["name"] = node.metadata["channel_names"][0]

                # Per-label properties, regrouped into one list per key.
                properties = transform_properties(node.metadata.get("properties"))
                if properties is not None:
                    metadata["properties"] = properties

                rv: LayerData = (data, metadata, layer_type)
                LOGGER.debug("Transformed: %s", rv)
                results.append(rv)

        return results

    return f
return read_ome_zarr(path)
210 changes: 210 additions & 0 deletions napari_ome_zarr/ome_zarr_reader.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,210 @@
# zarr v3

from typing import Any, Dict, List, Tuple, Union
from xml.etree import ElementTree as ET

import dask.array as da
import zarr
from vispy.color import Colormap
from zarr import Group
from zarr.core.buffer import default_buffer_prototype
from zarr.core.sync import SyncMixin

LayerData = Union[Tuple[Any], Tuple[Any, Dict], Tuple[Any, Dict, str]]


class Spec:
    """Base node wrapping one zarr group.

    Subclasses recognise a particular layout through ``matches`` and
    override ``data``, ``metadata`` and ``children`` as needed.  The base
    class matches nothing, has no data, empty metadata and no children.
    """

    def __init__(self, group: Group):
        self.group = group

    @staticmethod
    def matches(group: Group) -> bool:
        """Tell whether ``group`` follows this spec (never, for the base)."""
        return False

    def data(self) -> List[da.core.Array] | None:
        """Return the arrays of this node, or None when it has none."""
        return None

    def metadata(self) -> Dict[str, Any] | None:
        """Return the napari layer metadata of this node."""
        return dict()

    def children(self):
        """Return the child specs found below this node."""
        return list()

    def iter_nodes(self):
        """Yield this node, then every descendant, depth first."""
        yield self
        for sub_spec in self.children():
            for descendant in sub_spec.iter_nodes():
                yield descendant

    def iter_data(self):
        """Yield the data of every node in the tree that has some."""
        for spec in self.iter_nodes():
            if arrays := spec.data():
                yield arrays

    @staticmethod
    def get_attrs(group: Group):
        """Return the value stored under "ome" in the group's attributes when
        that key exists, otherwise the attributes themselves."""
        attributes = group.attrs
        return attributes["ome"] if "ome" in attributes else attributes


class Multiscales(Spec):
@staticmethod
def matches(group: Group) -> bool:
    """Tell whether the group's OME attributes contain "multiscales"."""
    ome_attrs = Spec.get_attrs(group)
    return "multiscales" in ome_attrs

def children(self):
Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@dstansby By "graph traversal" logic, I mean: if I start with a multiscales group, e.g. group = zarr.open("https://uk1s3.embassy.ebi.ac.uk/idr/zarr/v0.4/idr0062A/6001240.zarr"), I then want to get the labels (if they exist). Here this is implemented in the children() method, where we know to look in a child "labels" group, check its attrs for "labels": ["labels1.zarr", "labels2.zarr"], and then return objects for those child labels so that the arrays (and metadata) can be added to the layers that are passed to napari.

I don't see that ome-zarr-models-py includes that kind of logic for traversing the graph between these objects?

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

You can get the list of labels paths from Image.attributes.labels. But the labels part of the spec just says these point to "labels objects", which I don't think are more specifically defined anywhere else?

If the OME-Zarr spec was more prescriptive about what these "labels objects" were (are they meant to be groups with image-label metadata ??) then we could certainly do more, but I don't think the spec allows us to make those assumptions unfortunately.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I am only now reading version 0.5 of OME-zarr, and see that the labels section is much improved over 0.4 😄 . It's definitely within scope of ome-zarr-models-py to provide logic for getting from an Image dataset to the labels dataset if it's in the metadata. Tracking issue at ome-zarr-models/ome-zarr-models-py#92

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Currently in the tutorial at https://github.com/BioImageTools/ome-zarr-models-py/blob/main/docs/tutorial.py
If I add:

print(ome_zarr_image.attributes.labels)

I get None (even though that image does have labels).
I don't see any population of the labels in https://github.com/BioImageTools/ome-zarr-models-py/blob/7659a114a2428fe9d8acbd06aa7bc1c9d32624bb/src/ome_zarr_models/v04/image.py#L85 ?

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

even though that image does have labels

There's a labels group, but looking at that dataset in the validator the top level group is missing the labels metadata, which is why .labels is giving None.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

To be precise, if you look at the image group in the validator, it should have a "labels" key at the same level as the "multiscales" and "omero" keys. If that was there, the paths under the "labels" key would be in the .labels attribute in ome-zarr-models-py

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Oh hold on, am I just reading the spec wrong? Does:

The special group "labels" found under an image Zarr

Really mean:

The special Zarr group "labels" found inside an image Zarr group

?

If so then we should definitely implement that in ome-zarr-models-py!

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yes, image.zarr/labels/ group.
This is shown a bit more clearly in the layout at https://ngff.openmicroscopy.org/0.4/index.html#image-layout

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Gotcha, I always thought the "labels" group was an arbitrary name and the example was just an example 🤦 - thanks for explaining, and I'll let you know once I've implemented this in ome-zarr-models-py 😄

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

ch = []
# test for child "labels"
try:
grp = self.group["labels"]
attrs = Spec.get_attrs(grp)
if "labels" in attrs:
for name in attrs["labels"]:
g = grp[name]
if Label.matches(g):
ch.append(Label(g))
except KeyError:
pass
return ch

def data(self):
    """Return one dask array per dataset of the first multiscales entry,
    in the order the datasets are listed."""
    first_multiscale = Spec.get_attrs(self.group)["multiscales"][0]
    level_paths = [dataset["path"] for dataset in first_multiscale["datasets"]]
    return [da.from_zarr(self.group[level_path]) for level_path in level_paths]

def metadata(self):
    """Build napari layer metadata from the group's OME attributes.

    Returns a dict that may contain:

    - ``channel_axis``: position of the channel axis of the first
      multiscales entry, when there is one.
    - ``name``, ``visible``, ``colormap``, ``contrast_limits``: taken from
      the "omero" channels.  With a channel axis these are lists with one
      entry per channel; without one they are the values of the first
      channel only.

    ``colormap`` and ``contrast_limits`` are left out (so napari falls back
    to its defaults) whenever they cannot be given for every channel that
    needs them: a channel without a usable "color", or without both
    "window.start" and "window.end".  Handing napari a list shorter than
    the number of channels would mis-assign or fail.  Nothing beyond
    ``channel_axis`` is returned when there is no "omero" block or it
    lists no channels.
    """
    rsp = {}
    attrs = Spec.get_attrs(self.group)

    # Axes are dicts carrying a "type" in NGFF 0.4+; NGFF 0.3 lists plain
    # axis names, where "c" is the channel.  Older data has no "axes" at
    # all, in which case no channel axis is reported.
    channel_axis = None
    for position, axis in enumerate(attrs["multiscales"][0].get("axes", [])):
        if isinstance(axis, dict):
            axis_is_channel = axis.get("type") == "channel"
        else:
            axis_is_channel = axis == "c"
        if axis_is_channel:
            channel_axis = position
            break
    if channel_axis is not None:
        rsp["channel_axis"] = channel_axis

    channels = attrs["omero"].get("channels") if "omero" in attrs else None
    if not channels:
        return rsp

    # One entry per channel in every list; None marks "not available".
    colormaps = []
    ch_names = []
    visibles = []
    contrast_limits = []
    for index, ch in enumerate(channels):
        colormap = None
        color = ch.get("color")
        if color is not None:
            try:
                rgb = [int(color[i : i + 2], 16) / 255 for i in range(0, 6, 2)]
            except (TypeError, ValueError):
                # Not a 6-digit hex string: treat the colour as missing.
                rgb = None
            if rgb is not None:
                # colormap is range: black -> rgb color
                colormap = Colormap([[0, 0, 0], rgb])
        colormaps.append(colormap)

        ch_names.append(ch.get("label", str(index)))
        visibles.append(ch.get("active", True))

        window = ch.get("window") or {}
        start = window.get("start")
        end = window.get("end")
        if start is None or end is None:
            contrast_limits.append(None)
        else:
            contrast_limits.append([start, end])

    if channel_axis is not None:
        rsp["name"] = ch_names
        rsp["visible"] = visibles
        if all(colormap is not None for colormap in colormaps):
            rsp["colormap"] = colormaps
        if all(limits is not None for limits in contrast_limits):
            rsp["contrast_limits"] = contrast_limits
    else:
        # single channel image, so metadata needs single items (not lists)
        rsp["name"] = ch_names[0]
        rsp["visible"] = visibles[0]
        if colormaps[0] is not None:
            rsp["colormap"] = colormaps[0]
        if contrast_limits[0] is not None:
            rsp["contrast_limits"] = contrast_limits[0]

    return rsp


class Bioformats2raw(Spec):
    """Container group written by bioformats2raw.

    The group itself holds no image data; its images are discovered from the
    ``OME/METADATA.ome.xml`` document stored next to them.
    """

    @staticmethod
    def matches(group: Group) -> bool:
        """Tell whether ``group`` declares a bioformats2raw layout."""
        attrs = Spec.get_attrs(group)
        # Don't consider "plate" as a Bioformats2raw layout
        return "bioformats2raw.layout" in attrs and "plate" not in attrs

    def children(self):
        """Return a ``Multiscales`` node for every image listed in the XML.

        Each ``Image`` element with an ID of the form ``Image:<n>`` is looked
        up as child group ``<n>``.  Images that are missing from the store, or
        whose group is not a multiscales image, are skipped.  An empty list is
        returned when the store has no ``OME/METADATA.ome.xml``.

        Raises:
            xml.etree.ElementTree.ParseError: if the metadata is not valid XML.
        """
        # lookup children from series of OME/METADATA.xml
        # NOTE(review): the key is resolved against the store, not against
        # this group's own path -- confirm this is only used on root groups.
        xml_data = SyncMixin()._sync(
            self.group.store.get(
                "OME/METADATA.ome.xml", prototype=default_buffer_prototype()
            )
        )
        if xml_data is None:
            # The store has nothing under that key.
            return []

        # NOTE(review): xml.etree.ElementTree is not hardened against
        # maliciously constructed XML, and this document comes from whatever
        # store the user opened.
        root = ET.fromstring(xml_data.to_bytes())
        rv = []
        for child in root:
            # tags look like {http://www.openmicroscopy.org/Schemas/OME/2016-06}Image
            node_id = child.attrib.get("ID", "")
            if not (child.tag.endswith("Image") and node_id.startswith("Image:")):
                continue
            try:
                g = self.group[node_id.removeprefix("Image:")]
            except KeyError:
                # Listed in the XML but absent from the store.
                continue
            if Multiscales.matches(g):
                rv.append(Multiscales(g))
        return rv

    # override to NOT yield self since node has no data
    def iter_nodes(self):
        """Yield the nodes of every child image, but not this container."""
        for child in self.children():
            yield from child.iter_nodes()


class Plate(Spec):
    """Group whose OME attributes contain "plate".

    Only detection is implemented here; data, metadata and children are the
    ``Spec`` defaults.
    """

    @staticmethod
    def matches(group: Group) -> bool:
        """Tell whether the group's OME attributes contain "plate"."""
        ome_attrs = Spec.get_attrs(group)
        return "plate" in ome_attrs


class Label(Multiscales):
    """Multiscales group that also carries "image-label" attributes."""

    @staticmethod
    def matches(group: Group) -> bool:
        """Tell whether ``group`` is a multiscales image with "image-label"
        in its OME attributes."""
        # label must also be Multiscales
        is_multiscales = Multiscales.matches(group)
        return is_multiscales and "image-label" in Spec.get_attrs(group)

    def metadata(self) -> Dict[str, Any] | None:
        """Return empty layer metadata for labels."""
        # override Multiscales metadata
        return dict()


def read_ome_zarr(url):
    """Return a napari reader function for the OME-Zarr group at ``url``.

    Nothing is opened until the returned function is called.  When called
    (its arguments are ignored) it opens ``url`` with ``zarr.open``, picks the
    first matching layout among ``Bioformats2raw``, ``Multiscales`` and
    ``Plate``, and returns one layer tuple per node that has data:
    ``(data, metadata, "labels")`` for label images and ``(data, metadata)``
    for everything else.

    An empty list is returned when the root group matches no known layout.
    Nodes without data (a ``Plate``, for now) are skipped rather than handed
    to napari as a layer with no data.
    """

    def f(*args: Any, **kwargs: Any) -> List[LayerData]:
        results: List[LayerData] = []

        # TODO: handle missing file
        root_group = zarr.open(url)

        # Order matters: a bioformats2raw container is checked first.
        spec = None
        for spec_cls in (Bioformats2raw, Multiscales, Plate):
            if spec_cls.matches(root_group):
                spec = spec_cls(root_group)
                break
        if spec is None:
            return results

        for node in spec.iter_nodes():
            node_data = node.data()
            if not node_data:
                continue
            metadata = node.metadata()
            if Label.matches(node.group):
                results.append((node_data, metadata, "labels"))
            else:
                results.append((node_data, metadata))

        return results

    return f
Loading
Loading