From 7d255c6b5917cf53bd1868f3103c3d5ca4c43f63 Mon Sep 17 00:00:00 2001 From: Brett Date: Tue, 17 Oct 2023 16:54:43 -0400 Subject: [PATCH 1/2] make zarr array backed by asdf file pickleable --- asdf_zarr/storage.py | 31 +++++++++++++++++++++++++++++++ 1 file changed, 31 insertions(+) diff --git a/asdf_zarr/storage.py b/asdf_zarr/storage.py index f0bac1a..6e90caa 100644 --- a/asdf_zarr/storage.py +++ b/asdf_zarr/storage.py @@ -2,6 +2,7 @@ import json import math +import asdf import numpy import zarr @@ -156,6 +157,36 @@ def __init__(self, ctx, chunk_block_map_index, zarray_meta): self._chunk_asdf_keys[chunk_key] = asdf_key self._chunk_callbacks[chunk_key] = ctx.get_block_data_callback(block_index, asdf_key) + def __getstate__(self): + state = {} + state["_sep"] = self._sep + + # for each callback, get the file uri and block offset + def _callback_info(cb): + return { + "offset": cb(_attr="offset"), + "uri": cb(_attr="_fd")().uri, + } + + state["_chunk_callbacks"] = {k: _callback_info(self._chunk_callbacks[k]) for k in self._chunk_callbacks} + return state + + def __setstate__(self, state): + self._sep = state["_sep"] + + def _to_callback(info): + def cb(): + with asdf.generic_io.get_file(info["uri"], mode="r") as gf: + return asdf._block.io.read_block(gf, info["offset"])[-1] + + return cb + + self._chunk_callbacks = {k: _to_callback(state["_chunk_callbacks"][k]) for k in state["_chunk_callbacks"]} + # as __init__ will not be called on self, set up attributed expected + # due to the parent InternalStore class + self._tmp_store_ = None + self._deleted_keys = set() + def _sep_key(self, key): if self._sep is None: return key From e28f1a6a8336db26a69811306da20b61632d2ca2 Mon Sep 17 00:00:00 2001 From: Brett Date: Tue, 17 Oct 2023 17:14:23 -0400 Subject: [PATCH 2/2] allow ReadInternalStore to be re-pickled (for dask) --- asdf_zarr/storage.py | 26 ++++++++++++++++---------- 1 file changed, 16 insertions(+), 10 deletions(-) diff --git a/asdf_zarr/storage.py b/asdf_zarr/storage.py index 6e90caa..03012f1 100644 --- a/asdf_zarr/storage.py +++ b/asdf_zarr/storage.py @@ -160,15 +160,20 @@ def __init__(self, ctx, chunk_block_map_index, zarray_meta): def __getstate__(self): state = {} state["_sep"] = self._sep - - # for each callback, get the file uri and block offset - def _callback_info(cb): - return { - "offset": cb(_attr="offset"), - "uri": cb(_attr="_fd")().uri, - } - - state["_chunk_callbacks"] = {k: _callback_info(self._chunk_callbacks[k]) for k in self._chunk_callbacks} + if hasattr(self, "_chunk_info"): + # handle instance that was already pickled and unpickled + state["_chunk_info"] = self._chunk_info + else: + # and instance that was not yet pickled + + # for each callback, get the file uri and block offset + def _callback_info(cb): + return { + "offset": cb(_attr="offset"), + "uri": cb(_attr="_fd")().uri, + } + + state["_chunk_info"] = {k: _callback_info(self._chunk_callbacks[k]) for k in self._chunk_callbacks} return state def __setstate__(self, state): @@ -181,7 +186,8 @@ def cb(): return cb - self._chunk_callbacks = {k: _to_callback(state["_chunk_callbacks"][k]) for k in state["_chunk_callbacks"]} + self._chunk_info = state["_chunk_info"] + self._chunk_callbacks = {k: _to_callback(self._chunk_info[k]) for k in self._chunk_info} # as __init__ will not be called on self, set up attributed expected # due to the parent InternalStore class self._tmp_store_ = None