From b23eda3965ce7a5cae38bd0277d33a754c829f6d Mon Sep 17 00:00:00 2001 From: Gregor Sturm Date: Mon, 24 Jul 2023 20:14:00 +0200 Subject: [PATCH 01/10] Use __record__ instead of __list__ to store behavior --- anndata/_core/views.py | 13 +++++++------ anndata/tests/test_awkward.py | 2 +- 2 files changed, 8 insertions(+), 7 deletions(-) diff --git a/anndata/_core/views.py b/anndata/_core/views.py index c81eb6824..46add57b7 100644 --- a/anndata/_core/views.py +++ b/anndata/_core/views.py @@ -4,7 +4,7 @@ from copy import deepcopy from collections.abc import Sequence, KeysView, Callable, Iterable, Mapping from functools import reduce, singledispatch, wraps -from typing import Any, Literal +from typing import Any, Literal, Dict, cast import warnings import numpy as np @@ -295,8 +295,8 @@ def __copy__(self) -> AwkArray: """ array = self # makes a shallow copy and removes the reference to the original AnnData object - array = ak.with_parameter(self, _PARAM_NAME, None) - array = ak.with_parameter(array, "__list__", None) + array = ak.with_parameter(array, _PARAM_NAME, None) + array = ak.with_parameter(array, "__record__", None) return array @as_view.register(AwkArray) @@ -308,15 +308,16 @@ def as_view_awkarray(array, view_args): # possible strategies to stack behaviors. # A better solution might be based on xarray-style "attrs", once this is implemented # https://github.com/scikit-hep/awkward/issues/1391#issuecomment-1412297114 - if type(array).__name__ != "Array": + if "__record__" in cast(Dict, ak.parameters(array)): raise NotImplementedError( - "Cannot create a view of an awkward array with __array__ parameter. " + "Cannot create a view of an awkward array with __record__ parameter. " "Please open an issue in the AnnData repo and describe your use-case." ) array = ak.with_parameter(array, _PARAM_NAME, (parent_key, attrname, keys)) - array = ak.with_parameter(array, "__list__", "AwkwardArrayView") + array = ak.with_parameter(array, "__record__", "AwkwardArrayView") return array + ak.behavior["*", "AwkwardArrayView"] = AwkwardArrayView ak.behavior["AwkwardArrayView"] = AwkwardArrayView except ImportError: diff --git a/anndata/tests/test_awkward.py b/anndata/tests/test_awkward.py index 87280d5a2..89142eccb 100644 --- a/anndata/tests/test_awkward.py +++ b/anndata/tests/test_awkward.py @@ -159,7 +159,7 @@ def reversed(self): ak.behavior[BEHAVIOUR_ID] = ReversibleArray adata = gen_adata((3, 3), varm_types=(), obsm_types=(), layers_types=()) adata.obsm["awk_string"] = ak.with_parameter( - ak.Array(["AAA", "BBB", "CCC"]), "__list__", BEHAVIOUR_ID + ak.Array(["AAA", "BBB", "CCC"]), "__record__", BEHAVIOUR_ID ) adata_view = adata[:2] From b6406179f50ce69d84112a5426d206a203a1181d Mon Sep 17 00:00:00 2001 From: Gregor Sturm Date: Tue, 25 Jul 2023 07:23:12 +0200 Subject: [PATCH 02/10] Fix views of views --- anndata/_core/views.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/anndata/_core/views.py b/anndata/_core/views.py index 46add57b7..0cc39fce6 100644 --- a/anndata/_core/views.py +++ b/anndata/_core/views.py @@ -308,7 +308,10 @@ def as_view_awkarray(array, view_args): # possible strategies to stack behaviors. # A better solution might be based on xarray-style "attrs", once this is implemented # https://github.com/scikit-hep/awkward/issues/1391#issuecomment-1412297114 - if "__record__" in cast(Dict, ak.parameters(array)): + # + # Allow only Arrays that have no record behavior attached, or with __record__ explicitly + # set to None + if ak.parameters(array).get("__record__", None) is not None: raise NotImplementedError( "Cannot create a view of an awkward array with __record__ parameter. " "Please open an issue in the AnnData repo and describe your use-case." From 843833ff67551c02ad254dd1423a9138063cb364 Mon Sep 17 00:00:00 2001 From: Gregor Sturm Date: Tue, 25 Jul 2023 18:52:19 +0200 Subject: [PATCH 03/10] Update anndata/_core/views.py Co-authored-by: Angus Hollands --- anndata/_core/views.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/anndata/_core/views.py b/anndata/_core/views.py index 0cc39fce6..03b69ba13 100644 --- a/anndata/_core/views.py +++ b/anndata/_core/views.py @@ -311,7 +311,7 @@ def as_view_awkarray(array, view_args): # # Allow only Arrays that have no record behavior attached, or with __record__ explicitly # set to None - if ak.parameters(array).get("__record__", None) is not None: + if array.layout.purelist_parameter("__record__") is not None: raise NotImplementedError( "Cannot create a view of an awkward array with __record__ parameter. " "Please open an issue in the AnnData repo and describe your use-case." From 8aa09cc3a91676777e1f77593c8ca0f33a164f5b Mon Sep 17 00:00:00 2001 From: Gregor Sturm Date: Wed, 30 Aug 2023 17:30:51 +0200 Subject: [PATCH 04/10] Remove all custom code for views --- anndata/_core/views.py | 70 +++-------------------------------- anndata/_io/specs/methods.py | 6 --- anndata/tests/test_awkward.py | 20 +++++----- 3 files changed, 14 insertions(+), 82 deletions(-) diff --git a/anndata/_core/views.py b/anndata/_core/views.py index 03b69ba13..ee149a9ad 100644 --- a/anndata/_core/views.py +++ b/anndata/_core/views.py @@ -262,71 +262,11 @@ def as_view_zappy(z, view_args): return z -try: - from ..compat import awkward as ak - import weakref - - # Registry to store weak references from AwkwardArrayViews to their parent AnnData container - _registry = weakref.WeakValueDictionary() - _PARAM_NAME = "_view_args" - - class AwkwardArrayView(_ViewMixin, AwkArray): - @property - def _view_args(self): - """Override _view_args to retrieve the values from awkward arrays parameters. - - Awkward arrays cannot be subclassed like other python objects. Instead subclasses need - to be attached as "behavior". These "behaviors" cannot take any additional parameters (as we do - for other data types to store `_view_args`). Therefore, we need to store `_view_args` using awkward's - parameter mechanism. These parameters need to be json-serializable, which is why we can't store - ElementRef directly, but need to replace the reference to the parent AnnDataView container with a weak - reference. - """ - parent_key, attrname, keys = self.layout.parameter(_PARAM_NAME) - parent = _registry[parent_key] - return ElementRef(parent, attrname, keys) - - def __copy__(self) -> AwkArray: - """ - Turn the AwkwardArrayView into an actual AwkwardArray with no special behavior. - - Need to override __copy__ instead of `.copy()` as awkward arrays don't implement `.copy()` - and are copied using python's standard copy mechanism in `aligned_mapping.py`. - """ - array = self - # makes a shallow copy and removes the reference to the original AnnData object - array = ak.with_parameter(array, _PARAM_NAME, None) - array = ak.with_parameter(array, "__record__", None) - return array - - @as_view.register(AwkArray) - def as_view_awkarray(array, view_args): - parent, attrname, keys = view_args - parent_key = f"target-{id(parent)}" - _registry[parent_key] = parent - # TODO: See https://github.com/scverse/anndata/pull/647#discussion_r963494798_ for more details and - # possible strategies to stack behaviors. - # A better solution might be based on xarray-style "attrs", once this is implemented - # https://github.com/scikit-hep/awkward/issues/1391#issuecomment-1412297114 - # - # Allow only Arrays that have no record behavior attached, or with __record__ explicitly - # set to None - if array.layout.purelist_parameter("__record__") is not None: - raise NotImplementedError( - "Cannot create a view of an awkward array with __record__ parameter. " - "Please open an issue in the AnnData repo and describe your use-case." - ) - array = ak.with_parameter(array, _PARAM_NAME, (parent_key, attrname, keys)) - array = ak.with_parameter(array, "__record__", "AwkwardArrayView") - return array - - ak.behavior["*", "AwkwardArrayView"] = AwkwardArrayView - ak.behavior["AwkwardArrayView"] = AwkwardArrayView - -except ImportError: - - class AwkwardArrayView: - pass +@as_view.register(AwkArray) +def as_view_awkarray(array, view_args): + # TODO @grst: describe why a view is not necessary + # calling the AwkArray constructor triggers a shallow copy + return AwkArray(array) def _resolve_idxs(old, new, adata): diff --git a/anndata/_io/specs/methods.py b/anndata/_io/specs/methods.py index 37c532288..a50485574 100644 --- a/anndata/_io/specs/methods.py +++ b/anndata/_io/specs/methods.py @@ -516,12 +516,6 @@ def read_sparse_partial(elem, *, items=None, indices=(slice(None), slice(None))) @_REGISTRY.register_write(H5Group, AwkArray, IOSpec("awkward-array", "0.1.0")) @_REGISTRY.register_write(ZarrGroup, AwkArray, IOSpec("awkward-array", "0.1.0")) -@_REGISTRY.register_write( - H5Group, views.AwkwardArrayView, IOSpec("awkward-array", "0.1.0") -) -@_REGISTRY.register_write( - ZarrGroup, views.AwkwardArrayView, IOSpec("awkward-array", "0.1.0") -) def write_awkward(f, k, v, _writer, dataset_kwargs=MappingProxyType({})): from anndata.compat import awkward as ak diff --git a/anndata/tests/test_awkward.py b/anndata/tests/test_awkward.py index 89142eccb..1c93652ef 100644 --- a/anndata/tests/test_awkward.py +++ b/anndata/tests/test_awkward.py @@ -130,9 +130,8 @@ def test_view(key): getattr(adata, key)["awk"] = ak.Array([{"a": [1], "b": [2], "c": [3]}] * 3) adata_view = adata[:2, :2] - with pytest.warns(ImplicitModificationWarning, match="initializing view as actual"): - getattr(adata_view, key)["awk"]["c"] = np.full((2, 1), 4) - getattr(adata_view, key)["awk"]["d"] = np.full((2, 1), 5) + getattr(adata_view, key)["awk"]["c"] = np.full((2, 1), 4) + getattr(adata_view, key)["awk"]["d"] = np.full((2, 1), 5) # values in view were correctly set npt.assert_equal(getattr(adata_view, key)["awk"]["c"], np.full((2, 1), 4)) @@ -145,9 +144,7 @@ def test_view(key): def test_view_of_awkward_array_with_custom_behavior(): - """Currently can't create view of arrays with custom __name__ (in this case "string") - See https://github.com/scverse/anndata/pull/647#discussion_r963494798_""" - + """Ensure that a custom behavior persists when creating a view.""" from uuid import uuid4 BEHAVIOUR_ID = str(uuid4()) @@ -157,14 +154,15 @@ def reversed(self): return self[..., ::-1] ak.behavior[BEHAVIOUR_ID] = ReversibleArray + ak.behavior["*", BEHAVIOUR_ID] = ReversibleArray + adata = gen_adata((3, 3), varm_types=(), obsm_types=(), layers_types=()) - adata.obsm["awk_string"] = ak.with_parameter( - ak.Array(["AAA", "BBB", "CCC"]), "__record__", BEHAVIOUR_ID + adata.obsm["awk_string"] = ak.with_name( + ak.Array([{"a": "AAA"}, {"a": "BBB"}, {"a": "CCC"}]), BEHAVIOUR_ID ) - adata_view = adata[:2] + ak_view = adata[1:] - with pytest.raises(NotImplementedError): - adata_view.obsm["awk_string"] + assert ak.to_list(ak_view.obsm["awk_string"].reversed()["a"]) == ["CCC", "BBB"] @pytest.mark.parametrize( From 86c5ff8529e0d7631b4712285dfa8f24472b8c8b Mon Sep 17 00:00:00 2001 From: Gregor Sturm Date: Fri, 1 Sep 2023 09:31:19 +0200 Subject: [PATCH 05/10] WIP add test cases --- anndata/tests/test_awkward.py | 82 ++++++++++++++++++++++++++++------- 1 file changed, 66 insertions(+), 16 deletions(-) diff --git a/anndata/tests/test_awkward.py b/anndata/tests/test_awkward.py index 1c93652ef..d352f5715 100644 --- a/anndata/tests/test_awkward.py +++ b/anndata/tests/test_awkward.py @@ -123,24 +123,74 @@ def test_copy(key): getattr(adata, key)["awk"]["d"] -@pytest.mark.parametrize("key", ["obsm", "varm"]) -def test_view(key): - """Check that modifying a view does not modify the original""" +@pytest.mark.parametrize( + "array,setter_slice,setter_value,expected", + [ + # Non-records are immutable and setting on them results in a TypeError + pytest.param( + [[1], [2, 3], [4, 5, 6]], + (slice(None), 2), + 42, + TypeError, + id="immutable_ragged_list", + ), + pytest.param( + np.zeros((3, 3)), + (slice(None), 1), + 42, + TypeError, + id="immutable_regular_type", + ), + pytest.param( + [{"a": 1}, {"a": 2}, {"a": 3}], + "a", + [42, 43, 44], + [{"a": 42}, {"a": 43}, {"a": 44}], + id="updating_record", + ), + pytest.param( + [{"a": 1}, {"a": 2}, {"a": 3}], + "b", + [42, 43, 44], + [{"a": 1, "b": 42}, {"a": 2, "b": 43}, {"a": 3, "b": 44}], + id="adding_record", + ), + ], +) +@pytest.mark.parametrize("key", ["obsm", "varm", "uns"]) +def test_view(key, array, setter_slice, setter_value, expected): + """Check that modifying a view does not modify the original. + + Parameters + ---------- + key + key in anndata, obsm, varm, or uns + view_func + a function that returns a view of an AnnData object + array + The array that is assigned to adata[key]["awk"] for testing. + setter_slice + The slice used for setting a value on the awkward array with `arr[slice] = ...` + setter_value + The value assigned to the array with `arr[slice] = setter_value` + expected + The expected array after setting the value. Can be an exception if setting the value is supposed + to result in an error. + """ adata = gen_adata((3, 3), varm_types=(), obsm_types=(), layers_types=()) - getattr(adata, key)["awk"] = ak.Array([{"a": [1], "b": [2], "c": [3]}] * 3) - adata_view = adata[:2, :2] - - getattr(adata_view, key)["awk"]["c"] = np.full((2, 1), 4) - getattr(adata_view, key)["awk"]["d"] = np.full((2, 1), 5) - - # values in view were correctly set - npt.assert_equal(getattr(adata_view, key)["awk"]["c"], np.full((2, 1), 4)) - npt.assert_equal(getattr(adata_view, key)["awk"]["d"], np.full((2, 1), 5)) + getattr(adata, key)["awk"] = ak.Array(array) + adata_view = adata[:] + awk_view = getattr(adata_view, key)["awk"] - # values in original were not updated - npt.assert_equal(getattr(adata, key)["awk"]["c"], np.full((3, 1), 3)) - with pytest.raises(IndexError): - getattr(adata, key)["awk"]["d"] + if isinstance(expected, type): + with pytest.raises(expected): + awk_view[setter_slice] = setter_value + else: + awk_view[setter_slice] = setter_value + # values in view are correctly set + assert ak.all(awk_view[setter_slice] == setter_value) + # values in original were not modified + assert ak.to_list(getattr(adata, key)["awk"]) == array def test_view_of_awkward_array_with_custom_behavior(): From dfe7008035f5e4fa3b89a2cab0365a9459dc399d Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Fri, 1 Sep 2023 07:41:51 +0000 Subject: [PATCH 06/10] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- anndata/tests/test_awkward.py | 1 - 1 file changed, 1 deletion(-) diff --git a/anndata/tests/test_awkward.py b/anndata/tests/test_awkward.py index d352f5715..4aa36ac08 100644 --- a/anndata/tests/test_awkward.py +++ b/anndata/tests/test_awkward.py @@ -5,7 +5,6 @@ from anndata.tests.helpers import assert_equal, gen_adata, gen_awkward from anndata.compat import awkward as ak -from anndata import ImplicitModificationWarning from anndata.utils import dim_len from anndata import AnnData, read_h5ad import anndata From 8b14e57e6c39b9f0aa5ea4b4651d00251d116ab1 Mon Sep 17 00:00:00 2001 From: Gregor Sturm Date: Fri, 1 Sep 2023 09:47:23 +0200 Subject: [PATCH 07/10] More test cases --- anndata/tests/test_awkward.py | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/anndata/tests/test_awkward.py b/anndata/tests/test_awkward.py index 4aa36ac08..1dda6641c 100644 --- a/anndata/tests/test_awkward.py +++ b/anndata/tests/test_awkward.py @@ -154,6 +154,13 @@ def test_copy(key): [{"a": 1, "b": 42}, {"a": 2, "b": 43}, {"a": 3, "b": 44}], id="adding_record", ), + pytest.param( + [{"outer": {"a": 1}}, {"outer": {"a": 2}}, {"outer": {"a": 3}}], + ("outer", "a"), + [42, 43, 44], + [{"outer": {"a": 42}}, {"outer": {"a": 43}}, {"outer": {"a": 44}}], + id="updating_nested_record", + ), ], ) @pytest.mark.parametrize("key", ["obsm", "varm", "uns"]) From 4870553c56286ab5bcb68386126701851b2910f2 Mon Sep 17 00:00:00 2001 From: Gregor Sturm Date: Fri, 1 Sep 2023 09:52:09 +0200 Subject: [PATCH 08/10] Add comment about view behavior --- anndata/_core/views.py | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/anndata/_core/views.py b/anndata/_core/views.py index 77f2645d8..6abe09afc 100644 --- a/anndata/_core/views.py +++ b/anndata/_core/views.py @@ -302,9 +302,11 @@ def as_view_zappy(z, view_args): @as_view.register(AwkArray) def as_view_awkarray(array, view_args): - # TODO @grst: describe why a view is not necessary - # calling the AwkArray constructor triggers a shallow copy - return AwkArray(array) + # We don't need any specific view behavior for awkward arrays. A slice of an awkward array is always a + # shallow copy of the original. This implies that setting a record field on a slice never modifies the original. + # Other fields than records are entirely immutable anyway. + # See also https://github.com/scverse/anndata/issues/1035#issuecomment-1687619270. + return array @as_view.register(CupyArray) From 4b34598f7c9c7afc4abd43420ae9985d290543eb Mon Sep 17 00:00:00 2001 From: Gregor Sturm Date: Fri, 8 Sep 2023 08:51:45 +0200 Subject: [PATCH 09/10] Fix akward array view test The test didn't catch that when modifying an awkward array in a AnnDataView, the changes did not persist within that view. The issue was that I had been retreiving the awkward array from the view once, and then did tests on it. Instead, to properly test this, I need to retreive the Awkward array from the AnnData view every time. It turns out doing so gives me a fresh copy of the origina awkward array and modifications only affect that copy - not the original data. --- anndata/tests/test_awkward.py | 17 ++++++++++++----- 1 file changed, 12 insertions(+), 5 deletions(-) diff --git a/anndata/tests/test_awkward.py b/anndata/tests/test_awkward.py index 1dda6641c..7b9392ba6 100644 --- a/anndata/tests/test_awkward.py +++ b/anndata/tests/test_awkward.py @@ -163,7 +163,14 @@ def test_copy(key): ), ], ) -@pytest.mark.parametrize("key", ["obsm", "varm", "uns"]) +@pytest.mark.parametrize( + "key", + [ + "obsm", + "varm", + # "uns", + ], +) def test_view(key, array, setter_slice, setter_value, expected): """Check that modifying a view does not modify the original. @@ -186,15 +193,15 @@ def test_view(key, array, setter_slice, setter_value, expected): adata = gen_adata((3, 3), varm_types=(), obsm_types=(), layers_types=()) getattr(adata, key)["awk"] = ak.Array(array) adata_view = adata[:] - awk_view = getattr(adata_view, key)["awk"] + get_awk_view = lambda *_: getattr(adata_view, key)["awk"] if isinstance(expected, type): with pytest.raises(expected): - awk_view[setter_slice] = setter_value + get_awk_view()[setter_slice] = setter_value else: - awk_view[setter_slice] = setter_value + get_awk_view()[setter_slice] = setter_value # values in view are correctly set - assert ak.all(awk_view[setter_slice] == setter_value) + assert ak.to_list(get_awk_view()) == expected # values in original were not modified assert ak.to_list(getattr(adata, key)["awk"]) == array From fdeff90ff5f0206f022a645206ee2ae510eb18f0 Mon Sep 17 00:00:00 2001 From: Gregor Sturm Date: Fri, 8 Sep 2023 08:55:58 +0200 Subject: [PATCH 10/10] Fix that awkward arrays in an awyward array view could not be modified. I solved the issue described in the previous commit by adding logic to `AlignedViewMixin` that can copy objects of a certain type (for now only awkward arrays) upon view creation. --- anndata/_core/aligned_mapping.py | 24 ++++++++++++++++++++---- anndata/_core/views.py | 2 +- 2 files changed, 21 insertions(+), 5 deletions(-) diff --git a/anndata/_core/aligned_mapping.py b/anndata/_core/aligned_mapping.py index 156146518..820927671 100644 --- a/anndata/_core/aligned_mapping.py +++ b/anndata/_core/aligned_mapping.py @@ -136,13 +136,27 @@ class AlignedViewMixin: parent_mapping: Mapping[str, V] """The object this is a view of.""" + copied_objects: Mapping[str, V] + """Objects that are copied at view creation, because they natively support copy on write""" + is_view = True def __getitem__(self, key: str) -> V: - return as_view( - _subset(self.parent_mapping[key], self.subset_idx), - ElementRef(self.parent, self.attrname, (key,)), - ) + try: + return self.copied_objects[key] + # key might not exist, or copied_objects might not yet be initialized + except (KeyError, AttributeError): + return as_view( + _subset(self.parent_mapping[key], self.subset_idx), + ElementRef(self.parent, self.attrname, (key,)), + ) + + def _copy_objects(self): + """For some objects (Awkward arrays) we want to store a copy of the slice at view creation.""" + self.copied_objects = {} + for key, value in self.parent_mapping.items(): + if isinstance(value, AwkArray): + self.copied_objects[key] = AwkArray(self[key]) def __setitem__(self, key: str, value: V): value = self._validate_value(value, key) # Validate before mutating @@ -286,9 +300,11 @@ def __init__( subset_idx: OneDIdx, ): self.parent_mapping = parent_mapping + self.copied_objects = {} self._parent = parent_view self.subset_idx = subset_idx self._axis = parent_mapping._axis + self._copy_objects() AxisArraysBase._view_class = AxisArraysView diff --git a/anndata/_core/views.py b/anndata/_core/views.py index 6abe09afc..0018cd6fd 100644 --- a/anndata/_core/views.py +++ b/anndata/_core/views.py @@ -306,7 +306,7 @@ def as_view_awkarray(array, view_args): # shallow copy of the original. This implies that setting a record field on a slice never modifies the original. # Other fields than records are entirely immutable anyway. # See also https://github.com/scverse/anndata/issues/1035#issuecomment-1687619270. - return array + return AwkArray(array) @as_view.register(CupyArray)