From c1ca8044f40c0a6e6c9d6228c746a1fa0485caf0 Mon Sep 17 00:00:00 2001 From: Simon Heybrock Date: Thu, 6 Jun 2024 05:49:07 +0200 Subject: [PATCH 1/3] Allow compatible values in __setitem__ --- src/cyclebane/node_values.py | 61 ++++++++++++++++++++++++++++++------ tests/graph_test.py | 60 ++++++++++++++++++++++++++++++++--- 2 files changed, 107 insertions(+), 14 deletions(-) diff --git a/src/cyclebane/node_values.py b/src/cyclebane/node_values.py index 96b534f..ffc9a4b 100644 --- a/src/cyclebane/node_values.py +++ b/src/cyclebane/node_values.py @@ -4,7 +4,8 @@ from abc import ABC, abstractmethod from collections.abc import Hashable, Iterable, Iterator, Mapping, Sequence -from typing import TYPE_CHECKING, Any, ClassVar +from types import ModuleType +from typing import TYPE_CHECKING, Any, ClassVar, TypeVar if TYPE_CHECKING: import numpy @@ -15,6 +16,8 @@ IndexName = Hashable IndexValue = Hashable +T = TypeVar('T', bound='ValueArray') + class ValueArray(ABC): """ @@ -45,6 +48,17 @@ def from_array_like(values: Any, *, axis_zero: int = 0) -> ValueArray: @abstractmethod def try_from(obj: Any, *, axis_zero: int = 0) -> ValueArray | None: ... + def __eq__(self, other: object) -> bool: + if type(self) != type(other): + return False + return self._equal(other) + + def __ne__(self, other: object) -> bool: + return not self == other + + @abstractmethod + def _equal(self: T, other: T) -> bool: ... + @abstractmethod def sel(self, key: tuple[tuple[IndexName, IndexValue], ...]) -> Any: """Return data by selecting from index with given name and index value.""" @@ -94,6 +108,13 @@ def __init__( def try_from(obj: Any, *, axis_zero: int = 0) -> SequenceAdapter | None: return SequenceAdapter(obj, axis_zero=axis_zero) + def _equal(self, other: SequenceAdapter) -> bool: + return ( + self._values == other._values + and self._index == other._index + and self._axis_zero == other._axis_zero + ) + def sel(self, key: tuple[tuple[IndexName, IndexValue], ...]) -> Any: if len(key) != 1: raise ValueError('SequenceAdapter only supports single index') @@ -133,6 +154,11 @@ def try_from(obj: Any, *, axis_zero: int = 0) -> PandasSeriesAdapter | None: if isinstance(obj, pandas.Series): return PandasSeriesAdapter(obj, axis_zero=axis_zero) + def _equal(self, other: PandasSeriesAdapter) -> bool: + return ( + self._series.equals(other._series) and self._axis_zero == other._axis_zero + ) + def sel(self, key: tuple[tuple[IndexName, IndexValue], ...]) -> Any: if len(key) != 1: raise ValueError('PandasSeriesAdapter only supports single index') @@ -188,6 +214,9 @@ def try_from(obj: Any, *, axis_zero: int = 0) -> XarrayDataArrayAdapter | None: except ModuleNotFoundError: pass + def _equal(self, other: XarrayDataArrayAdapter) -> bool: + return self._data_array.identical(other._data_array) + def sel(self, key: tuple[tuple[IndexName, IndexValue], ...]) -> Any: return self._data_array.sel(dict(key)) @@ -210,15 +239,14 @@ def indices(self) -> dict[IndexName, Iterable[IndexValue]]: class ScippDataArrayAdapter(ValueArray): - def __init__(self, data_array: scipp.DataArray): - import scipp - + def __init__(self, data_array: scipp.DataArray, scipp: ModuleType): default_indices = { dim: scipp.arange(dim, size, unit=None) for dim, size in data_array.sizes.items() if dim not in data_array.coords } self._data_array = data_array.assign_coords(default_indices) + self._scipp = scipp @staticmethod def try_from(obj: Any, *, axis_zero: int = 0) -> ScippDataArrayAdapter | None: @@ -226,15 +254,16 @@ def try_from(obj: Any, *, axis_zero: int = 0) -> ScippDataArrayAdapter | None: import scipp if isinstance(obj, scipp.Variable): - return ScippDataArrayAdapter(scipp.DataArray(obj)) + return ScippDataArrayAdapter(scipp.DataArray(obj), scipp=scipp) if isinstance(obj, scipp.DataArray): - return ScippDataArrayAdapter(obj) + return ScippDataArrayAdapter(obj, scipp=scipp) except ModuleNotFoundError: pass - def sel(self, key: tuple[tuple[IndexName, IndexValue], ...]) -> Any: - import scipp + def _equal(self, other: ScippDataArrayAdapter) -> bool: + return self._scipp.identical(self._data_array, other._data_array) + def sel(self, key: tuple[tuple[IndexName, IndexValue], ...]) -> Any: values = self._data_array for dim, value in key: # Reconstruct label, to use label-based indexing instead of positional @@ -242,7 +271,7 @@ def sel(self, key: tuple[tuple[IndexName, IndexValue], ...]) -> Any: value, unit = value else: unit = None - label = scipp.scalar(value, unit=unit) + label = self._scipp.scalar(value, unit=unit) # Scipp indexing uses a comma to separate dimension label from the index, # unlike Numpy and other libraries where it separates the indices for # different axes. @@ -253,7 +282,7 @@ def __getitem__(self, key: dict[IndexName, slice]) -> ScippDataArrayAdapter: values = self._data_array for dim, i in key: values = values[dim, i] - return ScippDataArrayAdapter(values) + return ScippDataArrayAdapter(values, scipp=self._scipp) @property def shape(self) -> tuple[int, ...]: @@ -307,6 +336,13 @@ def try_from(obj: Any, *, axis_zero: int = 0) -> NumpyArrayAdapter | None: if isinstance(obj, numpy.ndarray): return NumpyArrayAdapter(obj, axis_zero=axis_zero) + def _equal(self, other: NumpyArrayAdapter) -> bool: + return ( + (self._array == other._array).all() + and self._indices == other._indices + and self._axis_zero == other._axis_zero + ) + def sel(self, key: tuple[tuple[IndexName, IndexValue], ...]) -> Any: index_tuple = tuple(self._indices[k].index(i) for k, i in key) return self._array[index_tuple] @@ -374,6 +410,11 @@ def from_mapping( return NodeValues(value_arrays) def merge(self, value_arrays: Mapping[Hashable, ValueArray]) -> NodeValues: + value_arrays = { + key: value + for key, value in value_arrays.items() + if self.get(key, None) != value + } if value_arrays: named = next(iter(value_arrays.values())).index_names if any(name in self.indices for name in named): diff --git a/tests/graph_test.py b/tests/graph_test.py index 7e657fa..fe80de8 100644 --- a/tests/graph_test.py +++ b/tests/graph_test.py @@ -735,15 +735,67 @@ def test_setitem_with_mapped_operands_raises_on_conflict() -> None: def test_setitem_currently_does_not_allow_compatible_indices() -> None: g = nx.DiGraph() - g.add_edge('a', 'c') - g.add_edge('b', 'c') + g.add_edge('a', 'b') + g.add_edge('c', 'd') graph = cb.Graph(g) - mapped = graph.map({'a': [1, 2, 3], 'b': [11, 12, 13]}).reduce('c', name='d') + mapped1 = graph.map({'a': [1, 2, 3]}) + mapped2 = graph['d'].map({'c': [11, 12, 13]}).reduce('d', name='e') # Note: This is a limitation of the current implementation. We could check if the # indices are identical and allow this. For simplicity we currently do not. with pytest.raises(ValueError, match="Conflicting new index names"): - mapped['x'] = mapped['d'] + mapped1['x'] = mapped2 + + +@pytest.mark.parametrize( + 'node_values', + [ + {'a': [1, 2, 3], 'b': [11, 12, 13]}, + {'a': np.array([1, 2, 3]), 'b': np.array([11, 12, 13])}, + pd.DataFrame({'a': [1, 2, 3], 'b': [11, 12, 13]}), + { + 'a': sc.array(dims=['x'], values=[1, 2, 3]), + 'b': sc.array(dims=['x'], values=[11, 12, 13]), + }, + { + 'a': xr.DataArray(dims=('x',), data=[1, 2, 3]), + 'b': xr.DataArray(dims=('x',), data=[11, 12, 13]), + }, + ], +) +def test_setitem_allows_compatible_node_values(node_values) -> None: + g = nx.DiGraph() + g.add_edge('a', 'c') + g.add_edge('b', 'c') + + graph = cb.Graph(g) + mapped = graph.map(node_values).reduce('c', name='d') + mapped['x'] = mapped['d'] + assert len(mapped.index_names) == 1 + + +def test_setitem_raises_if_node_values_equivalent_but_of_different_type() -> None: + g = nx.DiGraph() + g.add_edge('a', 'b') + graph = cb.Graph(g) + mapped1 = graph.map({'a': [1, 2]}).reduce('b', name='d') + mapped2 = graph.map({'a': np.array([1, 2])}).reduce('b', name='d') + # One could imagine treating this as equivalent, but we are strict in the + # comparison. + with pytest.raises(ValueError, match="Conflicting new index names"): + mapped1['x'] = mapped2['d'] + + +def test_setitem_raises_if_node_values_incompatible() -> None: + g = nx.DiGraph() + g.add_edge('a', 'b') + graph = cb.Graph(g) + mapped1 = graph.map({'a': [1, 2]}).reduce('b', name='d') + mapped2 = graph.map({'a': sc.array(dims=('x',), values=[1, 2])}).reduce( + 'b', name='d' + ) + with pytest.raises(ValueError, match="has already been mapped"): + mapped1['x'] = mapped2['d'] def test_setitem_does_currently_not_support_slice_assignment() -> None: From 7326a63bbc6cdfaa16d1cf71cd6de5eba4870f52 Mon Sep 17 00:00:00 2001 From: Simon Heybrock <12912489+SimonHeybrock@users.noreply.github.com> Date: Mon, 10 Jun 2024 12:36:33 +0200 Subject: [PATCH 2/3] Apply suggestions from code review Co-authored-by: Jan-Lukas Wynen --- src/cyclebane/node_values.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/cyclebane/node_values.py b/src/cyclebane/node_values.py index ffc9a4b..fca8815 100644 --- a/src/cyclebane/node_values.py +++ b/src/cyclebane/node_values.py @@ -49,8 +49,8 @@ def from_array_like(values: Any, *, axis_zero: int = 0) -> ValueArray: def try_from(obj: Any, *, axis_zero: int = 0) -> ValueArray | None: ... def __eq__(self, other: object) -> bool: - if type(self) != type(other): - return False + if type(self) is not type(other): + return NotImplemented return self._equal(other) def __ne__(self, other: object) -> bool: From ff35dfedcf7d067df65a06f6d77cef5de32c54f3 Mon Sep 17 00:00:00 2001 From: Simon Heybrock Date: Mon, 10 Jun 2024 12:38:46 +0200 Subject: [PATCH 3/3] Add more test cases --- tests/graph_test.py | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/tests/graph_test.py b/tests/graph_test.py index fe80de8..d036679 100644 --- a/tests/graph_test.py +++ b/tests/graph_test.py @@ -750,13 +750,18 @@ def test_setitem_currently_does_not_allow_compatible_indices() -> None: @pytest.mark.parametrize( 'node_values', [ + {'a': [1, 2, 3]}, {'a': [1, 2, 3], 'b': [11, 12, 13]}, + {'a': np.array([1, 2, 3])}, {'a': np.array([1, 2, 3]), 'b': np.array([11, 12, 13])}, + pd.DataFrame({'a': [1, 2, 3]}), pd.DataFrame({'a': [1, 2, 3], 'b': [11, 12, 13]}), + {'a': sc.array(dims=['x'], values=[1, 2, 3])}, { 'a': sc.array(dims=['x'], values=[1, 2, 3]), 'b': sc.array(dims=['x'], values=[11, 12, 13]), }, + {'a': xr.DataArray(dims=('x',), data=[1, 2, 3])}, { 'a': xr.DataArray(dims=('x',), data=[1, 2, 3]), 'b': xr.DataArray(dims=('x',), data=[11, 12, 13]),