Skip to content

Commit

Permalink
Merge pull request #288 from davidhassell/compress
Browse files Browse the repository at this point in the history
Allow discrete sampling geometries with 1-d data to be written as ragged arrays, and improve the compression process
  • Loading branch information
davidhassell authored Feb 21, 2024
2 parents 92f8fdd + f54ef95 commit 8b535c4
Show file tree
Hide file tree
Showing 3 changed files with 133 additions and 27 deletions.
10 changes: 10 additions & 0 deletions Changelog.rst
Original file line number Diff line number Diff line change
@@ -1,3 +1,13 @@
Version 1.11.1.0
----------------

**2024-??-??**

* New keyword parameter to `cfdm.Field.insert_dimension`:
``constructs`` (https://github.com/NCAS-CMS/cfdm/issues/287)

----

Version 1.11.0.0
----------------

Expand Down
139 changes: 114 additions & 25 deletions cfdm/field.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@
Constructs,
Count,
Domain,
DomainAxis,
Index,
List,
core,
Expand Down Expand Up @@ -105,6 +106,7 @@ def __new__(cls, *args, **kwargs):
instance._AuxiliaryCoordinate = AuxiliaryCoordinate
instance._Constructs = Constructs
instance._Domain = Domain
instance._DomainAxis = DomainAxis
instance._RaggedContiguousArray = RaggedContiguousArray
instance._RaggedIndexedArray = RaggedIndexedArray
instance._RaggedIndexedContiguousArray = RaggedIndexedContiguousArray
Expand Down Expand Up @@ -1032,17 +1034,13 @@ def _RaggedContiguousArray(
return self._RaggedContiguousArray(
compressed_data,
shape=data.shape,
# size=data.size,
# ndim=data.ndim,
count_variable=count_variable,
)

def _RaggedIndexedArray(self, compressed_data, data, index_variable):
    """Create a ragged indexed array from compressed data.

    Delegates to the ``_RaggedIndexedArray`` attribute of *self*.

    :Parameters:

        self:
            The object whose ``_RaggedIndexedArray`` attribute is
            called to create the array.

        compressed_data:
            The compressed data, passed on as the only positional
            argument.

        data:
            The object whose ``shape`` is passed on as the
            ``shape`` keyword.

        index_variable:
            Passed on unchanged as the ``index_variable`` keyword.

    :Returns:

            Whatever ``self._RaggedIndexedArray`` returns.

    """
    array_kwargs = {
        "shape": data.shape,
        "index_variable": index_variable,
    }
    return self._RaggedIndexedArray(compressed_data, **array_kwargs)

Expand All @@ -1052,8 +1050,6 @@ def _RaggedIndexedContiguousArray(
return self._RaggedIndexedContiguousArray(
compressed_data,
shape=data.shape,
# size=data.size,
# ndim=data.ndim,
count_variable=count_variable,
index_variable=index_variable,
)
Expand Down Expand Up @@ -1173,6 +1169,37 @@ def _compress_metadata(
y = Array_func(f, compressed_data, data=data, **kwargs)
data._set_CompressedArray(y, copy=False)

def _derive_count(flattened_data):
    """Derive the DSG count for each feature.

    The count of a feature is its size minus the number of trailing
    masked elements, i.e. the number of elements up to and including
    its last unmasked element. Masked elements that come before the
    last unmasked element are still counted.

    :Parameters:

        flattened_data: array_like
            The 2-d flattened array from which to derive the
            counts. The leading dimension is the number of
            features. Iterating over it must yield one object per
            feature whose ``array`` attribute is that feature's
            `numpy` array (assumed to be 1-d), which may be masked.

    :Returns:

        `list`
            The count for each feature, as `int` values. A feature
            that has no elements, or whose elements are all masked,
            has a count of zero.

    """
    count = []
    for d in flattened_data:
        # Locate the last unmasked element with vectorised numpy
        # operations, rather than testing each element in turn in
        # Python. `getmaskarray` always returns a full boolean mask,
        # even for unmasked input.
        mask = np.ma.getmaskarray(d.array)
        unmasked = np.flatnonzero(~mask)
        count.append(int(unmasked[-1]) + 1 if unmasked.size else 0)

    return count

f = _inplace_enabled_define_and_cleanup(self)

data = f.get_data(None)
Expand Down Expand Up @@ -1224,18 +1251,25 @@ def _compress_metadata(
# --------------------------------------------------------
flattened_data = data.flatten(range(data.ndim - 1))

count = []
masked = np.ma.masked
for d in flattened_data:
d = d.array
last = d.size
for i in d[::-1]:
if i is not masked:
break
# Try to get the counts from an auxiliary coordinate
# construct that spans the same axes as the field data
count = None
data_axes = f.get_data_axes()
construct_axes = f.constructs.data_axes()
for key, c in (
f.auxiliary_coordinates().filter_by_data(todict=True).items()
):
if construct_axes[key] != data_axes:
continue

last -= 1
count = _derive_count(c.data.flatten(range(c.ndim - 1)))
break

count.append(last)
if count is None:
# When no auxiliary coordinate constructs span the
# field data dimensions, get the counts from the field
# data.
count = _derive_count(flattened_data)

N = sum(count)
compressed_field_data = _empty_compressed_data(data, (N,))
Expand Down Expand Up @@ -2081,7 +2115,9 @@ def indices(self, **kwargs):
return tuple([indices[axis] for axis in self.get_data_axes()])

@_inplace_enabled(default=False)
def insert_dimension(self, axis, position=0, inplace=False):
def insert_dimension(
self, axis, position=0, constructs=False, inplace=False
):
"""Expand the shape of the data array.
Inserts a new size 1 axis, corresponding to an existing domain
Expand All @@ -2097,6 +2133,9 @@ def insert_dimension(self, axis, position=0, inplace=False):
The identifier of the domain axis construct
corresponding to the inserted axis.
If *axis* is `None` then a new domain axis construct
will be created for the inserted dimension.
*Parameter example:*
``axis='domainaxis2'``
Expand All @@ -2112,6 +2151,13 @@ def insert_dimension(self, axis, position=0, inplace=False):
*Parameter example:*
``position=-1``
constructs: `bool`, optional
If True then also insert the new axis into all
metadata constructs that don't already include it. By
default, metadata constructs are not changed.
.. versionadded:: (cfdm) 1.11.1.0
{{inplace: `bool`, optional}}
:Returns:
Expand All @@ -2132,22 +2178,30 @@ def insert_dimension(self, axis, position=0, inplace=False):
(19, 73, 1, 96)
>>> f.data.shape
(19, 73, 1, 96)
>>> f.insert_dimension(None, 1).data.shape
(19, 1, 73, 1, 96)
"""
f = _inplace_enabled_define_and_cleanup(self)

domain_axis = f.domain_axes(todict=True).get(axis)
if domain_axis is None:
raise ValueError(f"Can't insert non-existent domain axis: {axis}")

if domain_axis.get_size() != 1:
raise ValueError(
f"Can only insert axis of size 1. Axis {axis!r} has size "
f"{domain_axis.get_size()}"
if axis is None:
axis = f.set_construct(self._DomainAxis(1))
else:
axis, domain_axis = f.domain_axis(
axis,
item=True,
default=ValueError("Can't identify a unique axis to insert"),
)

if domain_axis.get_size() != 1:
raise ValueError(
f"Can only insert axis of size 1. Axis {axis!r} has size "
f"{domain_axis.get_size()}"
)

data_axes = f.get_data_axes(default=None)
if data_axes is not None:
data_axes0 = data_axes[:]
if axis in data_axes:
raise ValueError(
f"Can't insert a duplicate data array axis: {axis!r}"
Expand All @@ -2159,9 +2213,44 @@ def insert_dimension(self, axis, position=0, inplace=False):
# Expand the dims in the field's data array
super(Field, f).insert_dimension(position, inplace=True)

# Update the axes
if data_axes is not None:
f.set_data_axes(data_axes)

if constructs:
if data_axes is None:
data_axes0 = []
position = 0

for key, construct in f.constructs.filter_by_data(
todict=True
).items():
data = construct.get_data(
None, _units=False, _fill_value=False
)
if data is None:
continue

construct_axes = list(f.get_data_axes(key))
if axis in construct_axes:
continue

# Find the position of the new axis
c_position = position
for a in data_axes0:
if a not in construct_axes:
c_position -= 1

if c_position < 0:
c_position = 0

# Expand the dims in the construct's data array
construct.insert_dimension(c_position, inplace=True)

# Update the construct axes
construct_axes.insert(c_position, axis)
f.set_data_axes(axes=construct_axes, key=key)

return f

def convert(self, *identity, full_domain=True, **filter_kwargs):
Expand Down
11 changes: 9 additions & 2 deletions cfdm/test/test_Field.py
Original file line number Diff line number Diff line change
Expand Up @@ -539,8 +539,8 @@ def test_Field_has_construct(self):
f.set_construct(cfdm.DomainAxis(0), key="")
self.assertTrue(f.has_construct(""))

def test_Field_squeeze_transpose_insert_dimension(self):
"""Test squeeze, transpose and `insert_dimension` methods."""
def test_Field_squeeze_transpose(self):
"""Test squeeze and transpose methods."""
f = self.f1

g = f.transpose()
Expand All @@ -555,6 +555,9 @@ def test_Field_squeeze_transpose_insert_dimension(self):
(g.get_data_axes(), f.get_data_axes()),
)

def test_Field_insert_dimension(self):
"""Test cfdm.Field.insert_dimension method."""
f = self.f1
g = f.copy()

key = g.set_construct(cfdm.DomainAxis(1))
Expand All @@ -567,6 +570,10 @@ def test_Field_squeeze_transpose_insert_dimension(self):
self.assertEqual(h.data.ndim, f.data.ndim + 1)
self.assertEqual(h.get_data_axes()[:-1], f.get_data_axes())

self.assertEqual(g.cell_measure().ndim, 2)
h = g.insert_dimension(None, constructs=True)
self.assertEqual(h.cell_measure().ndim, 3)

def test_Field_compress_uncompress(self):
"""Test the compress and uncompress Field methods."""
contiguous = os.path.join(
Expand Down

0 comments on commit 8b535c4

Please sign in to comment.