Skip to content

Commit

Permalink
Merge pull request #665 from PMEAL/fix_interleave_data
Browse files Browse the repository at this point in the history
fix the bug in interleave data
  • Loading branch information
jgostick authored Feb 16, 2017
2 parents 58efd04 + 8f57eda commit f9cdb7d
Show file tree
Hide file tree
Showing 6 changed files with 184 additions and 106 deletions.
163 changes: 75 additions & 88 deletions OpenPNM/Base/__Core__.py
Original file line number Diff line number Diff line change
Expand Up @@ -833,8 +833,11 @@ def _interleave_data(self, prop, sources):
Notes
-----
This makes an effort to maintain the data 'type' when possible; however
when data is missing this can be tricky. Float and boolean data is
fine, but missing ints are converted to float when nans are inserted.
when data is missing this can be tricky. Data can be missing in two
different ways: A set of pores is not assisgned to a geometry or the
network contains multiple geometries and data does not exist on all.
Float and boolean data is fine, but missing ints are converted to float
when nans are inserted.
Examples
--------
Expand All @@ -851,103 +854,87 @@ def _interleave_data(self, prop, sources):
>>> Ts = pn.find_neighbor_throats(pores=Ps,
... mode='not_intersection')
>>> boun = OpenPNM.Geometry.Boundary(network=pn, pores=Ps, throats=Ts)
>>> geom['pore.test_float'] = sp.random.random(geom.Np)
>>> print(sp.sum(~sp.isnan(pn['pore.test_float'])) == geom.Np)
True
>>> boun['pore.test_float'] = sp.random.random(boun.Np)
>>> print(sp.sum(~sp.isnan(pn['pore.test_float'])) == pn.Np)
True
>>> geom['pore.test_int'] = sp.random.randint(0, 100, geom.Np)
>>> print(pn['pore.test_int'].dtype)
float64
>>> print(pn['pore.test_int'].dtype.name.startswith('float'))
True
>>> boun['pore.test_int'] = sp.ones(boun.Np).astype(int)
>>> boun['pore.test_int'] = sp.rand(boun.Np) < 0.5
>>> print(pn['pore.test_int'].dtype)
bool
>>> geom['pore.test_bool'] = sp.rand(geom.Np) < 0.5
>>> print(pn['pore.test_bool'].dtype)
bool
>>> boun['pore.test_bool'] = sp.ones(boun.Np).astype(int)
>>> print(pn['pore.test_bool'].dtype)
bool
>>> boun['pore.test_bool'] = sp.rand(boun.Np) < 0.5
>>> print(pn['pore.test_bool'].dtype)
bool
>>> print(pn['pore.test_int'].dtype.name.startswith('int'))
True
>>> geom['pore.test_bool'] = True
>>> print(sp.sum(pn['pore.test_bool']) == geom.Np)
True
>>> boun['pore.test_bool'] = True
>>> print(sp.sum(pn['pore.test_bool']) == pn.Np)
True
"""
element = prop.split('.')[0]
element = self._parse_element(element)
temp = sp.ndarray((self._count(element)))
nan_locs = sp.ndarray((self._count(element)), dtype='bool')
nan_locs.fill(False)
bool_locs = sp.ndarray((self._count(element)), dtype='bool')
bool_locs.fill(False)
dtypes = []
dtypenames = []
prop_found = False # Flag to indicate if prop found on a sub-object
values_dim = 0
element = self._parse_element(prop.split('.')[0], single=True)
N = self._net._count(element)

# Make sure sources contains objects, not just names of objects
temp_sources = []
for item in sources:
# Check if sources were given as list of objects OR names
try:
item.name
except:
item = self._find_object(obj_name=item)
locations = self._get_indices(element=element,
labels=item.name,
mode='union')
if prop not in item.keys():
values = sp.ones_like(temp[locations])*sp.nan
dtypenames.append('nan')
dtypes.append(sp.dtype(bool))
nan_locs[locations] = True
else:
prop_found = True
values = item[prop]
dtypenames.append(values.dtype.name)
dtypes.append(values.dtype)
if values.dtype == 'bool':
bool_locs[locations] = True
try:
values_dim = sp.shape(values)[1]
except:
pass
if values_dim > 0:
try:
temp_dim = sp.shape(temp)[1]
if temp_dim != values_dim:
logger.warning(prop+' data has different dimensions,' +
'consider revising data in object ' +
str(item.name))
except:
temp = sp.ndarray([self._count(element), values_dim])
if values.dtype == 'object' and temp.dtype != 'object':
temp = temp.astype('object')
temp[locations] = values # Assign values
# Check if requested prop was found on any sub-objects
if prop_found is False:
temp_sources.append(item)
sources = temp_sources

# Attempt to fetch the requested prop array from each object
arrs = [item.get(prop) for item in sources]
locs = [item._net._get_indices(element, item.name) for item in sources]
sizes = [sp.size(a) for a in arrs]
if all([item is None for item in arrs]): # prop not found anywhere
raise KeyError(prop)
# Analyze and assign data type
types_temp = ['int', 'nan', 'float', 'int32', 'int64', 'float32',
'float64', 'bool']
if sp.all([t in ['bool', 'nan'] for t in dtypenames]):
# If all entries are 'bool' (or 'nan')
temp = sp.array(temp, dtype='bool')
if sp.sum(nan_locs) > 0:
temp[nan_locs] = False
elif sp.all([t == dtypenames[0] for t in dtypenames]):
# If all entries are same type
temp = sp.array(temp, dtype=dtypes[0])
elif sp.all([t in types_temp for t in dtypenames]):
# If all entries are 'bool' (or 'nan')
if 'bool' in dtypenames:
temp = sp.array(temp, dtype='bool')
temp[~bool_locs] = False
logger.info(prop+' has been converted to bool,' +
' some data may be lost')
else:
temp = sp.array(temp, dtype='float')
logger.info(prop+' has been converted to float.')
elif sp.all([t in ['object', 'nan'] for t in dtypenames]):
# If all entries are 'bool' (or 'nan')
pass
if sp.any([i is None for i in arrs]): # prop not found everywhere
logger.warning('\''+prop+'\' not found on at least one object')

# Check the general type of each array
atype = []
for a in arrs:
if a is not None:
t = a.dtype.name
if t.startswith('int') or t.startswith('float'):
atype.append('numeric')
elif t.startswith('bool'):
atype.append('boolean')
else:
atype.append('other')
if not all([item == atype[0] for item in atype]):
raise Exception('The array types are not compatible')
else:
temp = sp.array(temp, dtype=max(dtypes))
logger.info('Data type of '+prop+' differs between sub-objects' +
'...converting to larger data type')
return temp
dummy_val = {'numeric': sp.nan, 'boolean': False, 'other': None}

# Create an empty array of the right type and shape
for item in arrs:
if item is not None:
if len(item.shape) == 1:
temp_arr = sp.zeros((N, ), dtype=item.dtype)
else:
temp_arr = sp.zeros((N, item.shape[1]), dtype=item.dtype)
temp_arr.fill(dummy_val[atype[0]])

# Convrert int arrays to float IF NaNs are expected
if (temp_arr.dtype.name.startswith('int') and
(sp.any([i is None for i in arrs]) or
sp.sum(sizes) != N)):
temp_arr = temp_arr.astype(float)
temp_arr.fill(sp.nan)

# Fill new array with values in the corresponding locations
for vals, inds in zip(arrs, locs):
if vals is not None:
temp_arr[inds] = vals
else:
temp_arr[inds] = dummy_val[atype[0]]
return temp_arr

def num_pores(self, labels='all', mode='union'):
r"""
Expand Down
4 changes: 2 additions & 2 deletions OpenPNM/Network/tools.py
Original file line number Diff line number Diff line change
Expand Up @@ -741,8 +741,8 @@ def _template_sphere_disc(dim, outer_radius, inner_radius):
"""
rmax = _np.array(outer_radius, ndmin=1)
rmin = _np.array(inner_radius, ndmin=1)
ind = _np.array(_np.around(2 * rmax - 1), dtype=int)
coord = _np.indices(ind * _np.ones(int(dim), dtype=int))
ind = 2 * rmax - 1
coord = _np.indices((ind * _np.ones(dim, dtype=int)))
coord = coord - (ind - 1)/2
x = coord[0, :]
y = coord[1, :]
Expand Down
4 changes: 2 additions & 2 deletions OpenPNM/Utilities/IO.py
Original file line number Diff line number Diff line change
Expand Up @@ -1229,7 +1229,7 @@ def load(cls, path, network=None, voxel_size=1, return_geometry=False):
net['pore.ID_number'][i] = ID
net['pore.boundary_type'][i] = _sp.fromfile(file=f, count=1,
dtype='u1')
z = int(_sp.fromfile(file=f, count=1, dtype='u4'))
z = _sp.fromfile(file=f, count=1, dtype='u4')[0]
net['pore.coordination'][i] = z
att_pores = _sp.fromfile(file=f, count=z, dtype='u4')
att_throats = _sp.fromfile(file=f, count=z, dtype='u4')
Expand All @@ -1249,7 +1249,7 @@ def load(cls, path, network=None, voxel_size=1, return_geometry=False):
net['pore.coords'] = _sp.array([ni, nj, nk]).T

with open(th2np_file, mode='rb') as f:
[Nt] = _sp.fromfile(file=f, count=1, dtype='u4')
Nt = _sp.fromfile(file=f, count=1, dtype='u4')[0]
net['throat.area'] = _sp.ones([Nt, ], dtype=int)*(-1)
for i in range(0, Nt):
ID = _sp.fromfile(file=f, count=1, dtype='u4')
Expand Down
28 changes: 14 additions & 14 deletions docs/userguide/reference/data_storage.rst
Original file line number Diff line number Diff line change
Expand Up @@ -81,8 +81,8 @@ The only distinction between *labels* and *properties* is that *labels* are Bool
.. code-block:: python
>>> pn['pore.top'][2] = False
>>> sp.where(pn['pore.top'])[0]
array([ 5, 8, 11, 14, 17, 20, 23, 26])
>>> list(sp.where(pn['pore.top'])[0])
[5, 8, 11, 14, 17, 20, 23, 26]
>>> pn['pore.top'][2] = True # Re-apply label to pore 2
Creating a new label array occurs automatically if a Boolean array is stored on an object:
Expand All @@ -104,22 +104,22 @@ The *label* functionality uses Scipy's ``where`` method to return a list of loca

.. code-block:: python
>>> sp.where(pn['pore.dummy_2'])[0]
array([3, 4, 5])
>>> list(sp.where(pn['pore.dummy_2'])[0])
[3, 4, 5]
The ``pores`` and ``throats`` methods offer several useful enhancements to this approach. For instance, several labels can be queried at once:

.. code-block:: python
>>> pn.pores(['top', 'dummy_2'])
array([ 2, 3, 4, 5, 8, 11, 14, 17, 20, 23, 26])
>>> list(pn.pores(['top', 'dummy_2']))
[2, 3, 4, 5, 8, 11, 14, 17, 20, 23, 26]
And there is also a ``mode`` argument which can be used to apply *set theory* logic to the returned list:

.. code-block:: python
>>> pn.pores(['top', 'dummy_2'], mode='intersection')
array([5])
>>> list(pn.pores(['top', 'dummy_2'], mode='intersection'))
[5]
This *set* logic basically retrieves a list of all pores with the label ``'top'`` and a second list of pores with the label ``dummy_2``, and returns the ``'intersection'`` of these lists, or only pores that appear in both lists.

Expand Down Expand Up @@ -161,8 +161,8 @@ Another highly used feature is to retrieve a list of pores or throats that have

.. code-block:: python
>>> pn.pores('top')
array([ 2, 5, 8, 11, 14, 17, 20, 23, 26])
>>> list(pn.pores('top'))
[2, 5, 8, 11, 14, 17, 20, 23, 26]
The ``pores`` and ``throats`` methods both accept a *'mode'* argument that allows for *set-theory* logic to be applied to the query, such as returning 'unions' and 'intersections' of locations.

Expand All @@ -173,10 +173,10 @@ It is also possible to filter a list of pores or throats according to their labe
.. code-block:: python
>>> Ps = pn.pores('top')
>>> Ps
array([ 2, 5, 8, 11, 14, 17, 20, 23, 26])
>>> pn.filter_by_label(pores=Ps, labels='left')
array([ 2, 11, 20])
>>> list(Ps)
[2, 5, 8, 11, 14, 17, 20, 23, 26]
>>> list(pn.filter_by_label(pores=Ps, labels='left'))
[2, 11, 20]
The ``filter_by_label`` method also accepts a ``mode`` argument that applies additional filtering to the returned list using *set-theory*-type logic. In this case, the method will find sets of pores or throats that satisfies each given label, then determines the *union*, *intersection*, or *difference* of the given sets.

Expand Down
89 changes: 89 additions & 0 deletions test/unit/Base/CoreTest.py
Original file line number Diff line number Diff line change
Expand Up @@ -908,3 +908,92 @@ def test_check_data_health(self):
assert not a.health
assert a['pore.data_test'] == 'Has NaNs'
del self.net['pore.data_test']

def test_interleave_data_key_error(self):
net = OpenPNM.Network.Cubic(shape=[2, 2, 2])
Ps = net.pores('top')
geom1 = OpenPNM.Geometry.GenericGeometry(network=net, pores=Ps)
Ps = net.pores('bottom')
geom2 = OpenPNM.Geometry.GenericGeometry(network=net, pores=Ps)
with pytest.raises(KeyError):
net['pore.blah']
with pytest.raises(KeyError):
geom1['pore.blah']
with pytest.raises(KeyError):
geom2['pore.blah']

def test_interleave_data_bool(self):
net = OpenPNM.Network.Cubic(shape=[2, 2, 2])
Ps = net.pores('top')
geom1 = OpenPNM.Geometry.GenericGeometry(network=net, pores=Ps)
Ps = net.pores('bottom')
geom2 = OpenPNM.Geometry.GenericGeometry(network=net, pores=Ps)
# Ensure Falses return in missing places
geom1['pore.blah'] = True
assert sp.all(~geom2['pore.blah'])
assert sp.sum(net['pore.blah']) == 4
# Ensure all Trues returned now
geom2['pore.blah'] = True
assert sp.all(geom2['pore.blah'])
assert sp.sum(net['pore.blah']) == 8

def test_interleave_data_int(self):
net = OpenPNM.Network.Cubic(shape=[2, 2, 2])
Ps = net.pores('top')
geom1 = OpenPNM.Geometry.GenericGeometry(network=net, pores=Ps)
Ps = net.pores('bottom')
geom2 = OpenPNM.Geometry.GenericGeometry(network=net, pores=Ps)
geom1['pore.blah'] = 1
# Ensure ints are returned geom1
assert 'int' in geom1['pore.blah'].dtype.name
# Ensure nans are returned on geom2
assert sp.all(sp.isnan(geom2['pore.blah']))
# Ensure interleaved array is float with nans
assert 'float' in net['pore.blah'].dtype.name
# Ensure missing values are floats
assert sp.sum(sp.isnan(net['pore.blah'])) == 4

def test_interleave_data_float(self):
net = OpenPNM.Network.Cubic(shape=[2, 2, 2])
Ps = net.pores('top')
geom1 = OpenPNM.Geometry.GenericGeometry(network=net, pores=Ps)
Ps = net.pores('bottom')
geom2 = OpenPNM.Geometry.GenericGeometry(network=net, pores=Ps)
geom1['pore.blah'] = 1.0
# Ensure flaots are returned geom1
assert 'float' in geom1['pore.blah'].dtype.name
# Ensure nans are returned on geom2
assert sp.all(sp.isnan(geom2['pore.blah']))
# Ensure interleaved array is float with nans
assert 'float' in net['pore.blah'].dtype.name
# Ensure missing values are floats
assert sp.sum(sp.isnan(net['pore.blah'])) == 4

def test_interleave_data_object(self):
net = OpenPNM.Network.Cubic(shape=[2, 2, 2])
Ps = net.pores('top')
geom1 = OpenPNM.Geometry.GenericGeometry(network=net, pores=Ps)
Ps = net.pores('bottom')
geom2 = OpenPNM.Geometry.GenericGeometry(network=net, pores=Ps)
geom1['pore.blah'] = [[1, 2], [1, 2, 3], [1, 2, 3, 4], [1]]
assert 'object' in net['pore.blah'].dtype.name
# Ensure missing elements are None
assert sp.sum([item is None for item in net['pore.blah']]) == 4

def test_interleave_data_float_missing_geometry(self):
net = OpenPNM.Network.Cubic(shape=[2, 2, 2])
geom = OpenPNM.Geometry.GenericGeometry(network=net, pores=[0, 1, 2])
geom['pore.blah'] = 1.0
assert sp.any(sp.isnan(net['pore.blah']))

def test_interleave_data_int_missing_geometry(self):
net = OpenPNM.Network.Cubic(shape=[2, 2, 2])
geom = OpenPNM.Geometry.GenericGeometry(network=net, pores=[0, 1, 2])
geom['pore.blah'] = 1
assert sp.any(sp.isnan(net['pore.blah']))

def test_interleave_data_bool_missing_geometry(self):
net = OpenPNM.Network.Cubic(shape=[2, 2, 2])
geom = OpenPNM.Geometry.GenericGeometry(network=net, pores=[0, 1, 2])
geom['pore.blah'] = True
assert sp.sum(net['pore.blah']) == geom.Np
2 changes: 2 additions & 0 deletions test/unit/Utilities/UtilitiesMiscTest.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
import OpenPNM
import OpenPNM.Utilities.misc as misc
import scipy as sp
import time


class UtilitiesMiscTest:
Expand Down Expand Up @@ -41,6 +42,7 @@ def test_tic_toc(self):
assert t is None
t = misc.toc(quiet=False)
assert t is None
time.sleep(0.005)
t = misc.toc(quiet=True)
assert t > 0

Expand Down

0 comments on commit f9cdb7d

Please sign in to comment.