Merge pull request #665 from PMEAL/fix_interleave_data

fix the bug in interleave data
PMEAL · Feb 16, 2017 · f9cdb7d · f9cdb7d
2 parents 58efd04 + 8f57eda
commit f9cdb7d
Show file tree

Hide file tree

Showing 6 changed files with 184 additions and 106 deletions.
diff --git a/OpenPNM/Base/__Core__.py b/OpenPNM/Base/__Core__.py
@@ -833,8 +833,11 @@ def _interleave_data(self, prop, sources):
         Notes
         -----
         This makes an effort to maintain the data 'type' when possible; however
-        when data is missing this can be tricky.  Float and boolean data is
-        fine, but missing ints are converted to float when nans are inserted.
+        when data is missing this can be tricky.  Data can be missing in two
+        different ways: A set of pores is not assisgned to a geometry or the
+        network contains multiple geometries and data does not exist on all.
+        Float and boolean data is fine, but missing ints are converted to float
+        when nans are inserted.
 
         Examples
         --------
@@ -851,103 +854,87 @@ def _interleave_data(self, prop, sources):
         >>> Ts = pn.find_neighbor_throats(pores=Ps,
         ...                               mode='not_intersection')
         >>> boun = OpenPNM.Geometry.Boundary(network=pn, pores=Ps, throats=Ts)
+        >>> geom['pore.test_float'] = sp.random.random(geom.Np)
+        >>> print(sp.sum(~sp.isnan(pn['pore.test_float'])) == geom.Np)
+        True
+        >>> boun['pore.test_float'] = sp.random.random(boun.Np)
+        >>> print(sp.sum(~sp.isnan(pn['pore.test_float'])) == pn.Np)
+        True
         >>> geom['pore.test_int'] = sp.random.randint(0, 100, geom.Np)
-        >>> print(pn['pore.test_int'].dtype)
-        float64
+        >>> print(pn['pore.test_int'].dtype.name.startswith('float'))
+        True
         >>> boun['pore.test_int'] = sp.ones(boun.Np).astype(int)
-        >>> boun['pore.test_int'] = sp.rand(boun.Np) < 0.5
-        >>> print(pn['pore.test_int'].dtype)
-        bool
-        >>> geom['pore.test_bool'] = sp.rand(geom.Np) < 0.5
-        >>> print(pn['pore.test_bool'].dtype)
-        bool
-        >>> boun['pore.test_bool'] = sp.ones(boun.Np).astype(int)
-        >>> print(pn['pore.test_bool'].dtype)
-        bool
-        >>> boun['pore.test_bool'] = sp.rand(boun.Np) < 0.5
-        >>> print(pn['pore.test_bool'].dtype)
-        bool
+        >>> print(pn['pore.test_int'].dtype.name.startswith('int'))
+        True
+        >>> geom['pore.test_bool'] = True
+        >>> print(sp.sum(pn['pore.test_bool']) == geom.Np)
+        True
+        >>> boun['pore.test_bool'] = True
+        >>> print(sp.sum(pn['pore.test_bool']) == pn.Np)
+        True
         """
-        element = prop.split('.')[0]
-        element = self._parse_element(element)
-        temp = sp.ndarray((self._count(element)))
-        nan_locs = sp.ndarray((self._count(element)), dtype='bool')
-        nan_locs.fill(False)
-        bool_locs = sp.ndarray((self._count(element)), dtype='bool')
-        bool_locs.fill(False)
-        dtypes = []
-        dtypenames = []
-        prop_found = False  # Flag to indicate if prop found on a sub-object
-        values_dim = 0
+        element = self._parse_element(prop.split('.')[0], single=True)
+        N = self._net._count(element)
+
+        # Make sure sources contains objects, not just names of objects
+        temp_sources = []
         for item in sources:
             # Check if sources were given as list of objects OR names
             try:
                 item.name
             except:
                 item = self._find_object(obj_name=item)
-            locations = self._get_indices(element=element,
-                                          labels=item.name,
-                                          mode='union')
-            if prop not in item.keys():
-                values = sp.ones_like(temp[locations])*sp.nan
-                dtypenames.append('nan')
-                dtypes.append(sp.dtype(bool))
-                nan_locs[locations] = True
-            else:
-                prop_found = True
-                values = item[prop]
-                dtypenames.append(values.dtype.name)
-                dtypes.append(values.dtype)
-                if values.dtype == 'bool':
-                    bool_locs[locations] = True
-                try:
-                    values_dim = sp.shape(values)[1]
-                except:
-                    pass
-            if values_dim > 0:
-                try:
-                    temp_dim = sp.shape(temp)[1]
-                    if temp_dim != values_dim:
-                        logger.warning(prop+' data has different dimensions,' +
-                                       'consider revising data in object ' +
-                                       str(item.name))
-                except:
-                    temp = sp.ndarray([self._count(element), values_dim])
-            if values.dtype == 'object' and temp.dtype != 'object':
-                temp = temp.astype('object')
-            temp[locations] = values  # Assign values
-        # Check if requested prop was found on any sub-objects
-        if prop_found is False:
+            temp_sources.append(item)
+        sources = temp_sources
+
+        # Attempt to fetch the requested prop array from each object
+        arrs = [item.get(prop) for item in sources]
+        locs = [item._net._get_indices(element, item.name) for item in sources]
+        sizes = [sp.size(a) for a in arrs]
+        if all([item is None for item in arrs]):  # prop not found anywhere
             raise KeyError(prop)
-        # Analyze and assign data type
-        types_temp = ['int', 'nan', 'float', 'int32', 'int64', 'float32',
-                      'float64', 'bool']
-        if sp.all([t in ['bool', 'nan'] for t in dtypenames]):
-            # If all entries are 'bool' (or 'nan')
-            temp = sp.array(temp, dtype='bool')
-            if sp.sum(nan_locs) > 0:
-                temp[nan_locs] = False
-        elif sp.all([t == dtypenames[0] for t in dtypenames]):
-            # If all entries are same type
-            temp = sp.array(temp, dtype=dtypes[0])
-        elif sp.all([t in types_temp for t in dtypenames]):
-            # If all entries are 'bool' (or 'nan')
-            if 'bool' in dtypenames:
-                temp = sp.array(temp, dtype='bool')
-                temp[~bool_locs] = False
-                logger.info(prop+' has been converted to bool,' +
-                            ' some data may be lost')
-            else:
-                temp = sp.array(temp, dtype='float')
-                logger.info(prop+' has been converted to float.')
-        elif sp.all([t in ['object', 'nan'] for t in dtypenames]):
-            # If all entries are 'bool' (or 'nan')
-            pass
+        if sp.any([i is None for i in arrs]):  # prop not found everywhere
+            logger.warning('\''+prop+'\' not found on at least one object')
+
+        # Check the general type of each array
+        atype = []
+        for a in arrs:
+            if a is not None:
+                t = a.dtype.name
+                if t.startswith('int') or t.startswith('float'):
+                    atype.append('numeric')
+                elif t.startswith('bool'):
+                    atype.append('boolean')
+                else:
+                    atype.append('other')
+        if not all([item == atype[0] for item in atype]):
+            raise Exception('The array types are not compatible')
         else:
-            temp = sp.array(temp, dtype=max(dtypes))
-            logger.info('Data type of '+prop+' differs between sub-objects' +
-                        '...converting to larger data type')
-        return temp
+            dummy_val = {'numeric': sp.nan, 'boolean': False, 'other': None}
+
+        # Create an empty array of the right type and shape
+        for item in arrs:
+            if item is not None:
+                if len(item.shape) == 1:
+                    temp_arr = sp.zeros((N, ), dtype=item.dtype)
+                else:
+                    temp_arr = sp.zeros((N, item.shape[1]), dtype=item.dtype)
+                temp_arr.fill(dummy_val[atype[0]])
+
+        # Convrert int arrays to float IF NaNs are expected
+        if (temp_arr.dtype.name.startswith('int') and
+            (sp.any([i is None for i in arrs]) or
+             sp.sum(sizes) != N)):
+            temp_arr = temp_arr.astype(float)
+            temp_arr.fill(sp.nan)
+
+        # Fill new array with values in the corresponding locations
+        for vals, inds in zip(arrs, locs):
+            if vals is not None:
+                temp_arr[inds] = vals
+            else:
+                temp_arr[inds] = dummy_val[atype[0]]
+        return temp_arr
 
     def num_pores(self, labels='all', mode='union'):
         r"""

diff --git a/OpenPNM/Network/tools.py b/OpenPNM/Network/tools.py
@@ -741,8 +741,8 @@ def _template_sphere_disc(dim, outer_radius, inner_radius):
     """
     rmax = _np.array(outer_radius, ndmin=1)
     rmin = _np.array(inner_radius, ndmin=1)
-    ind = _np.array(_np.around(2 * rmax - 1), dtype=int)
-    coord = _np.indices(ind * _np.ones(int(dim), dtype=int))
+    ind = 2 * rmax - 1
+    coord = _np.indices((ind * _np.ones(dim, dtype=int)))
     coord = coord - (ind - 1)/2
     x = coord[0, :]
     y = coord[1, :]

diff --git a/OpenPNM/Utilities/IO.py b/OpenPNM/Utilities/IO.py
@@ -1229,7 +1229,7 @@ def load(cls, path, network=None, voxel_size=1, return_geometry=False):
                 net['pore.ID_number'][i] = ID
                 net['pore.boundary_type'][i] = _sp.fromfile(file=f, count=1,
                                                             dtype='u1')
-                z = int(_sp.fromfile(file=f, count=1, dtype='u4'))
+                z = _sp.fromfile(file=f, count=1, dtype='u4')[0]
                 net['pore.coordination'][i] = z
                 att_pores = _sp.fromfile(file=f, count=z, dtype='u4')
                 att_throats = _sp.fromfile(file=f, count=z, dtype='u4')
@@ -1249,7 +1249,7 @@ def load(cls, path, network=None, voxel_size=1, return_geometry=False):
             net['pore.coords'] = _sp.array([ni, nj, nk]).T
 
         with open(th2np_file, mode='rb') as f:
-            [Nt] = _sp.fromfile(file=f, count=1, dtype='u4')
+            Nt = _sp.fromfile(file=f, count=1, dtype='u4')[0]
             net['throat.area'] = _sp.ones([Nt, ], dtype=int)*(-1)
             for i in range(0, Nt):
                 ID = _sp.fromfile(file=f, count=1, dtype='u4')

diff --git a/docs/userguide/reference/data_storage.rst b/docs/userguide/reference/data_storage.rst
@@ -81,8 +81,8 @@ The only distinction between *labels* and *properties* is that *labels* are Bool
 .. code-block:: python
 
     >>> pn['pore.top'][2] = False
-    >>> sp.where(pn['pore.top'])[0]
-    array([ 5,  8, 11, 14, 17, 20, 23, 26])
+    >>> list(sp.where(pn['pore.top'])[0])
+    [5, 8, 11, 14, 17, 20, 23, 26]
     >>> pn['pore.top'][2] = True  # Re-apply label to pore 2
 
 Creating a new label array occurs automatically if a Boolean array is stored on an object:
@@ -104,22 +104,22 @@ The *label* functionality uses Scipy's ``where`` method to return a list of loca
 
 .. code-block:: python
 
-    >>> sp.where(pn['pore.dummy_2'])[0]
-    array([3, 4, 5])
+    >>> list(sp.where(pn['pore.dummy_2'])[0])
+    [3, 4, 5]
 
 The ``pores`` and ``throats`` methods offer several useful enhancements to this approach.  For instance, several labels can be queried at once:
 
 .. code-block:: python
 
-    >>> pn.pores(['top', 'dummy_2'])
-    array([ 2,  3,  4,  5,  8, 11, 14, 17, 20, 23, 26])
+    >>> list(pn.pores(['top', 'dummy_2']))
+    [2, 3, 4, 5, 8, 11, 14, 17, 20, 23, 26]
 
 And there is also a ``mode`` argument which can be used to apply *set theory* logic to the returned list:
 
 .. code-block:: python
 
-    >>> pn.pores(['top', 'dummy_2'], mode='intersection')
-    array([5])
+    >>> list(pn.pores(['top', 'dummy_2'], mode='intersection'))
+    [5]
 
 This *set* logic basically retrieves a list of all pores with the label ``'top'`` and a second list of pores with the label ``dummy_2``, and returns the ``'intersection'`` of these lists, or only pores that appear in both lists.
 
@@ -161,8 +161,8 @@ Another highly used feature is to retrieve a list of pores or throats that have
 
 .. code-block:: python
 
-    >>> pn.pores('top')
-    array([ 2,  5,  8, 11, 14, 17, 20, 23, 26])
+    >>> list(pn.pores('top'))
+    [2, 5, 8, 11, 14, 17, 20, 23, 26]
 
 The ``pores`` and ``throats`` methods both accept a *'mode'* argument that allows for *set-theory* logic to be applied to the query, such as returning 'unions' and 'intersections' of locations.
 
@@ -173,10 +173,10 @@ It is also possible to filter a list of pores or throats according to their labe
 .. code-block:: python
 
     >>> Ps = pn.pores('top')
-    >>> Ps
-    array([ 2,  5,  8, 11, 14, 17, 20, 23, 26])
-    >>> pn.filter_by_label(pores=Ps, labels='left')
-    array([ 2, 11, 20])
+    >>> list(Ps)
+    [2, 5, 8, 11, 14, 17, 20, 23, 26]
+    >>> list(pn.filter_by_label(pores=Ps, labels='left'))
+    [2, 11, 20]
 
 The ``filter_by_label`` method also accepts a ``mode`` argument that applies additional filtering to the returned list using *set-theory*-type logic.  In this case, the method will find sets of pores or throats that satisfies each given label, then determines the *union*, *intersection*, or *difference* of the given sets.
 

diff --git a/test/unit/Base/CoreTest.py b/test/unit/Base/CoreTest.py
@@ -908,3 +908,92 @@ def test_check_data_health(self):
         assert not a.health
         assert a['pore.data_test'] == 'Has NaNs'
         del self.net['pore.data_test']
+
+    def test_interleave_data_key_error(self):
+        net = OpenPNM.Network.Cubic(shape=[2, 2, 2])
+        Ps = net.pores('top')
+        geom1 = OpenPNM.Geometry.GenericGeometry(network=net, pores=Ps)
+        Ps = net.pores('bottom')
+        geom2 = OpenPNM.Geometry.GenericGeometry(network=net, pores=Ps)
+        with pytest.raises(KeyError):
+            net['pore.blah']
+        with pytest.raises(KeyError):
+            geom1['pore.blah']
+        with pytest.raises(KeyError):
+            geom2['pore.blah']
+
+    def test_interleave_data_bool(self):
+        net = OpenPNM.Network.Cubic(shape=[2, 2, 2])
+        Ps = net.pores('top')
+        geom1 = OpenPNM.Geometry.GenericGeometry(network=net, pores=Ps)
+        Ps = net.pores('bottom')
+        geom2 = OpenPNM.Geometry.GenericGeometry(network=net, pores=Ps)
+        # Ensure Falses return in missing places
+        geom1['pore.blah'] = True
+        assert sp.all(~geom2['pore.blah'])
+        assert sp.sum(net['pore.blah']) == 4
+        # Ensure all Trues returned now
+        geom2['pore.blah'] = True
+        assert sp.all(geom2['pore.blah'])
+        assert sp.sum(net['pore.blah']) == 8
+
+    def test_interleave_data_int(self):
+        net = OpenPNM.Network.Cubic(shape=[2, 2, 2])
+        Ps = net.pores('top')
+        geom1 = OpenPNM.Geometry.GenericGeometry(network=net, pores=Ps)
+        Ps = net.pores('bottom')
+        geom2 = OpenPNM.Geometry.GenericGeometry(network=net, pores=Ps)
+        geom1['pore.blah'] = 1
+        # Ensure ints are returned geom1
+        assert 'int' in geom1['pore.blah'].dtype.name
+        # Ensure nans are returned on geom2
+        assert sp.all(sp.isnan(geom2['pore.blah']))
+        # Ensure interleaved array is float with nans
+        assert 'float' in net['pore.blah'].dtype.name
+        # Ensure missing values are floats
+        assert sp.sum(sp.isnan(net['pore.blah'])) == 4
+
+    def test_interleave_data_float(self):
+        net = OpenPNM.Network.Cubic(shape=[2, 2, 2])
+        Ps = net.pores('top')
+        geom1 = OpenPNM.Geometry.GenericGeometry(network=net, pores=Ps)
+        Ps = net.pores('bottom')
+        geom2 = OpenPNM.Geometry.GenericGeometry(network=net, pores=Ps)
+        geom1['pore.blah'] = 1.0
+        # Ensure flaots are returned geom1
+        assert 'float' in geom1['pore.blah'].dtype.name
+        # Ensure nans are returned on geom2
+        assert sp.all(sp.isnan(geom2['pore.blah']))
+        # Ensure interleaved array is float with nans
+        assert 'float' in net['pore.blah'].dtype.name
+        # Ensure missing values are floats
+        assert sp.sum(sp.isnan(net['pore.blah'])) == 4
+
+    def test_interleave_data_object(self):
+        net = OpenPNM.Network.Cubic(shape=[2, 2, 2])
+        Ps = net.pores('top')
+        geom1 = OpenPNM.Geometry.GenericGeometry(network=net, pores=Ps)
+        Ps = net.pores('bottom')
+        geom2 = OpenPNM.Geometry.GenericGeometry(network=net, pores=Ps)
+        geom1['pore.blah'] = [[1, 2], [1, 2, 3], [1, 2, 3, 4], [1]]
+        assert 'object' in net['pore.blah'].dtype.name
+        # Ensure missing elements are None
+        assert sp.sum([item is None for item in net['pore.blah']]) == 4
+
+    def test_interleave_data_float_missing_geometry(self):
+        net = OpenPNM.Network.Cubic(shape=[2, 2, 2])
+        geom = OpenPNM.Geometry.GenericGeometry(network=net, pores=[0, 1, 2])
+        geom['pore.blah'] = 1.0
+        assert sp.any(sp.isnan(net['pore.blah']))
+
+    def test_interleave_data_int_missing_geometry(self):
+        net = OpenPNM.Network.Cubic(shape=[2, 2, 2])
+        geom = OpenPNM.Geometry.GenericGeometry(network=net, pores=[0, 1, 2])
+        geom['pore.blah'] = 1
+        assert sp.any(sp.isnan(net['pore.blah']))
+
+    def test_interleave_data_bool_missing_geometry(self):
+        net = OpenPNM.Network.Cubic(shape=[2, 2, 2])
+        geom = OpenPNM.Geometry.GenericGeometry(network=net, pores=[0, 1, 2])
+        geom['pore.blah'] = True
+        assert sp.sum(net['pore.blah']) == geom.Np
diff --git a/test/unit/Utilities/UtilitiesMiscTest.py b/test/unit/Utilities/UtilitiesMiscTest.py
@@ -1,6 +1,7 @@
 import OpenPNM
 import OpenPNM.Utilities.misc as misc
 import scipy as sp
+import time
 
 
 class UtilitiesMiscTest:
@@ -41,6 +42,7 @@ def test_tic_toc(self):
         assert t is None
         t = misc.toc(quiet=False)
         assert t is None
+        time.sleep(0.005)
         t = misc.toc(quiet=True)
         assert t > 0