From 04ecf5490796ce023192076e35e8ffe815da8f62 Mon Sep 17 00:00:00 2001
From: Paul Romano <paul.k.romano@gmail.com>
Date: Fri, 11 Oct 2024 06:13:10 -0500
Subject: [PATCH] Improve clipping of Mixture distributions (#3154)

Co-authored-by: Ethan Peterson <eepeterson3@gmail.com>
---
 openmc/material.py                            |   7 +-
 openmc/stats/univariate.py                    | 126 ++++++++++++------
 src/distribution.cpp                          |   3 +
 tests/regression_tests/source/inputs_true.dat |  18 +--
 tests/unit_tests/test_stats.py                |  18 ++-
 5 files changed, 122 insertions(+), 50 deletions(-)

diff --git a/openmc/material.py b/openmc/material.py
index 63994b3055f..1213ea669d4 100644
--- a/openmc/material.py
+++ b/openmc/material.py
@@ -326,7 +326,7 @@ def get_decay_photon_energy(
         probs = []
         for nuc, atoms_per_bcm in self.get_nuclide_atom_densities().items():
             source_per_atom = openmc.data.decay_photon_energy(nuc)
-            if source_per_atom is not None:
+            if source_per_atom is not None and atoms_per_bcm > 0.0:
                 dists.append(source_per_atom)
                 probs.append(1e24 * atoms_per_bcm * multiplier)
 
@@ -339,6 +339,11 @@ def get_decay_photon_energy(
         if isinstance(combined, (Discrete, Mixture)):
             combined.clip(clip_tolerance, inplace=True)
 
+        # If clipping resulted in a single distribution within a mixture, pick
+        # out that single distribution
+        if isinstance(combined, Mixture) and len(combined.distribution) == 1:
+            combined = combined.distribution[0]
+
         return combined
 
     @classmethod
diff --git a/openmc/stats/univariate.py b/openmc/stats/univariate.py
index 10822c06fa7..0dc6f385685 100644
--- a/openmc/stats/univariate.py
+++ b/openmc/stats/univariate.py
@@ -97,6 +97,45 @@ def integral(self):
         return 1.0
 
 
+def _intensity_clip(intensity: Sequence[float], tolerance: float = 1e-6) -> np.ndarray:
+    """Clip low-importance points from an array of intensities.
+
+    Given an array of intensities, this function returns an array of indices for
+    points that contribute non-negligibly to the total sum of intensities.
+
+    Parameters
+    ----------
+    intensity : sequence of float
+        Intensities in arbitrary units.
+    tolerance : float
+        Maximum fraction of intensities that will be discarded.
+
+    Returns
+    -------
+    Array of indices
+
+    """
+    # Get indices of intensities from largest to smallest
+    index_sort = np.argsort(intensity)[::-1]
+
+    # Get intensities from largest to smallest
+    sorted_intensity = np.asarray(intensity)[index_sort]
+
+    # Determine cumulative sum of probabilities
+    cumsum = np.cumsum(sorted_intensity)
+    cumsum /= cumsum[-1]
+
+    # Find index that satisfies cutoff
+    index_cutoff = np.searchsorted(cumsum, 1.0 - tolerance)
+
+    # Now get indices up to cutoff
+    new_indices = index_sort[:index_cutoff + 1]
+
+    # Put back in the order of the original array and return
+    new_indices.sort()
+    return new_indices
+
+
 class Discrete(Univariate):
     """Distribution characterized by a probability mass function.
 
@@ -283,32 +322,20 @@ def clip(self, tolerance: float = 1e-6, inplace: bool = False) -> Discrete:
         cv.check_less_than("tolerance", tolerance, 1.0, equality=True)
         cv.check_greater_than("tolerance", tolerance, 0.0, equality=True)
 
-        # Determine (reversed) sorted order of probabilities
+        # Compute intensities
         intensity = self.p * self.x
-        index_sort = np.argsort(intensity)[::-1]
 
-        # Get probabilities in above order
-        sorted_intensity = intensity[index_sort]
-
-        # Determine cumulative sum of probabilities
-        cumsum = np.cumsum(sorted_intensity)
-        cumsum /= cumsum[-1]
-
-        # Find index which satisfies cutoff
-        index_cutoff = np.searchsorted(cumsum, 1.0 - tolerance)
-
-        # Now get indices up to cutoff
-        new_indices = index_sort[:index_cutoff + 1]
-        new_indices.sort()
+        # Get indices for intensities above threshold
+        indices = _intensity_clip(intensity, tolerance=tolerance)
 
         # Create new discrete distribution
         if inplace:
-            self.x = self.x[new_indices]
-            self.p = self.p[new_indices]
+            self.x = self.x[indices]
+            self.p = self.p[indices]
             return self
         else:
-            new_x = self.x[new_indices]
-            new_p = self.p[new_indices]
+            new_x = self.x[indices]
+            new_p = self.p[indices]
             return type(self)(new_x, new_p)
 
 
@@ -1206,7 +1233,7 @@ def probability(self, probability):
         for p in probability:
             cv.check_greater_than('mixture distribution probabilities',
                                   p, 0.0, True)
-        self._probability = probability
+        self._probability = np.array(probability, dtype=float)
 
     @property
     def distribution(self):
@@ -1312,40 +1339,63 @@ def integral(self):
         ])
 
     def clip(self, tolerance: float = 1e-6, inplace: bool = False) -> Mixture:
-        r"""Remove low-importance points from contained discrete distributions.
+        r"""Remove low-importance points / distributions
 
-        Given a probability mass function :math:`p(x)` with :math:`\{x_1, x_2,
-        x_3, \dots\}` the possible values of the random variable with
-        corresponding probabilities :math:`\{p_1, p_2, p_3, \dots\}`, this
-        function will remove any low-importance points such that :math:`\sum_i
-        x_i p_i` is preserved to within some threshold.
+        Like :meth:`Discrete.clip`, this method will remove low-importance
+        points from discrete distributions contained within the mixture but it
+        will also clip any distributions that have negligible contributions to
+        the overall intensity.
 
         .. versionadded:: 0.14.0
 
         Parameters
         ----------
         tolerance : float
-            Maximum fraction of :math:`\sum_i x_i p_i` that will be discarded
-            for any discrete distributions within the mixture distribution.
+            Maximum fraction of intensities that will be discarded.
         inplace : bool
             Whether to modify the current object in-place or return a new one.
 
         Returns
         -------
-        Discrete distribution with low-importance points removed
+        Distribution with low-importance points / distributions removed
 
         """
+        # Determine integral of original distribution to compare later
+        original_integral = self.integral()
+
+        # Determine indices for any distributions that contribute non-negligibly
+        # to overall intensity
+        intensities = [prob*dist.integral() for prob, dist in
+                       zip(self.probability, self.distribution)]
+        indices = _intensity_clip(intensities, tolerance=tolerance)
+
+        # Clip mixture of distributions
+        probability = self.probability[indices]
+        distribution = [self.distribution[i] for i in indices]
+
+        # Clip points from Discrete distributions
+        distribution = [
+            dist.clip(tolerance, inplace) if isinstance(dist, Discrete) else dist
+            for dist in distribution
+        ]
+
         if inplace:
-            for dist in self.distribution:
-                if isinstance(dist, Discrete):
-                    dist.clip(tolerance, inplace=True)
-            return self
+            # Set attributes of current object and return
+            self.probability = probability
+            self.distribution = distribution
+            new_dist = self
         else:
-            distribution = [
-                dist.clip(tolerance) if isinstance(dist, Discrete) else dist
-                for dist in self.distribution
-            ]
-            return type(self)(self.probability, distribution)
+            # Create new distribution
+            new_dist = type(self)(probability, distribution)
+
+        # Show warning if integral of new distribution is not within
+        # tolerance of original
+        diff = (original_integral - new_dist.integral())/original_integral
+        if diff > tolerance:
+            warn("Clipping mixture distribution resulted in an integral that is "
+                    f"lower by a fraction of {diff} when tolerance={tolerance}.")
+
+        return new_dist
 
 
 def combine_distributions(
diff --git a/src/distribution.cpp b/src/distribution.cpp
index 3026630b335..a6b4acd58b1 100644
--- a/src/distribution.cpp
+++ b/src/distribution.cpp
@@ -394,6 +394,9 @@ Mixture::Mixture(pugi::xml_node node)
     distribution_.push_back(std::make_pair(cumsum, std::move(dist)));
   }
 
+  // Save integral of distribution
+  integral_ = cumsum;
+
   // Normalize cummulative probabilities to 1
   for (auto& pair : distribution_) {
     pair.first /= cumsum;
diff --git a/tests/regression_tests/source/inputs_true.dat b/tests/regression_tests/source/inputs_true.dat
index c1a616dd109..e787f24d413 100644
--- a/tests/regression_tests/source/inputs_true.dat
+++ b/tests/regression_tests/source/inputs_true.dat
@@ -85,13 +85,13 @@
       </space>
       <angle type="isotropic"/>
       <energy type="mixture">
-        <pair probability="1">
+        <pair probability="1.0">
           <dist parameters="1289500.0" type="maxwell"/>
         </pair>
-        <pair probability="2">
+        <pair probability="2.0">
           <dist parameters="988000.0 2.249e-06" type="watt"/>
         </pair>
-        <pair probability="3">
+        <pair probability="3.0">
           <dist interpolation="histogram" type="tabular">
             <parameters>1.0 1.3894954943731377 1.93069772888325 2.6826957952797255 3.72759372031494 5.17947467923121 7.196856730011519 10.0 13.894954943731374 19.306977288832496 26.826957952797247 37.2759372031494 51.7947467923121 71.96856730011518 100.0 138.94954943731375 193.06977288832496 268.26957952797244 372.7593720314938 517.9474679231207 719.6856730011514 1000.0 1389.4954943731375 1930.6977288832495 2682.6957952797247 3727.593720314938 5179.474679231207 7196.856730011514 10000.0 13894.95494373136 19306.977288832495 26826.95795279722 37275.93720314938 51794.74679231213 71968.56730011514 100000.0 138949.5494373136 193069.77288832495 268269.5795279722 372759.3720314938 517947.4679231202 719685.6730011514 1000000.0 1389495.494373136 1930697.7288832497 2682695.7952797217 3727593.720314938 5179474.679231202 7196856.730011513 10000000.0 0.0 2.9086439299358713e-08 5.80533561806147e-08 8.67817193689187e-08 1.1515347785771536e-07 1.4305204600565115e-07 1.7036278261198208e-07 1.9697346200185813e-07 2.227747351856934e-07 2.4766057919761985e-07 2.715287327665956e-07 2.9428111652990295e-07 3.1582423606228735e-07 3.360695660646056e-07 3.549339141332686e-07 3.723397626156721e-07 3.882155871468592e-07 4.024961505584776e-07 4.151227709522976e-07 4.260435628367196e-07 4.3521365033538783e-07 4.4259535159179273e-07 4.4815833361210174e-07 4.5187973690993757e-07 4.5374426944091084e-07 4.5374426944091084e-07 4.5187973690993757e-07 4.4815833361210174e-07 4.4259535159179273e-07 4.352136503353879e-07 4.2604356283671966e-07 4.1512277095229767e-07 4.0249615055847764e-07 3.8821558714685926e-07 3.723397626156722e-07 3.5493391413326864e-07 3.360695660646057e-07 3.158242360622874e-07 2.942811165299031e-07 2.715287327665957e-07 2.4766057919762e-07 2.2277473518569352e-07 1.9697346200185819e-07 1.7036278261198226e-07 1.4305204600565126e-07 1.1515347785771556e-07 8.678171936891881e-08 5.805335618061493e-08 2.9086439299358858e-08 5.559621115282002e-23</parameters>
           </dist>
@@ -108,13 +108,13 @@
       </space>
       <angle type="isotropic"/>
       <energy type="mixture">
-        <pair probability="1">
+        <pair probability="1.0">
           <dist parameters="1289500.0" type="maxwell"/>
         </pair>
-        <pair probability="2">
+        <pair probability="2.0">
           <dist parameters="988000.0 2.249e-06" type="watt"/>
         </pair>
-        <pair probability="3">
+        <pair probability="3.0">
           <dist interpolation="histogram" type="tabular">
             <parameters>1.0 1.3894954943731377 1.93069772888325 2.6826957952797255 3.72759372031494 5.17947467923121 7.196856730011519 10.0 13.894954943731374 19.306977288832496 26.826957952797247 37.2759372031494 51.7947467923121 71.96856730011518 100.0 138.94954943731375 193.06977288832496 268.26957952797244 372.7593720314938 517.9474679231207 719.6856730011514 1000.0 1389.4954943731375 1930.6977288832495 2682.6957952797247 3727.593720314938 5179.474679231207 7196.856730011514 10000.0 13894.95494373136 19306.977288832495 26826.95795279722 37275.93720314938 51794.74679231213 71968.56730011514 100000.0 138949.5494373136 193069.77288832495 268269.5795279722 372759.3720314938 517947.4679231202 719685.6730011514 1000000.0 1389495.494373136 1930697.7288832497 2682695.7952797217 3727593.720314938 5179474.679231202 7196856.730011513 10000000.0 0.0 2.9086439299358713e-08 5.80533561806147e-08 8.67817193689187e-08 1.1515347785771536e-07 1.4305204600565115e-07 1.7036278261198208e-07 1.9697346200185813e-07 2.227747351856934e-07 2.4766057919761985e-07 2.715287327665956e-07 2.9428111652990295e-07 3.1582423606228735e-07 3.360695660646056e-07 3.549339141332686e-07 3.723397626156721e-07 3.882155871468592e-07 4.024961505584776e-07 4.151227709522976e-07 4.260435628367196e-07 4.3521365033538783e-07 4.4259535159179273e-07 4.4815833361210174e-07 4.5187973690993757e-07 4.5374426944091084e-07 4.5374426944091084e-07 4.5187973690993757e-07 4.4815833361210174e-07 4.4259535159179273e-07 4.352136503353879e-07 4.2604356283671966e-07 4.1512277095229767e-07 4.0249615055847764e-07 3.8821558714685926e-07 3.723397626156722e-07 3.5493391413326864e-07 3.360695660646057e-07 3.158242360622874e-07 2.942811165299031e-07 2.715287327665957e-07 2.4766057919762e-07 2.2277473518569352e-07 1.9697346200185819e-07 1.7036278261198226e-07 1.4305204600565126e-07 1.1515347785771556e-07 8.678171936891881e-08 5.805335618061493e-08 2.9086439299358858e-08 5.559621115282002e-23</parameters>
           </dist>
@@ -132,13 +132,13 @@
       </space>
       <angle type="isotropic"/>
       <energy type="mixture">
-        <pair probability="1">
+        <pair probability="1.0">
           <dist parameters="1289500.0" type="maxwell"/>
         </pair>
-        <pair probability="2">
+        <pair probability="2.0">
           <dist parameters="988000.0 2.249e-06" type="watt"/>
         </pair>
-        <pair probability="3">
+        <pair probability="3.0">
           <dist interpolation="histogram" type="tabular">
             <parameters>1.0 1.3894954943731377 1.93069772888325 2.6826957952797255 3.72759372031494 5.17947467923121 7.196856730011519 10.0 13.894954943731374 19.306977288832496 26.826957952797247 37.2759372031494 51.7947467923121 71.96856730011518 100.0 138.94954943731375 193.06977288832496 268.26957952797244 372.7593720314938 517.9474679231207 719.6856730011514 1000.0 1389.4954943731375 1930.6977288832495 2682.6957952797247 3727.593720314938 5179.474679231207 7196.856730011514 10000.0 13894.95494373136 19306.977288832495 26826.95795279722 37275.93720314938 51794.74679231213 71968.56730011514 100000.0 138949.5494373136 193069.77288832495 268269.5795279722 372759.3720314938 517947.4679231202 719685.6730011514 1000000.0 1389495.494373136 1930697.7288832497 2682695.7952797217 3727593.720314938 5179474.679231202 7196856.730011513 10000000.0 0.0 2.9086439299358713e-08 5.80533561806147e-08 8.67817193689187e-08 1.1515347785771536e-07 1.4305204600565115e-07 1.7036278261198208e-07 1.9697346200185813e-07 2.227747351856934e-07 2.4766057919761985e-07 2.715287327665956e-07 2.9428111652990295e-07 3.1582423606228735e-07 3.360695660646056e-07 3.549339141332686e-07 3.723397626156721e-07 3.882155871468592e-07 4.024961505584776e-07 4.151227709522976e-07 4.260435628367196e-07 4.3521365033538783e-07 4.4259535159179273e-07 4.4815833361210174e-07 4.5187973690993757e-07 4.5374426944091084e-07 4.5374426944091084e-07 4.5187973690993757e-07 4.4815833361210174e-07 4.4259535159179273e-07 4.352136503353879e-07 4.2604356283671966e-07 4.1512277095229767e-07 4.0249615055847764e-07 3.8821558714685926e-07 3.723397626156722e-07 3.5493391413326864e-07 3.360695660646057e-07 3.158242360622874e-07 2.942811165299031e-07 2.715287327665957e-07 2.4766057919762e-07 2.2277473518569352e-07 1.9697346200185819e-07 1.7036278261198226e-07 1.4305204600565126e-07 1.1515347785771556e-07 8.678171936891881e-08 5.805335618061493e-08 2.9086439299358858e-08 5.559621115282002e-23</parameters>
           </dist>
diff --git a/tests/unit_tests/test_stats.py b/tests/unit_tests/test_stats.py
index 0414fc22559..643e115564b 100644
--- a/tests/unit_tests/test_stats.py
+++ b/tests/unit_tests/test_stats.py
@@ -262,7 +262,7 @@ def test_mixture():
     d2 = openmc.stats.Uniform(3, 7)
     p = [0.5, 0.5]
     mix = openmc.stats.Mixture(p, [d1, d2])
-    assert mix.probability == p
+    np.testing.assert_allclose(mix.probability, p)
     assert mix.distribution == [d1, d2]
     assert len(mix) == 4
 
@@ -274,7 +274,7 @@ def test_mixture():
     elem = mix.to_xml_element('distribution')
 
     d = openmc.stats.Mixture.from_xml_element(elem)
-    assert d.probability == p
+    np.testing.assert_allclose(d.probability, p)
     assert d.distribution == [d1, d2]
     assert len(d) == 4
 
@@ -296,6 +296,20 @@ def test_mixture_clip():
     mix_same = mix.clip(1e-6, inplace=True)
     assert mix_same is mix
 
+    # Make sure clip removes low probability distributions
+    d_small = openmc.stats.Uniform(0., 1.)
+    d_large = openmc.stats.Uniform(2., 5.)
+    mix = openmc.stats.Mixture([1e-10, 1.0], [d_small, d_large])
+    mix_clip = mix.clip(1e-3)
+    assert mix_clip.distribution == [d_large]
+
+    # Make sure warning is raised if tolerance is exceeded
+    d1 = openmc.stats.Discrete([1.0, 1.001], [1.0, 0.7e-6])
+    d2 = openmc.stats.Tabular([0.0, 1.0], [0.7e-6], interpolation='histogram')
+    mix = openmc.stats.Mixture([1.0, 1.0], [d1, d2])
+    with pytest.warns(UserWarning):
+        mix_clip = mix.clip(1e-6)
+
 
 def test_polar_azimuthal():
     # default polar-azimuthal should be uniform in mu and phi