pytorch · duncan-clark · Sep 1, 2022 · Sep 8, 2022
diff --git a/opacus/accountants/__init__.py b/opacus/accountants/__init__.py
@@ -15,12 +15,14 @@
 from .accountant import IAccountant
 from .gdp import GaussianAccountant
 from .rdp import RDPAccountant
+from .rdp_analytic_moment import RDPAccountantAnalyticMoment
 
 
 __all__ = [
     "IAccountant",
     "GaussianAccountant",
     "RDPAccountant",
+    "RDPAccountantAnalyticMoment"
 ]
 
 
@@ -29,5 +31,7 @@ def create_accountant(mechanism: str) -> IAccountant:
         return RDPAccountant()
     elif mechanism == "gdp":
         return GaussianAccountant()
+    elif mechanism == "rdp_analytic":
+        return RDPAccountantAnalyticMoment()
 
     raise ValueError(f"Unexpected accounting mechanism: {mechanism}")
diff --git a/opacus/accountants/analysis/rdp.py b/opacus/accountants/analysis/rdp.py
@@ -101,6 +101,19 @@ def _log_sub(logx: float, logy: float) -> float:
     except OverflowError:
         return logx
 
+def _log_add_list(log_list: list) -> float:
+    r"""Adds a list of numbers in the log space.
+
+    Args:
+        log_list
+    Returns:
+        Sum of numbers in log space. i.e. log(exp(x_1) + exp(x_2) + exp(x_3) + ....)
+    """
+    a = np.max(log_list)
+    # Use exp(a) + exp(b) = (exp(a - b) + 1) * exp(b)
+    return a + np.log(np.sum(np.exp(log_list-a)))
+
+
 
 def _compute_log_a_for_int_alpha(q: float, sigma: float, alpha: int) -> float:
     r"""Computes :math:`log(A_\alpha)` for integer ``alpha``.
@@ -333,3 +346,114 @@ def get_privacy_spent(
             f"Optimal order is the {extreme} alpha. Please consider expanding the range of alphas to get a tighter privacy bound."
         )
     return eps[idx_opt], orders_vec[idx_opt]
+
+# ==================================
+# Functions for General Composition
+# ==================================
+
+# Functions used for implementation of Wang's Generalized analytic moment bounds for subsampled mechanisms
+# based on theorem 9 of https://arxiv.org/pdf/1808.00087.pdf and implementation  https://github.com/yuxiangw/autodp/blob/master/autodp/rdp_acct.py
+
+def logcomb(n, k):
+    return (special.gammaln(n+1) - special.gammaln(n-k+1) - special.gammaln(k+1))
+
+def _SGM_compute_rdp_subsample(q: float,sigma: float, alpha: float) -> float:
+    r"""Computes bound of RDP of the Sampled Mechanism at order ``alpha``.
+    Args:
+        q: Subsampling rate of
+        sigma: The standard deviation of the additive Gaussian noise.
+        alpha: The order at which RDP is computed.
+    Returns:
+        RDP at order ``alpha``; can be np.inf.s
+    """
+    # SGM rdp calculation:
+    def func(x):
+        return alpha / (2*sigma**2)
+
+    if q == 0:
+        return 0
+
+    if q == 1.0:
+        return func(alpha)
+
+    if np.isinf(alpha):
+        return np.inf
+
+    def cgf(x):
+        return x * func(x+1)
+
+    # since calculations rely on binomial expansion - we do the calculation for integer alpha and then interpolate
+    def subsample_func_int(x):
+        # output the cgf of the subsampled mechanism
+        mm = int(x)
+        eps_inf = func(np.inf)
+        eps_two = func(2.0)
+
+        moments_two = 2 * np.log(q) + logcomb(mm,2) \
+                        + np.minimum(np.log(4) + eps_two + np.log(1-np.exp(-eps_two)),
+                                    eps_two + np.minimum(np.log(2),
+                                                2 * (eps_inf+np.log(1-np.exp(-eps_inf)))))
+        moment_bound = lambda j: np.minimum(j * (eps_inf + np.log(1-np.exp(-eps_inf))),
+                                            np.log(2)) + cgf(j - 1) \
+                                    + j * np.log(q) + logcomb(mm, j)
+        moments = [moment_bound(j) for j in range(3, mm + 1, 1)]
+
+        return np.minimum((x-1)*func(x), _log_add_list([0,moments_two] + moments))
+
+    def subsample_func(x):
+        # This function returns the RDP at alpha = x
+        # RDP with the linear interpolation upper bound of the CGF
+
+        # FROM auto_dp repo :
+
+        # This result applies to both subsampling with replacement and Poisson subsampling.
+        # The result for Poisson subsmapling is due to Theorem 1 of :
+        # Li, Ninghui, Qardaji, Wahbeh, and Su, Dong. On sampling, anonymization, and differential privacy or,
+        # k-anonymization meets differential privacy
+        # The result for Subsampling with replacement is due to:
+        # Jon Ullman's lecture notes: http://www.ccs.neu.edu/home/jullman/PrivacyS17/HW1sol.pdf
+        # See the proof of (b)
+
+        epsinf =  np.log(1+q*(np.exp(func(np.inf))-1))
+
+        if np.isinf(x):
+            return epsinf
+        if q == 1.0:
+            return func(x)
+
+        if (x >= 1.0) and (x <= 2.0):
+            return np.minimum(epsinf, subsample_func_int(2.0) / (2.0-1))
+        if np.equal(np.mod(x, 1), 0):
+            return np.minimum(epsinf, subsample_func_int(x) / (x-1))
+        xc = math.ceil(x)
+        xf = math.floor(x)
+        return np.min(
+            [epsinf,func(x),
+                ((x-xf)*subsample_func_int(xc) + (1-(x-xf))*subsample_func_int(xf)) / (x-1)]
+        )
+
+    return subsample_func(alpha)
+
+def SGM_compute_general_subsampled_rdp_bound(
+    *, q: float, noise_multiplier: float, steps: int, orders: Union[List[float], float]
+) -> Union[List[float], float]:
+    r"""Computes Renyi Differential Privacy (RDP) analytic moment bound for general subsampled mechanism iterated ``steps`` times.
+    Note that Opacus actually does Poisson subsampling so this bound in not actually valid
+    Args:
+        q: Sampling rate of the mechanism
+        noise_multiplier: The ratio of the standard deviation of the
+            additive Gaussian noise to the L2-sensitivity of the function
+            to which it is added. Note that this is same as the standard
+            deviation of the additive Gaussian noise when the L2-sensitivity
+            of the function is 1.
+        steps: The number of iterations of the mechanism.
+        orders: An array (or a scalar) of RDP orders.
+    Returns:
+        The RDP guarantees at all orders; can be ``np.inf``.
+    """
+    if isinstance(orders, float):
+        rdp = _SGM_compute_rdp_subsample(q,noise_multiplier,orders)
+    else:
+        rdp = np.array([_SGM_compute_rdp_subsample(q,noise_multiplier, order) for order in orders])
+
+    return rdp * steps
diff --git a/opacus/accountants/rdp_analytic_moment.py b/opacus/accountants/rdp_analytic_moment.py
@@ -0,0 +1,88 @@
+# Copyright (c) Meta Platforms, Inc. and affiliates.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+from typing import List, Optional, Tuple, Union
+
+from .accountant import IAccountant
+from .analysis import rdp as privacy_analysis
+
+class RDPAccountantAnalyticMoment(IAccountant):
+    DEFAULT_ALPHAS = [1 + x / 10.0 for x in range(1, 100)] + list(range(12, 64))
+
+    def __init__(self):
+        super().__init__()
+
+    def step(self, *, noise_multiplier: float, sample_rate: float):
+        if len(self.history) >= 1:
+            last_noise_multiplier, last_sample_rate, num_steps = self.history.pop()
+            if (
+                last_noise_multiplier == noise_multiplier
+                and last_sample_rate == sample_rate
+            ):
+                self.history.append(
+                    (last_noise_multiplier, last_sample_rate, num_steps + 1)
+                )
+            else:
+                self.history.append(
+                    (last_noise_multiplier, last_sample_rate, num_steps)
+                )
+                self.history.append((noise_multiplier, sample_rate, 1))
+
+        else:
+            self.history.append((noise_multiplier, sample_rate, 1))
+
+    def get_privacy_spent(
+        self, *, delta: float, alphas: Optional[List[Union[float, int]]] = None
+    ) -> Tuple[float, float]:
+        if not self.history:
+            return 0, 0
+
+        if alphas is None:
+            alphas = self.DEFAULT_ALPHAS
+        rdp = sum(
+            [
+                privacy_analysis.SGM_compute_general_subsampled_rdp_bound(
+                    q=sample_rate,
+                    noise_multiplier=noise_multiplier,
+                    steps=num_steps,
+                    orders=alphas,
+                )
+                for (noise_multiplier, sample_rate, num_steps) in self.history
+            ]
+        )
+        eps, best_alpha = privacy_analysis.get_privacy_spent(
+            orders=alphas, rdp=rdp, delta=delta
+        )
+        return float(eps), float(best_alpha)
+
+    def get_epsilon(
+        self, delta: float, alphas: Optional[List[Union[float, int]]] = None
+    ):
+        """
+        Return privacy budget (epsilon) expended so far.
+
+        Args:
+            delta: target delta
+            alphas: List of RDP orders (alphas) used to search for the optimal conversion
+                between RDP and (epd, delta)-DP
+        """
+        eps, _ = self.get_privacy_spent(delta=delta, alphas=alphas)
+        return eps
+
+    def __len__(self):
+        return len(self.history)
+
+    @classmethod
+    def mechanism(cls) -> str:
+        return "rdp_analytic"