From b59f30281dd5d9732ab8f216285522395679e3a2 Mon Sep 17 00:00:00 2001 From: {{ <{{> Date: Thu, 1 Sep 2022 09:03:38 -0700 Subject: [PATCH 1/2] implementation of analytic moment bound for SGM --- opacus/accountants/__init__.py | 3 + opacus/accountants/analysis/rdp.py | 108 ++++++++++++++++++++++ opacus/accountants/rdp_analytic_moment.py | 88 ++++++++++++++++++ 3 files changed, 199 insertions(+) create mode 100644 opacus/accountants/rdp_analytic_moment.py diff --git a/opacus/accountants/__init__.py b/opacus/accountants/__init__.py index 9e9725e6..decb4de0 100644 --- a/opacus/accountants/__init__.py +++ b/opacus/accountants/__init__.py @@ -15,6 +15,7 @@ from .accountant import IAccountant from .gdp import GaussianAccountant from .rdp import RDPAccountant +from .rdp import RDPAccountantAnalyticMoment __all__ = [ @@ -29,5 +30,7 @@ def create_accountant(mechanism: str) -> IAccountant: return RDPAccountant() elif mechanism == "gdp": return GaussianAccountant() + elif mechanism == "rdp_analytic": + return RDPAccountantAnalyticMoment() raise ValueError(f"Unexpected accounting mechanism: {mechanism}") diff --git a/opacus/accountants/analysis/rdp.py b/opacus/accountants/analysis/rdp.py index c83dc05f..18671bd5 100644 --- a/opacus/accountants/analysis/rdp.py +++ b/opacus/accountants/analysis/rdp.py @@ -333,3 +333,111 @@ def get_privacy_spent( f"Optimal order is the {extreme} alpha. Please consider expanding the range of alphas to get a tighter privacy bound." ) return eps[idx_opt], orders_vec[idx_opt] + +# ================================== +# Functions for General Composition +# ================================== + +# Functions used for implementation of Wang's Generalized analytic moment bounds for subsampled mechanisms +# based on theorem 9 of https://arxiv.org/pdf/1808.00087.pdf and implementation https://github.com/yuxiangw/autodp/blob/master/autodp/rdp_acct.py + +def _SGM_compute_rdp_subssample(q: float,sigma: float, alpha: float) -> float: + r"""Computes bound of RDP of the Sampled Mechanism at order ``alpha``. + Args: + q: Subsampling rate of + sigma: The standard deviation of the additive Gaussian noise. + alpha: The order at which RDP is computed. + Returns: + RDP at order ``alpha``; can be np.inf. + """ + # SGM rdp calculation: + def func(x): + return alpha / (2*sigma**2) + + if q == 0: + return 0 + + if q == 1.0: + return func(alpha) + + if np.isinf(alpha): + return np.inf + + def cgf(x): + return x * func(x+1) + + # since calculations rely on binomial expansion - we do the calculation for integer alpha and then interpolate + def subsample_func_int(x): + # output the cgf of the subsampled mechanism + mm = int(x) + eps_inf = func(np.inf) + eps_two = func(2.0) + + moments_two = 2 * np.log(prob) + logcomb(mm,2) \ + + np.minimum(np.log(4) + eps_two + np.log(1-np.exp(-eps_two)), + eps_two + np.minimum(np.log(2), + 2 * (eps_inf+np.log(1-np.exp(-eps_inf))))) + moment_bound = lambda j: np.minimum(j * (eps_inf + np.log(1-np.exp(-eps_inf))), + np.log(2)) + cgf(j - 1) \ + + j * np.log(prob) + utils.logcomb(mm, j) + moments = [moment_bound(j) for j in range(3, mm + 1, 1)] + + return np.minimum((x-1)*func(x), _log_add([0,moments_two] + moments)) + + def subsample_func(x): + # This function returns the RDP at alpha = x + # RDP with the linear interpolation upper bound of the CGF + + # FROM auto_dp repo : + + # This result applies to both subsampling with replacement and Poisson subsampling. + # The result for Poisson subsmapling is due to Theorem 1 of : + # Li, Ninghui, Qardaji, Wahbeh, and Su, Dong. On sampling, anonymization, and differential privacy or, + # k-anonymization meets differential privacy + # The result for Subsampling with replacement is due to: + # Jon Ullman's lecture notes: http://www.ccs.neu.edu/home/jullman/PrivacyS17/HW1sol.pdf + # See the proof of (b) + + epsinf = np.log(1+q*(np.exp(func(np.inf))-1))) + + if np.isinf(x): + return epsinf + if q == 1.0: + return func(x) + + if (x >= 1.0) and (x <= 2.0): + return np.minimum(epsinf, subsample_func_int(2.0) / (2.0-1)) + if np.equal(np.mod(x, 1), 0): + return np.minimum(epsinf, subsample_func_int(x) / (x-1)) + xc = math.ceil(x) + xf = math.floor(x) + return np.min( + [epsinf,func(x), + ((x-xf)*subsample_func_int(xc) + (1-(x-xf))*subsample_func_int(xf)) / (x-1)] + ) + + return subsample_func(alpha) + +def SGM_compute_general_subsampled_rdp_bound( + *, q: float, noise_multiplier: float, steps: int, orders: Union[List[float], float] +) -> Union[List[float], float]: + r"""Computes Renyi Differential Privacy (RDP) analytic moment bound for general subsampled mechanism iterated ``steps`` times. + Note that Opacus actually does Poisson subsampling so this bound in not actually valid + Args: + q: Sampling rate of the mechanism + noise_multiplier: The ratio of the standard deviation of the + additive Gaussian noise to the L2-sensitivity of the function + to which it is added. Note that this is same as the standard + deviation of the additive Gaussian noise when the L2-sensitivity + of the function is 1. + steps: The number of iterations of the mechanism. + orders: An array (or a scalar) of RDP orders. + Returns: + The RDP guarantees at all orders; can be ``np.inf``. + """ + if isinstance(orders, float): + rdp = _SGM_compute_rdp_subsample(m,q, orders) + else: + rdp = np.array([_SGM_compute_rdp_subsample(m,q, order) for order in orders]) + + return rdp * steps diff --git a/opacus/accountants/rdp_analytic_moment.py b/opacus/accountants/rdp_analytic_moment.py new file mode 100644 index 00000000..0e7ae8b0 --- /dev/null +++ b/opacus/accountants/rdp_analytic_moment.py @@ -0,0 +1,88 @@ +# Copyright (c) Meta Platforms, Inc. and affiliates. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from typing import List, Optional, Tuple, Union + +from .accountant import IAccountant +from .analysis import rdp as privacy_analysis + +class RDPAccountantAnalyticMoment(IAccountant): + DEFAULT_ALPHAS = [1 + x / 10.0 for x in range(1, 100)] + list(range(12, 64)) + + def __init__(self): + super().__init__() + + def step(self, *, noise_multiplier: float, sample_rate: float): + if len(self.history) >= 1: + last_noise_multiplier, last_sample_rate, num_steps = self.history.pop() + if ( + last_noise_multiplier == noise_multiplier + and last_sample_rate == sample_rate + ): + self.history.append( + (last_noise_multiplier, last_sample_rate, num_steps + 1) + ) + else: + self.history.append( + (last_noise_multiplier, last_sample_rate, num_steps) + ) + self.history.append((noise_multiplier, sample_rate, 1)) + + else: + self.history.append((noise_multiplier, sample_rate, 1)) + + def get_privacy_spent( + self, *, delta: float, alphas: Optional[List[Union[float, int]]] = None + ) -> Tuple[float, float]: + if not self.history: + return 0, 0 + + if alphas is None: + alphas = self.DEFAULT_ALPHAS + rdp = sum( + [ + privacy_analysis.SGM_compute_general_subsampled_rdp_bound( + q=sample_rate, + noise_multiplier=noise_multiplier, + steps=num_steps, + orders=alphas, + ) + for (noise_multiplier, sample_rate, num_steps) in self.history + ] + ) + eps, best_alpha = privacy_analysis.get_privacy_spent( + orders=alphas, rdp=rdp, delta=delta + ) + return float(eps), float(best_alpha) + + def get_epsilon( + self, delta: float, alphas: Optional[List[Union[float, int]]] = None + ): + """ + Return privacy budget (epsilon) expended so far. + + Args: + delta: target delta + alphas: List of RDP orders (alphas) used to search for the optimal conversion + between RDP and (epd, delta)-DP + """ + eps, _ = self.get_privacy_spent(delta=delta, alphas=alphas) + return eps + + def __len__(self): + return len(self.history) + + @classmethod + def mechanism(cls) -> str: + return "rdp" From c0ee4fb1b55b60ff56d68a9a1725acbf3585ad40 Mon Sep 17 00:00:00 2001 From: {{ <{{> Date: Thu, 8 Sep 2022 13:35:20 -0700 Subject: [PATCH 2/2] debugged some syntax errors --- opacus/accountants/__init__.py | 3 ++- opacus/accountants/analysis/rdp.py | 32 +++++++++++++++++------ opacus/accountants/rdp_analytic_moment.py | 2 +- 3 files changed, 27 insertions(+), 10 deletions(-) diff --git a/opacus/accountants/__init__.py b/opacus/accountants/__init__.py index decb4de0..851e0bb8 100644 --- a/opacus/accountants/__init__.py +++ b/opacus/accountants/__init__.py @@ -15,13 +15,14 @@ from .accountant import IAccountant from .gdp import GaussianAccountant from .rdp import RDPAccountant -from .rdp import RDPAccountantAnalyticMoment +from .rdp_analytic_moment import RDPAccountantAnalyticMoment __all__ = [ "IAccountant", "GaussianAccountant", "RDPAccountant", + "RDPAccountantAnalyticMoment" ] diff --git a/opacus/accountants/analysis/rdp.py b/opacus/accountants/analysis/rdp.py index 18671bd5..2ad574ad 100644 --- a/opacus/accountants/analysis/rdp.py +++ b/opacus/accountants/analysis/rdp.py @@ -101,6 +101,19 @@ def _log_sub(logx: float, logy: float) -> float: except OverflowError: return logx +def _log_add_list(log_list: list) -> float: + r"""Adds a list of numbers in the log space. + + Args: + log_list + Returns: + Sum of numbers in log space. i.e. log(exp(x_1) + exp(x_2) + exp(x_3) + ....) + """ + a = np.max(log_list) + # Use exp(a) + exp(b) = (exp(a - b) + 1) * exp(b) + return a + np.log(np.sum(np.exp(log_list-a))) + + def _compute_log_a_for_int_alpha(q: float, sigma: float, alpha: int) -> float: r"""Computes :math:`log(A_\alpha)` for integer ``alpha``. @@ -341,14 +354,17 @@ def get_privacy_spent( # Functions used for implementation of Wang's Generalized analytic moment bounds for subsampled mechanisms # based on theorem 9 of https://arxiv.org/pdf/1808.00087.pdf and implementation https://github.com/yuxiangw/autodp/blob/master/autodp/rdp_acct.py -def _SGM_compute_rdp_subssample(q: float,sigma: float, alpha: float) -> float: +def logcomb(n, k): + return (special.gammaln(n+1) - special.gammaln(n-k+1) - special.gammaln(k+1)) + +def _SGM_compute_rdp_subsample(q: float,sigma: float, alpha: float) -> float: r"""Computes bound of RDP of the Sampled Mechanism at order ``alpha``. Args: q: Subsampling rate of sigma: The standard deviation of the additive Gaussian noise. alpha: The order at which RDP is computed. Returns: - RDP at order ``alpha``; can be np.inf. + RDP at order ``alpha``; can be np.inf.s """ # SGM rdp calculation: def func(x): @@ -373,16 +389,16 @@ def subsample_func_int(x): eps_inf = func(np.inf) eps_two = func(2.0) - moments_two = 2 * np.log(prob) + logcomb(mm,2) \ + moments_two = 2 * np.log(q) + logcomb(mm,2) \ + np.minimum(np.log(4) + eps_two + np.log(1-np.exp(-eps_two)), eps_two + np.minimum(np.log(2), 2 * (eps_inf+np.log(1-np.exp(-eps_inf))))) moment_bound = lambda j: np.minimum(j * (eps_inf + np.log(1-np.exp(-eps_inf))), np.log(2)) + cgf(j - 1) \ - + j * np.log(prob) + utils.logcomb(mm, j) + + j * np.log(q) + logcomb(mm, j) moments = [moment_bound(j) for j in range(3, mm + 1, 1)] - return np.minimum((x-1)*func(x), _log_add([0,moments_two] + moments)) + return np.minimum((x-1)*func(x), _log_add_list([0,moments_two] + moments)) def subsample_func(x): # This function returns the RDP at alpha = x @@ -398,7 +414,7 @@ def subsample_func(x): # Jon Ullman's lecture notes: http://www.ccs.neu.edu/home/jullman/PrivacyS17/HW1sol.pdf # See the proof of (b) - epsinf = np.log(1+q*(np.exp(func(np.inf))-1))) + epsinf = np.log(1+q*(np.exp(func(np.inf))-1)) if np.isinf(x): return epsinf @@ -436,8 +452,8 @@ def SGM_compute_general_subsampled_rdp_bound( The RDP guarantees at all orders; can be ``np.inf``. """ if isinstance(orders, float): - rdp = _SGM_compute_rdp_subsample(m,q, orders) + rdp = _SGM_compute_rdp_subsample(q,noise_multiplier,orders) else: - rdp = np.array([_SGM_compute_rdp_subsample(m,q, order) for order in orders]) + rdp = np.array([_SGM_compute_rdp_subsample(q,noise_multiplier, order) for order in orders]) return rdp * steps diff --git a/opacus/accountants/rdp_analytic_moment.py b/opacus/accountants/rdp_analytic_moment.py index 0e7ae8b0..770c6b7e 100644 --- a/opacus/accountants/rdp_analytic_moment.py +++ b/opacus/accountants/rdp_analytic_moment.py @@ -85,4 +85,4 @@ def __len__(self): @classmethod def mechanism(cls) -> str: - return "rdp" + return "rdp_analytic"