Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Sgm analytic moments #506

Open
wants to merge 2 commits into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 4 additions & 0 deletions opacus/accountants/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,12 +15,14 @@
from .accountant import IAccountant
from .gdp import GaussianAccountant
from .rdp import RDPAccountant
from .rdp_analytic_moment import RDPAccountantAnalyticMoment


# Names re-exported by ``from <this package> import *``.
__all__ = [
    "IAccountant",
    "GaussianAccountant",
    "RDPAccountant",
    "RDPAccountantAnalyticMoment",
]


Expand All @@ -29,5 +31,7 @@ def create_accountant(mechanism: str) -> IAccountant:
return RDPAccountant()
elif mechanism == "gdp":
return GaussianAccountant()
elif mechanism == "rdp_analytic":
return RDPAccountantAnalyticMoment()

raise ValueError(f"Unexpected accounting mechanism: {mechanism}")
124 changes: 124 additions & 0 deletions opacus/accountants/analysis/rdp.py
Original file line number Diff line number Diff line change
Expand Up @@ -101,6 +101,19 @@ def _log_sub(logx: float, logy: float) -> float:
except OverflowError:
return logx

def _log_add_list(log_list: list) -> float:
r"""Adds a list of numbers in the log space.

Args:
log_list
Returns:
Sum of numbers in log space. i.e. log(exp(x_1) + exp(x_2) + exp(x_3) + ....)
"""
a = np.max(log_list)
# Use exp(a) + exp(b) = (exp(a - b) + 1) * exp(b)
return a + np.log(np.sum(np.exp(log_list-a)))



def _compute_log_a_for_int_alpha(q: float, sigma: float, alpha: int) -> float:
r"""Computes :math:`log(A_\alpha)` for integer ``alpha``.
Expand Down Expand Up @@ -333,3 +346,114 @@ def get_privacy_spent(
f"Optimal order is the {extreme} alpha. Please consider expanding the range of alphas to get a tighter privacy bound."
)
return eps[idx_opt], orders_vec[idx_opt]

# ==================================
# Functions for General Composition
# ==================================

# Helpers implementing the generalized analytical-moments RDP bound for subsampled mechanisms
# (Wang, Balle and Kasiviswanathan, Theorem 9 of https://arxiv.org/pdf/1808.00087.pdf), adapted from the
# reference implementation at https://github.com/yuxiangw/autodp/blob/master/autodp/rdp_acct.py

def logcomb(n, k):
    r"""Returns the natural logarithm of the binomial coefficient ``C(n, k)``.

    The value is assembled from log-gamma terms,
    :math:`\ln\Gamma(n+1) - \ln\Gamma(n-k+1) - \ln\Gamma(k+1)`,
    rather than from explicit factorials.

    Args:
        n: Size of the set being chosen from.
        k: Number of elements chosen.

    Returns:
        ``log(n! / ((n - k)! * k!))`` as evaluated by ``special.gammaln``.
    """
    log_n_fact = special.gammaln(n + 1)
    log_remainder_fact = special.gammaln(n - k + 1)
    log_k_fact = special.gammaln(k + 1)
    return log_n_fact - log_remainder_fact - log_k_fact

def _SGM_compute_rdp_subsample(q: float,sigma: float, alpha: float) -> float:
r"""Computes bound of RDP of the Sampled Mechanism at order ``alpha``.
Args:
q: Subsampling rate of
sigma: The standard deviation of the additive Gaussian noise.
alpha: The order at which RDP is computed.
Returns:
RDP at order ``alpha``; can be np.inf.s
"""
# SGM rdp calculation:
def func(x):
return alpha / (2*sigma**2)

if q == 0:
return 0

if q == 1.0:
return func(alpha)

if np.isinf(alpha):
return np.inf

def cgf(x):
return x * func(x+1)

# since calculations rely on binomial expansion - we do the calculation for integer alpha and then interpolate
def subsample_func_int(x):
# output the cgf of the subsampled mechanism
mm = int(x)
eps_inf = func(np.inf)
eps_two = func(2.0)

moments_two = 2 * np.log(q) + logcomb(mm,2) \
+ np.minimum(np.log(4) + eps_two + np.log(1-np.exp(-eps_two)),
eps_two + np.minimum(np.log(2),
2 * (eps_inf+np.log(1-np.exp(-eps_inf)))))
moment_bound = lambda j: np.minimum(j * (eps_inf + np.log(1-np.exp(-eps_inf))),
np.log(2)) + cgf(j - 1) \
+ j * np.log(q) + logcomb(mm, j)
moments = [moment_bound(j) for j in range(3, mm + 1, 1)]

return np.minimum((x-1)*func(x), _log_add_list([0,moments_two] + moments))

def subsample_func(x):
# This function returns the RDP at alpha = x
# RDP with the linear interpolation upper bound of the CGF

# FROM auto_dp repo :

# This result applies to both subsampling with replacement and Poisson subsampling.
# The result for Poisson subsmapling is due to Theorem 1 of :
# Li, Ninghui, Qardaji, Wahbeh, and Su, Dong. On sampling, anonymization, and differential privacy or,
# k-anonymization meets differential privacy
# The result for Subsampling with replacement is due to:
# Jon Ullman's lecture notes: http://www.ccs.neu.edu/home/jullman/PrivacyS17/HW1sol.pdf
# See the proof of (b)

epsinf = np.log(1+q*(np.exp(func(np.inf))-1))

if np.isinf(x):
return epsinf
if q == 1.0:
return func(x)

if (x >= 1.0) and (x <= 2.0):
return np.minimum(epsinf, subsample_func_int(2.0) / (2.0-1))
if np.equal(np.mod(x, 1), 0):
return np.minimum(epsinf, subsample_func_int(x) / (x-1))
xc = math.ceil(x)
xf = math.floor(x)
return np.min(
[epsinf,func(x),
((x-xf)*subsample_func_int(xc) + (1-(x-xf))*subsample_func_int(xf)) / (x-1)]
)

return subsample_func(alpha)

def SGM_compute_general_subsampled_rdp_bound(
    *, q: float, noise_multiplier: float, steps: int, orders: Union[List[float], float]
) -> Union[List[float], float]:
    r"""Computes Renyi Differential Privacy (RDP) analytic moment bound for
    general subsampled mechanism iterated ``steps`` times.

    Note that Opacus actually does Poisson subsampling, so this bound is not
    necessarily valid for it.
    NOTE(review): confirm which sampling schemes the bound covers before
    relying on it for reported guarantees.

    Args:
        q: Sampling rate of the mechanism.
        noise_multiplier: The ratio of the standard deviation of the
            additive Gaussian noise to the L2-sensitivity of the function
            to which it is added. Note that this is same as the standard
            deviation of the additive Gaussian noise when the L2-sensitivity
            of the function is 1.
        steps: The number of iterations of the mechanism.
        orders: An array (or a scalar) of RDP orders.

    Returns:
        The RDP guarantees at all orders; can be ``np.inf``. A scalar
        ``orders`` gives a scalar, anything else gives a NumPy array with one
        entry per order.
    """
    # ``np.isscalar`` also accepts ints and NumPy scalars, which a plain
    # ``isinstance(orders, float)`` check would wrongly try to iterate over.
    if np.isscalar(orders):
        rdp = _SGM_compute_rdp_subsample(q, noise_multiplier, orders)
    else:
        rdp = np.array(
            [
                _SGM_compute_rdp_subsample(q, noise_multiplier, order)
                for order in orders
            ]
        )

    # RDP composes additively, so ``steps`` identical steps scale the bound.
    return rdp * steps
88 changes: 88 additions & 0 deletions opacus/accountants/rdp_analytic_moment.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,88 @@
# Copyright (c) Meta Platforms, Inc. and affiliates.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

from typing import List, Optional, Tuple, Union

from .accountant import IAccountant
from .analysis import rdp as privacy_analysis

class RDPAccountantAnalyticMoment(IAccountant):
    """Privacy accountant that tracks RDP through the analytic moment bound.

    Every recorded step is stored in ``self.history`` as a
    ``(noise_multiplier, sample_rate, num_steps)`` tuple; consecutive steps
    with identical parameters are merged into a single entry.
    """

    # Orders tried by default: 1.1, 1.2, ..., 10.9 followed by 12, 13, ..., 63.
    DEFAULT_ALPHAS = [1 + tenth / 10.0 for tenth in range(1, 100)] + list(
        range(12, 64)
    )

    def __init__(self):
        super().__init__()

    def step(self, *, noise_multiplier: float, sample_rate: float):
        """Records one optimisation step taken with the given parameters.

        Args:
            noise_multiplier: Noise multiplier used for this step.
            sample_rate: Sampling rate used for this step.
        """
        fresh_entry = (noise_multiplier, sample_rate, 1)
        if not len(self.history) >= 1:
            self.history.append(fresh_entry)
            return

        prev_noise, prev_rate, prev_count = self.history[-1]
        same_parameters = prev_noise == noise_multiplier and prev_rate == sample_rate
        if same_parameters:
            # Extend the current run instead of adding a new entry.
            self.history[-1] = (prev_noise, prev_rate, prev_count + 1)
        else:
            self.history.append(fresh_entry)

    def get_privacy_spent(
        self, *, delta: float, alphas: Optional[List[Union[float, int]]] = None
    ) -> Tuple[float, float]:
        """Computes the privacy spent so far.

        Args:
            delta: Target delta.
            alphas: RDP orders to evaluate; ``DEFAULT_ALPHAS`` when ``None``.

        Returns:
            Pair ``(epsilon, best_alpha)``; ``(0, 0)`` when no step has been
            recorded yet.
        """
        if not self.history:
            return 0, 0

        orders = self.DEFAULT_ALPHAS if alphas is None else alphas

        # One RDP curve per history entry; the curves are summed across entries.
        per_entry_rdp = []
        for noise_multiplier, sample_rate, num_steps in self.history:
            per_entry_rdp.append(
                privacy_analysis.SGM_compute_general_subsampled_rdp_bound(
                    q=sample_rate,
                    noise_multiplier=noise_multiplier,
                    steps=num_steps,
                    orders=orders,
                )
            )
        total_rdp = sum(per_entry_rdp)

        eps, best_alpha = privacy_analysis.get_privacy_spent(
            orders=orders, rdp=total_rdp, delta=delta
        )
        return float(eps), float(best_alpha)

    def get_epsilon(
        self, delta: float, alphas: Optional[List[Union[float, int]]] = None
    ):
        """
        Return privacy budget (epsilon) expended so far.

        Args:
            delta: target delta
            alphas: List of RDP orders (alphas) used to search for the optimal conversion
                between RDP and (eps, delta)-DP
        """
        spent = self.get_privacy_spent(delta=delta, alphas=alphas)
        epsilon, _best_alpha = spent
        return epsilon

    def __len__(self):
        """Number of entries (runs of identical steps) held in the history."""
        return len(self.history)

    @classmethod
    def mechanism(cls) -> str:
        """Identifier of this accounting mechanism."""
        return "rdp_analytic"