From e5bfce252ed7352800d9ee0501ead00565424abf Mon Sep 17 00:00:00 2001
From: Dariush Wahdany
Date: Tue, 2 Jan 2024 14:59:34 +0000
Subject: [PATCH] feat(LS): mechanism

---
 src/dp_learning_ff/least_squares.py | 287 +++++++++++++++++++---------
 src/dp_learning_ff/mechanisms.py    |  19 +-
 2 files changed, 210 insertions(+), 96 deletions(-)
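A minimal usage sketch of the class-based API this patch introduces (not part
of the diff; the toy arrays and hyperparameter values are illustrative
assumptions, not taken from the patch):

    import numpy as np
    from dp_learning_ff.least_squares import LeastSquaresClassifier

    rng = np.random.default_rng(0)
    X_train = rng.normal(size=(100, 16))    # toy (n, d) feature matrix
    y_train = rng.integers(0, 3, size=100)  # toy labels in {0, 1, 2}

    clf = LeastSquaresClassifier(
        epsilon=1.0,
        delta=1e-5,
        clipping_norm=1.0,
        reg_lambda=1e-2,
        weight_alpha=1.0,
    )
    clf.calibrate()                # solve for the noise multiplier matching (epsilon, delta)
    clf.fit(X_train, y_train)      # release per-class least-squares solutions under DP
    preds = clf.classify(X_train)  # argmax over the k per-class scores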
diff --git a/src/dp_learning_ff/least_squares.py b/src/dp_learning_ff/least_squares.py
index 17fa916..745e362 100644
--- a/src/dp_learning_ff/least_squares.py
+++ b/src/dp_learning_ff/least_squares.py
@@ -2,105 +2,206 @@
 
 import numpy as np
 
+from dp_learning_ff.mechanisms import LeastSquaresCDPM, calibrate_single_param
+
 from .utils import clip_features, dp_covariance
 
 
-def noisy_sum(
-    X_clip,
-    clipping_norm,
-    noise_multiplier,
-    rng,
-    k_classes=1,
-):
-    d = X_clip.shape[1]
-    b = np.sum(X_clip, axis=0)
-    b += rng.normal(
-        scale=clipping_norm * noise_multiplier * np.sqrt(k_classes), size=(d)
-    )
-    return b
-
-
-def dp_least_squares(
-    A,
-    y,
-    weight_alpha,
-    reg_lambda,
-    clipping_norm,
-    noise_multiplier,
-    seed=42,
-    k_classes: Optional[int] = None,
-):
-    """Build and solve the differentially private least squares problem.
-    Algorithm attempts to follow the description (Algorithm 3) in:
-
-    Mehta, H., Krichene, W., Thakurta, A., Kurakin, A., & Cutkosky, A. (2022).
-    Differentially private image classification from features.
-    arXiv preprint arXiv:2211.13403.
-
-    Args:
-        A: (n, d) matrix of features
-        y: (n,) vector of labels
-        weight_alpha: weight of the global covariance matrix
-        reg_lambda: regularization parameter
-        clipping_norm: L2 norm to clip to
-        noise_multiplier: noise multiplier for DP-SGD
-        k_classes: maximum number of positive classes per sample
-    Returns:
-        x: (d,) vector of weights
-    """
-    n, d = A.shape
-    assert y.shape == (n,)
-    assert clipping_norm > 0
-    assert noise_multiplier > 0
-    assert reg_lambda >= 0
-    assert weight_alpha >= 0
-
-    rng = np.random.default_rng(seed)
-
-    A_clip = clip_features(A, clipping_norm)
-
-    G = dp_covariance(
-        A_clip,
-        (noise_multiplier * clipping_norm**2),
-        rng,
-    )  # k_classes is always 1 for global G
-    targets = np.unique(y)
-    if k_classes is None:
-        k_classes = 1
-    else:
-        assert (
-            k_classes < targets
-        ), "K_classes cannot be larger than the number of unique classes"
-        assert (
-            k_classes >= 1
-        ), "There must be at least one sample with at least one positive class (k_classes > 1)"
-    thetas = []
-    for target in targets:
-        x_class = A_clip[np.where(y == target)[0]]
-        A_class = dp_covariance(
-            x_class,
-            (noise_multiplier * np.sqrt(k_classes) * clipping_norm**2),
-            rng,
-        )
-        b_class = noisy_sum(x_class, clipping_norm, noise_multiplier, rng, k_classes)
-        theta_class = np.linalg.solve(
-            A_class + weight_alpha * G + reg_lambda * np.eye(d), b_class
-        )
-        thetas.append(theta_class)
-
-    return np.asarray(thetas)
-
-
-def least_squares_classification(
-    observations: np.ndarray, theta: np.ndarray
-) -> np.ndarray:
-    """Returns the predictions of the least squares classifier.
-    `n` is the number of observations, `d` is the dimension of the observations, and `k` is the number of classes.
-    Args:
-        observations (np.ndarray): (n, d)-array containing the observations.
-        theta (np.ndarray): (k, d)-array containing the least squares solution.
-
-    Returns:
-        np.ndarray: (n, )-array containing the predictions.
-    """
-    return np.argmax(observations @ theta.T, axis=1)
+class LeastSquaresClassifier:
+    def __init__(
+        self,
+        epsilon: float,
+        delta: float,
+        clipping_norm,
+        reg_lambda,
+        weight_alpha,
+        p_sampling: float = 1,
+        seed: int = 42,
+        k_classes: Optional[int] = None,
+    ) -> None:
+        self.epsilon = epsilon
+        self.delta = delta
+        self.clipping_norm = clipping_norm
+        self.reg_lambda = reg_lambda
+        self.weight_alpha = weight_alpha
+        self.p_sampling = p_sampling
+        self.seed = seed
+        self.k_classes = k_classes
+        self.mechanism = None
+        self.theta = None
+
+    @property
+    def epsilon(self):
+        return self._epsilon
+
+    @epsilon.setter
+    def epsilon(self, value):
+        self._epsilon = value
+
+    @property
+    def delta(self):
+        return self._delta
+
+    @delta.setter
+    def delta(self, value):
+        assert value is None or value > 0, "delta must be positive"
+        self._delta = value
+
+    @property
+    def p_sampling(self):
+        return self._p_sampling
+
+    @p_sampling.setter
+    def p_sampling(self, value):
+        assert value is None or (
+            value > 0 and value <= 1
+        ), "p_sampling must be in (0, 1]"
+        self._p_sampling = value
+
+    def try_calibrate(self):
+        attrs = ["_epsilon", "_delta"]
+        for attr in attrs:
+            if not hasattr(self, attr) or getattr(self, attr) is None:
+                return
+        return self.calibrate()
+
+    def calibrate(self):
+        print(
+            "Calibrating mechanism to epsilon={}, delta={}".format(
+                self.epsilon,
+                self.delta,
+            )
+        )
+
+        def scaled_mechanism(scale):
+            return LeastSquaresCDPM(
+                noise_multiplier=scale,
+                p_sampling=self.p_sampling,
+            )
+
+        calibrated_mechanism = calibrate_single_param(
+            scaled_mechanism, self.epsilon, self.delta
+        )
+        epsilon = calibrated_mechanism.get_approxDP(self.delta)
+        print(
+            "Calibrated mechanism with epsilon={}, scale={}, params={}".format(
+                epsilon, calibrated_mechanism.scale, calibrated_mechanism.params
+            )
+        )
+        self.mechanism = calibrated_mechanism
+
+    @staticmethod
+    def noisy_sum(
+        X_clip,
+        clipping_norm,
+        noise_multiplier,
+        rng,
+        k_classes=1,
+    ):
+        d = X_clip.shape[1]
+        b = np.sum(X_clip, axis=0)
+        b += rng.normal(
+            scale=clipping_norm * noise_multiplier * np.sqrt(k_classes), size=(d,)
+        )
+        return b
+
+    @staticmethod
+    def dp_least_squares(
+        A,
+        y,
+        weight_alpha,
+        reg_lambda,
+        clipping_norm,
+        noise_multiplier,
+        seed=42,
+        k_classes: Optional[int] = None,
+    ):
+        """Build and solve the differentially private least squares problem.
+        The algorithm attempts to follow the description (Algorithm 3) in:
+
+        Mehta, H., Krichene, W., Thakurta, A., Kurakin, A., & Cutkosky, A. (2022).
+        Differentially private image classification from features.
+        arXiv preprint arXiv:2211.13403.
+
+        Args:
+            A: (n, d) matrix of features
+            y: (n,) vector of labels
+            weight_alpha: weight of the global covariance matrix
+            reg_lambda: regularization parameter
+            clipping_norm: L2 norm to clip the features to
+            noise_multiplier: noise multiplier for the Gaussian mechanism
+            k_classes: maximum number of positive classes per sample
+        Returns:
+            thetas: (k, d) array of per-class weights
+        """
+        n, d = A.shape
+        assert y.shape == (n,)
+        assert clipping_norm > 0
+        assert noise_multiplier > 0
+        assert reg_lambda >= 0
+        assert weight_alpha >= 0
+
+        rng = np.random.default_rng(seed)
+
+        A_clip = clip_features(A, clipping_norm)
+
+        G = dp_covariance(
+            A_clip,
+            (noise_multiplier * clipping_norm**2),
+            rng,
+        )  # k_classes is always 1 for the global G
+        targets = np.unique(y)
+        if k_classes is None:
+            k_classes = 1
+        else:
+            assert (
+                k_classes <= len(targets)
+            ), "k_classes cannot be larger than the number of unique classes"
+            assert k_classes >= 1, "k_classes must be at least 1"
+        thetas = []
+        for target in targets:
+            x_class = A_clip[np.where(y == target)[0]]
+            A_class = dp_covariance(
+                x_class,
+                (noise_multiplier * np.sqrt(k_classes) * clipping_norm**2),
+                rng,
+            )
+            b_class = LeastSquaresClassifier.noisy_sum(
+                x_class, clipping_norm, noise_multiplier, rng, k_classes
+            )
+            theta_class = np.linalg.solve(
+                A_class + weight_alpha * G + reg_lambda * np.eye(d), b_class
+            )
+            thetas.append(theta_class)
+
+        return np.asarray(thetas)
+
+    @staticmethod
+    def least_squares_classification(
+        observations: np.ndarray, theta: np.ndarray
+    ) -> np.ndarray:
+        """Returns the predictions of the least squares classifier.
+        `n` is the number of observations, `d` is their dimension, and `k` is
+        the number of classes.
+        Args:
+            observations (np.ndarray): (n, d)-array containing the observations.
+            theta (np.ndarray): (k, d)-array containing the least squares solution.
+
+        Returns:
+            np.ndarray: (n,)-array containing the predictions.
+        """
+        return np.argmax(observations @ theta.T, axis=1)
+
+    def fit(self, A, y):
+        assert self.mechanism is not None, "Must calibrate mechanism first"
+        self.theta = self.dp_least_squares(
+            A,
+            y,
+            self.weight_alpha,
+            self.reg_lambda,
+            self.clipping_norm,
+            self.mechanism.params["noise_multiplier"],
+            self.seed,
+            self.k_classes,
+        )
+
+    def classify(self, X):
+        assert self.theta is not None, "Must fit classifier first"
+        return self.least_squares_classification(X, self.theta)
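For context on the accounting in the mechanisms.py hunk below: dp_least_squares
makes three Gaussian releases with the same noise multiplier sigma (the global
covariance, the per-class covariances, and the per-class sums). Each such
release is 1/(2 sigma^2)-zCDP, and zCDP composes additively, which is where
rho = 3 / (2 * noise_multiplier**2) in LeastSquaresCDPM comes from. A small
sketch of that arithmetic (my illustration, not part of the patch; the
zCDP-to-(epsilon, delta) conversion is the standard bound from Bun & Steinke,
2016, and autodp computes a tighter conversion internally):

    import math

    def rho_total(noise_multiplier: float, n_releases: int = 3) -> float:
        # Each Gaussian release with multiplier sigma is 1/(2 sigma^2)-zCDP;
        # adaptive composition of n such releases sums the rho values.
        return n_releases / (2 * noise_multiplier**2)

    def zcdp_to_approx_dp(rho: float, delta: float) -> float:
        # rho-zCDP implies (rho + 2 * sqrt(rho * log(1/delta)), delta)-DP.
        return rho + 2 * math.sqrt(rho * math.log(1 / delta))

    print(zcdp_to_approx_dp(rho_total(10.0), 1e-5))  # loose epsilon bound for sigma = 10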
diff --git a/src/dp_learning_ff/mechanisms.py b/src/dp_learning_ff/mechanisms.py
index 9dc583f..a0be64d 100644
--- a/src/dp_learning_ff/mechanisms.py
+++ b/src/dp_learning_ff/mechanisms.py
@@ -1,9 +1,9 @@
+import math
 from typing import Iterable, Literal, Optional
 
 from autodp.autodp_core import Mechanism
-from autodp.mechanism_zoo import GaussianMechanism
-from autodp.transformer_zoo import ComposeGaussian, AmplificationBySampling
-import math
+from autodp.mechanism_zoo import GaussianMechanism, zCDP_Mechanism
+from autodp.transformer_zoo import AmplificationBySampling, ComposeGaussian
 
 
 class CoinpressGM(Mechanism):
@@ -70,6 +70,19 @@ def __init__(
         super().__init__(name=name, p_sampling=p_sampling, Ps=Ps)
 
 
+class LeastSquaresCDPM(Mechanism):
+    def __init__(self, noise_multiplier, p_sampling: float = 1, name="LeastSquares"):
+        assert noise_multiplier > 0, "noise_multiplier must be positive"
+        assert 0 < p_sampling <= 1, "p_sampling must be in (0, 1]"
+        self.params = {"noise_multiplier": noise_multiplier}
+        self.name = name
+        # Three Gaussian releases with the same noise multiplier sigma
+        # compose to rho = 3 / (2 * sigma^2) under zCDP.
+        mechanism = zCDP_Mechanism(rho=3 / (2 * noise_multiplier**2), xi=0)
+        if p_sampling < 1:
+            preprocessing = AmplificationBySampling()
+            mechanism = preprocessing.amplify(mechanism, p_sampling)
+        self.set_all_representation(mechanism)
+
+
 def calibrate_single_param(mechanism_class, epsilon, delta, verbose: bool = False):
     def obj(x):
         return mechanism_class(x).get_approxDP(delta)
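LeastSquaresClassifier.calibrate wraps the calibration shown above; the
mechanism can also be calibrated directly. A sketch under the assumption that
calibrate_single_param returns the calibrated mechanism object, as its use in
calibrate above suggests (the target epsilon/delta and the sampling rate are
illustrative):

    from dp_learning_ff.mechanisms import LeastSquaresCDPM, calibrate_single_param

    def scaled_mechanism(noise_multiplier):
        # Poisson subsampling at rate 0.1 amplifies the zCDP guarantee.
        return LeastSquaresCDPM(noise_multiplier=noise_multiplier, p_sampling=0.1)

    mechanism = calibrate_single_param(scaled_mechanism, epsilon=1.0, delta=1e-5)
    print(mechanism.get_approxDP(1e-5))  # should be close to the 1.0 target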