From e5bfce252ed7352800d9ee0501ead00565424abf Mon Sep 17 00:00:00 2001
From: Dariush Wahdany
Date: Tue, 2 Jan 2024 14:59:34 +0000
Subject: [PATCH] feat(LS): mechanism

---
 src/dp_learning_ff/least_squares.py | 287 +++++++++++++++++++---------
 src/dp_learning_ff/mechanisms.py    |  19 +-
 2 files changed, 210 insertions(+), 96 deletions(-)
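A minimal usage sketch of the class-based API this patch introduces (not part
of the diff; the toy arrays and hyperparameter values are illustrative
assumptions, not taken from the patch):

    import numpy as np
    from dp_learning_ff.least_squares import LeastSquaresClassifier

    rng = np.random.default_rng(0)
    X_train = rng.normal(size=(100, 16))    # toy (n, d) feature matrix
    y_train = rng.integers(0, 3, size=100)  # toy labels in {0, 1, 2}

    clf = LeastSquaresClassifier(
        epsilon=1.0,
        delta=1e-5,
        clipping_norm=1.0,
        reg_lambda=1e-2,
        weight_alpha=1.0,
    )
    clf.calibrate()                # solve for the noise multiplier matching (epsilon, delta)
    clf.fit(X_train, y_train)      # release per-class least-squares solutions under DP
    preds = clf.classify(X_train)  # argmax over the k per-class scores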
diff --git a/src/dp_learning_ff/least_squares.py b/src/dp_learning_ff/least_squares.py
index 17fa916..745e362 100644
--- a/src/dp_learning_ff/least_squares.py
+++ b/src/dp_learning_ff/least_squares.py
@@ -2,105 +2,206 @@
 
 import numpy as np
 
+from dp_learning_ff.mechanisms import LeastSquaresCDPM, calibrate_single_param
+
 from .utils import clip_features, dp_covariance
 
 
-def noisy_sum(
-    X_clip,
-    clipping_norm,
-    noise_multiplier,
-    rng,
-    k_classes=1,
-):
-    d = X_clip.shape[1]
-    b = np.sum(X_clip, axis=0)
-    b += rng.normal(
-        scale=clipping_norm * noise_multiplier * np.sqrt(k_classes), size=(d)
-    )
-    return b
-
-
-def dp_least_squares(
-    A,
-    y,
-    weight_alpha,
-    reg_lambda,
-    clipping_norm,
-    noise_multiplier,
-    seed=42,
-    k_classes: Optional[int] = None,
-):
-    """Build and solve the differentially private least squares problem.
-    Algorithm attempts to follow the description (Algorithm 3) in:
-
-    Mehta, H., Krichene, W., Thakurta, A., Kurakin, A., & Cutkosky, A. (2022).
-    Differentially private image classification from features.
-    arXiv preprint arXiv:2211.13403.
-
-    Args:
-        A: (n, d) matrix of features
-        y: (n,) vector of labels
-        weight_alpha: weight of the global covariance matrix
-        reg_lambda: regularization parameter
-        clipping_norm: L2 norm to clip to
-        noise_multiplier: noise multiplier for DP-SGD
-        k_classes: maximum number of positive classes per sample
-    Returns:
-        x: (d,) vector of weights
-    """
-    n, d = A.shape
-    assert y.shape == (n,)
-    assert clipping_norm > 0
-    assert noise_multiplier > 0
-    assert reg_lambda >= 0
-    assert weight_alpha >= 0
-
-    rng = np.random.default_rng(seed)
-
-    A_clip = clip_features(A, clipping_norm)
-
-    G = dp_covariance(
-        A_clip,
-        (noise_multiplier * clipping_norm**2),
-        rng,
-    )  # k_classes is always 1 for global G
-    targets = np.unique(y)
-    if k_classes is None:
-        k_classes = 1
-    else:
-        assert (
-            k_classes < targets
-        ), "K_classes cannot be larger than the number of unique classes"
-        assert (
-            k_classes >= 1
-        ), "There must be at least one sample with at least one positive class (k_classes > 1)"
-    thetas = []
-    for target in targets:
-        x_class = A_clip[np.where(y == target)[0]]
-        A_class = dp_covariance(
-            x_class,
-            (noise_multiplier * np.sqrt(k_classes) * clipping_norm**2),
-            rng,
-        )
-        b_class = noisy_sum(x_class, clipping_norm, noise_multiplier, rng, k_classes)
-        theta_class = np.linalg.solve(
-            A_class + weight_alpha * G + reg_lambda * np.eye(d), b_class
-        )
-        thetas.append(theta_class)
-
-    return np.asarray(thetas)
-
-
-def least_squares_classification(
-    observations: np.ndarray, theta: np.ndarray
-) -> np.ndarray:
-    """Returns the predictions of the least squares classifier.
-    `n` is the number of observations, `d` is the dimension of the observations, and `k` is the number of classes.
-    Args:
-        observations (np.ndarray): (n, d)-array containing the observations.
-        theta (np.ndarray): (k, d)-array containing the least squares solution.
-
-    Returns:
-        np.ndarray: (n, )-array containing the predictions.
-    """
-    return np.argmax(observations @ theta.T, axis=1)
+class LeastSquaresClassifier:
+    def __init__(
+        self,
+        epsilon: float,
+        delta: float,
+        clipping_norm,
+        reg_lambda,
+        weight_alpha,
+        p_sampling: float = 1,
+        seed: int = 42,
+        k_classes: Optional[int] = None,
+    ) -> None:
+        self.epsilon = epsilon
+        self.delta = delta
+        self.clipping_norm = clipping_norm
+        self.reg_lambda = reg_lambda
+        self.weight_alpha = weight_alpha
+        self.p_sampling = p_sampling
+        self.seed = seed
+        self.k_classes = k_classes
+        self.mechanism = None
+        self.theta = None
+
+    @property
+    def epsilon(self):
+        return self._epsilon
+
+    @epsilon.setter
+    def epsilon(self, value):
+        self._epsilon = value
+
+    @property
+    def delta(self):
+        return self._delta
+
+    @delta.setter
+    def delta(self, value):
+        assert value is None or value > 0, "delta must be positive"
+        self._delta = value
+
+    @property
+    def p_sampling(self):
+        return self._p_sampling
+
+    @p_sampling.setter
+    def p_sampling(self, value):
+        assert value is None or (
+            value > 0 and value <= 1
+        ), "p_sampling must be in (0, 1]"
+        self._p_sampling = value
+
+    def try_calibrate(self):
+        attrs = ["_epsilon", "_delta"]
+        for attr in attrs:
+            if not hasattr(self, attr) or getattr(self, attr) is None:
+                return
+        return self.calibrate()
+
+    def calibrate(self):
+        print(
+            "Calibrating mechanism to epsilon={}, delta={}".format(
+                self.epsilon,
+                self.delta,
+            )
+        )
+
+        def scaled_mechanism(scale):
+            return LeastSquaresCDPM(
+                noise_multiplier=scale,
+                p_sampling=self.p_sampling,
+            )
+
+        calibrated_mechanism = calibrate_single_param(
+            scaled_mechanism, self.epsilon, self.delta
+        )
+        epsilon = calibrated_mechanism.get_approxDP(self.delta)
+        print(
+            "Calibrated mechanism with epsilon={}, scale={}, params={}".format(
+                epsilon, calibrated_mechanism.scale, calibrated_mechanism.params
+            )
+        )
+        self.mechanism = calibrated_mechanism
+
+    @staticmethod
+    def noisy_sum(
+        X_clip,
+        clipping_norm,
+        noise_multiplier,
+        rng,
+        k_classes=1,
+    ):
+        d = X_clip.shape[1]
+        b = np.sum(X_clip, axis=0)
+        b += rng.normal(
+            scale=clipping_norm * noise_multiplier * np.sqrt(k_classes), size=(d,)
+        )
+        return b
+
+    @staticmethod
+    def dp_least_squares(
+        A,
+        y,
+        weight_alpha,
+        reg_lambda,
+        clipping_norm,
+        noise_multiplier,
+        seed=42,
+        k_classes: Optional[int] = None,
+    ):
+        """Build and solve the differentially private least squares problem.
+        The algorithm attempts to follow the description (Algorithm 3) in:
+
+        Mehta, H., Krichene, W., Thakurta, A., Kurakin, A., & Cutkosky, A. (2022).
+        Differentially private image classification from features.
+        arXiv preprint arXiv:2211.13403.
+
+        Args:
+            A: (n, d) matrix of features
+            y: (n,) vector of labels
+            weight_alpha: weight of the global covariance matrix
+            reg_lambda: regularization parameter
+            clipping_norm: L2 norm to clip the features to
+            noise_multiplier: noise multiplier for the Gaussian mechanism
+            k_classes: maximum number of positive classes per sample
+        Returns:
+            thetas: (k, d) array of per-class weights
+        """
+        n, d = A.shape
+        assert y.shape == (n,)
+        assert clipping_norm > 0
+        assert noise_multiplier > 0
+        assert reg_lambda >= 0
+        assert weight_alpha >= 0
+
+        rng = np.random.default_rng(seed)
+
+        A_clip = clip_features(A, clipping_norm)
+
+        G = dp_covariance(
+            A_clip,
+            (noise_multiplier * clipping_norm**2),
+            rng,
+        )  # k_classes is always 1 for the global G
+        targets = np.unique(y)
+        if k_classes is None:
+            k_classes = 1
+        else:
+            assert (
+                k_classes <= len(targets)
+            ), "k_classes cannot be larger than the number of unique classes"
+            assert k_classes >= 1, "k_classes must be at least 1"
+        thetas = []
+        for target in targets:
+            x_class = A_clip[np.where(y == target)[0]]
+            A_class = dp_covariance(
+                x_class,
+                (noise_multiplier * np.sqrt(k_classes) * clipping_norm**2),
+                rng,
+            )
+            b_class = LeastSquaresClassifier.noisy_sum(
+                x_class, clipping_norm, noise_multiplier, rng, k_classes
+            )
+            theta_class = np.linalg.solve(
+                A_class + weight_alpha * G + reg_lambda * np.eye(d), b_class
+            )
+            thetas.append(theta_class)
+
+        return np.asarray(thetas)
+
+    @staticmethod
+    def least_squares_classification(
+        observations: np.ndarray, theta: np.ndarray
+    ) -> np.ndarray:
+        """Returns the predictions of the least squares classifier.
+        `n` is the number of observations, `d` is their dimension, and `k` is
+        the number of classes.
+        Args:
+            observations (np.ndarray): (n, d)-array containing the observations.
+            theta (np.ndarray): (k, d)-array containing the least squares solution.
+
+        Returns:
+            np.ndarray: (n,)-array containing the predictions.
+        """
+        return np.argmax(observations @ theta.T, axis=1)
+
+    def fit(self, A, y):
+        assert self.mechanism is not None, "Must calibrate mechanism first"
+        self.theta = self.dp_least_squares(
+            A,
+            y,
+            self.weight_alpha,
+            self.reg_lambda,
+            self.clipping_norm,
+            self.mechanism.params["noise_multiplier"],
+            self.seed,
+            self.k_classes,
+        )
+
+    def classify(self, X):
+        assert self.theta is not None, "Must fit classifier first"
+        return self.least_squares_classification(X, self.theta)
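For context on the accounting in the mechanisms.py hunk below: dp_least_squares
makes three Gaussian releases with the same noise multiplier sigma (the global
covariance, the per-class covariances, and the per-class sums). Each such
release is 1/(2 sigma^2)-zCDP, and zCDP composes additively, which is where
rho = 3 / (2 * noise_multiplier**2) in LeastSquaresCDPM comes from. A small
sketch of that arithmetic (my illustration, not part of the patch; the
zCDP-to-(epsilon, delta) conversion is the standard bound from Bun & Steinke,
2016, and autodp computes a tighter conversion internally):

    import math

    def rho_total(noise_multiplier: float, n_releases: int = 3) -> float:
        # Each Gaussian release with multiplier sigma is 1/(2 sigma^2)-zCDP;
        # adaptive composition of n such releases sums the rho values.
        return n_releases / (2 * noise_multiplier**2)

    def zcdp_to_approx_dp(rho: float, delta: float) -> float:
        # rho-zCDP implies (rho + 2 * sqrt(rho * log(1/delta)), delta)-DP.
        return rho + 2 * math.sqrt(rho * math.log(1 / delta))

    print(zcdp_to_approx_dp(rho_total(10.0), 1e-5))  # loose epsilon bound for sigma = 10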
diff --git a/src/dp_learning_ff/mechanisms.py b/src/dp_learning_ff/mechanisms.py
index 9dc583f..a0be64d 100644
--- a/src/dp_learning_ff/mechanisms.py
+++ b/src/dp_learning_ff/mechanisms.py
@@ -1,9 +1,9 @@
+import math
 from typing import Iterable, Literal, Optional
 
 from autodp.autodp_core import Mechanism
-from autodp.mechanism_zoo import GaussianMechanism
-from autodp.transformer_zoo import ComposeGaussian, AmplificationBySampling
-import math
+from autodp.mechanism_zoo import GaussianMechanism, zCDP_Mechanism
+from autodp.transformer_zoo import AmplificationBySampling, ComposeGaussian
 
 
 class CoinpressGM(Mechanism):
@@ -70,6 +70,19 @@ def __init__(
         super().__init__(name=name, p_sampling=p_sampling, Ps=Ps)
 
 
+class LeastSquaresCDPM(Mechanism):
+    def __init__(self, noise_multiplier, p_sampling: float = 1, name="LeastSquares"):
+        assert noise_multiplier > 0, "noise_multiplier must be positive"
+        assert 0 < p_sampling <= 1, "p_sampling must be in (0, 1]"
+        self.params = {"noise_multiplier": noise_multiplier}
+        self.name = name
+        # Three Gaussian releases with the same noise multiplier sigma
+        # compose to rho = 3 / (2 * sigma^2) under zCDP.
+        mechanism = zCDP_Mechanism(rho=3 / (2 * noise_multiplier**2), xi=0)
+        if p_sampling < 1:
+            preprocessing = AmplificationBySampling()
+            mechanism = preprocessing.amplify(mechanism, p_sampling)
+        self.set_all_representation(mechanism)
+
+
 def calibrate_single_param(mechanism_class, epsilon, delta, verbose: bool = False):
     def obj(x):
         return mechanism_class(x).get_approxDP(delta)
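LeastSquaresClassifier.calibrate wraps the calibration shown above; the
mechanism can also be calibrated directly. A sketch under the assumption that
calibrate_single_param returns the calibrated mechanism object, as its use in
calibrate above suggests (the target epsilon/delta and the sampling rate are
illustrative):

    from dp_learning_ff.mechanisms import LeastSquaresCDPM, calibrate_single_param

    def scaled_mechanism(noise_multiplier):
        # Poisson subsampling at rate 0.1 amplifies the zCDP guarantee.
        return LeastSquaresCDPM(noise_multiplier=noise_multiplier, p_sampling=0.1)

    mechanism = calibrate_single_param(scaled_mechanism, epsilon=1.0, delta=1e-5)
    print(mechanism.get_approxDP(1e-5))  # should be close to the 1.0 target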