From 20fd10c425c47c38a775b2223c5d7161264f144f Mon Sep 17 00:00:00 2001
From: Anselm Paulus <40758879+a-paulus@users.noreply.github.com>
Date: Mon, 29 Jul 2024 11:19:37 +0200
Subject: [PATCH 01/13] Update README.md

---
 README.md | 182 +-----------------------------------------------------
 1 file changed, 1 insertion(+), 181 deletions(-)

diff --git a/README.md b/README.md
index 639b0d0..3f85fb9 100644
--- a/README.md
+++ b/README.md
@@ -1,181 +1 @@
-[![Build Status](http://github.com/cvxgrp/diffcp/workflows/build/badge.svg?event=push)](https://github.com/cvxgrp/diffcp/actions/workflows/build.yml)
-
-# diffcp
-
-`diffcp` is a Python package for computing the derivative of a convex cone program, with respect to its problem data. The derivative is implemented as an abstract linear map, with methods for its forward application and its adjoint. 
-
-The implementation is based on the calculations in our paper [Differentiating through a cone program](http://web.stanford.edu/~boyd/papers/diff_cone_prog.html).
-
-### Installation
-`diffcp` is available on PyPI, as a source distribution. Install it with
-
-```bash
-pip install diffcp
-```
-
-You will need a C++11-capable compiler to build `diffcp`.
-
-`diffcp` requires:
-* [NumPy](https://github.com/numpy/numpy) >= 1.15
-* [SciPy](https://github.com/scipy/scipy) >= 1.10
-* [SCS](https://github.com/bodono/scs-python) >= 2.0.2
-* [pybind11](https://github.com/pybind/pybind11/tree/stable) >= 2.4
-* [threadpoolctl](https://github.com/joblib/threadpoolctl) >= 1.1
-* [ECOS](https://github.com/embotech/ecos-python) >= 2.0.10
-* [Clarabel](https://github.com/oxfordcontrol/Clarabel.rs) >= 0.5.1
-* Python >= 3.7
-
-`diffcp` uses Eigen; Eigen operations can be automatically vectorized by compilers. To enable vectorization, install with
-
-```bash
-MARCH_NATIVE=1 pip install diffcp
-```
-
-OpenMP can be enabled by passing extra arguments to your compiler. For example, on linux, you can tell gcc to activate the OpenMP extension by specifying the flag "-fopenmp":
-
-```bash
-OPENMP_FLAG="-fopenmp" pip install diffcp
-```
-
-To enable both vectorization and OpenMP (on linux), use
-
-```bash
-MARCH_NATIVE=1 OPENMP_FLAG="-fopenmp" pip install diffcp
-```
-
-### Cone programs
-`diffcp` differentiates through a primal-dual cone program pair. The primal problem must be expressed as 
-
-```
-minimize        c'x
-subject to      Ax + s = b
-                s in K
-```
-where  `x` and `s` are variables, `A`, `b` and `c` are the user-supplied problem data, and `K` is a user-defined convex cone. The corresponding dual problem is
-
-```
-minimize        b'y
-subject to      A'y + c == 0
-                y in K^*
-```
-
-with dual variable `y`.
-
-### Usage
-
-`diffcp` exposes the function
-
-```python
-solve_and_derivative(A, b, c, cone_dict, warm_start=None, solver=None, **kwargs).
-```
-
-This function returns a primal-dual solution `x`, `y`, and `s`, along with
-functions for evaluating the derivative and its adjoint (transpose).
-These functions respectively compute right and left multiplication of the derivative
-of the solution map at `A`, `b`, and `c` by a vector.
-The `solver` argument determines which solver to use; the available solvers
-are `solver="SCS"`, `solver="ECOS"`, and `solver="Clarabel"`.
-If no solver is specified, `diffcp` will choose the solver itself.
-In the case that the problem is not solved, i.e. the solver fails for some reason, we will raise
-a `SolverError` Exception.
-
-#### Arguments
-The arguments `A`, `b`, and `c` correspond to the problem data of a cone program.
-* `A` must be a [SciPy sparse CSC matrix](https://docs.scipy.org/doc/scipy/reference/generated/scipy.sparse.csc_matrix.html).
-* `b` and `c` must be NumPy arrays.
-* `cone_dict` is a dictionary that defines the convex cone `K`.
-* `warm_start` is an optional tuple `(x, y, s)` at which to warm-start. (Note: this is only available for the SCS solver).
-* `**kwargs` are keyword arguments to forward to the solver (e.g., `verbose=False`).
-
-These inputs must conform to the [SCS convention](https://github.com/bodono/scs-python) for problem data. The keys in `cone_dict` correspond to the cones, with
-* `diffcp.ZERO` for the zero cone,
-* `diffcp.POS` for the positive orthant,
-* `diffcp.SOC` for a product of SOC cones,
-* `diffcp.PSD` for a product of PSD cones, and
-* `diffcp.EXP` for a product of exponential cones.
-
-The values in `cone_dict` denote the sizes of each cone; the values of `diffcp.SOC`, `diffcp.PSD`, and `diffcp.EXP` should be lists. The order of the rows of `A` must match the ordering of the cones given above. For more details, consult the [SCS documentation](https://github.com/cvxgrp/scs/blob/master/README.md).
-
-#### Return value
-The function `solve_and_derivative` returns a tuple
-
-```python
-(x, y, s, derivative, adjoint_derivative)
-```
-
-* `x`, `y`, and `s` are a primal-dual solution.
-
-* `derivative` is a function that applies the derivative at `(A, b, c)` to perturbations `dA`, `db`, `dc`. It has the signature 
-```derivative(dA, db, dc) -> dx, dy, ds```, where `dA` is a SciPy sparse CSC matrix with the same sparsity pattern as `A`, and `db` and `dc` are NumPy arrays. `dx`, `dy`, and `ds` are NumPy arrays, approximating the change in the primal-dual solution due to the perturbation.
-
-* `adjoint_derivative` is a function that applies the adjoint of the derivative to perturbations `dx`, `dy`, `ds`. It has the signature 
-```adjoint_derivative(dx, dy, ds) -> dA, db, dc```, where `dx`, `dy`, and `ds` are NumPy arrays.
-
-#### Example
-```python
-import numpy as np
-from scipy import sparse
-
-import diffcp
-
-cone_dict = {
-    diffcp.ZERO: 3,
-    diffcp.POS: 3,
-    diffcp.SOC: [5]
-}
-
-m = 3 + 3 + 5
-n = 5
-
-A, b, c = diffcp.utils.random_cone_prog(m, n, cone_dict)
-x, y, s, D, DT = diffcp.solve_and_derivative(A, b, c, cone_dict)
-
-# evaluate the derivative
-nonzeros = A.nonzero()
-data = 1e-4 * np.random.randn(A.size)
-dA = sparse.csc_matrix((data, nonzeros), shape=A.shape)
-db = 1e-4 * np.random.randn(m)
-dc = 1e-4 * np.random.randn(n)
-dx, dy, ds = D(dA, db, dc)
-
-# evaluate the adjoint of the derivative
-dx = c
-dy = np.zeros(m)
-ds = np.zeros(m)
-dA, db, dc = DT(dx, dy, ds)
-```
-
-For more examples, including the SDP example described in the paper, see the [`examples`](examples/) directory.
-
-### Citing
-If you wish to cite `diffcp`, please use the following BibTex:
-
-```
-@article{diffcp2019,
-    author       = {Agrawal, A. and Barratt, S. and Boyd, S. and Busseti, E. and Moursi, W.},
-    title        = {Differentiating through a Cone Program},
-    journal      = {Journal of Applied and Numerical Optimization},
-    year         = {2019},
-    volume       = {1},
-    number       = {2},
-    pages        = {107--115},
-}
-
-@misc{diffcp,
-    author       = {Agrawal, A. and Barratt, S. and Boyd, S. and Busseti, E. and Moursi, W.},
-    title        = {{diffcp}: differentiating through a cone program, version 1.0},
-    howpublished = {\url{https://github.com/cvxgrp/diffcp}},
-    year         = 2019
-}
-```
-
-The following thesis concurrently derived the mathematics behind differentiating cone programs.
-```
-@phdthesis{amos2019differentiable,
-  author       = {Brandon Amos},
-  title        = {{Differentiable Optimization-Based Modeling for Machine Learning}},
-  school       = {Carnegie Mellon University},
-  year         = 2019,
-  month        = May,
-}
-```
+This branch is currently only a fork of the original diffcp implementation. The LPGD implementation will be added soon.

From 26727e1f27ff2bd4870d3e2deab2cea72fc431ed Mon Sep 17 00:00:00 2001
From: Anselm Paulus <anselm.paulus@uni-tuebingen.de>
Date: Fri, 9 Aug 2024 12:38:45 +0200
Subject: [PATCH 02/13] Add LPGD

---
 diffcp/cone_program.py | 383 ++++++++++++++++++++++++++++++++++-------
 diffcp/utils.py        |  13 ++
 examples/batch_LPGD.py |  42 +++++
 3 files changed, 373 insertions(+), 65 deletions(-)
 create mode 100644 examples/batch_LPGD.py

diff --git a/diffcp/cone_program.py b/diffcp/cone_program.py
index 9919108..ea3fb86 100644
--- a/diffcp/cone_program.py
+++ b/diffcp/cone_program.py
@@ -12,6 +12,8 @@
 
 import _diffcp
 import diffcp.cones as cone_lib
+import copy
+from diffcp.utils import regularize_P
 
 
 def pi(z, cones):
@@ -49,7 +51,7 @@ def solve_and_derivative_batch(As, bs, cs, cone_dicts, n_jobs_forward=-1, n_jobs
             means serial and n_jobs_forward = -1 defaults to the number of CPUs (default=-1).
         n_jobs_backward - Number of jobs to use in the backward pass. n_jobs_backward = 1
             means serial and n_jobs_backward = -1 defaults to the number of CPUs (default=-1).
-        mode - Differentiation mode in ["lsqr", "lsmr", "dense"].
+        mode - Differentiation mode in ["lsqr", "lsmr", "dense", "lpgd", "lpgd_right", "lpgd_left"].
         warm_starts - A list of warm starts.
         kwargs - kwargs sent to scs.
 
@@ -248,7 +250,7 @@ def solve_and_derivative(A, b, c, cone_dict, warm_start=None, mode='lsqr',
           exponential cones. See SCS documentation for more details.
       warm_start: (optional) A tuple (x, y, s) at which to warm-start SCS.
       mode: (optional) Which mode to compute derivative with, options are
-          ["dense", "lsqr", "lsmr"].
+          ["dense", "lsqr", "lsmr", "lpgd", "lpgd_right", "lpgd_left"].
       solve_method: (optional) Name of solver to use; SCS, ECOS, or Clarabel.
       kwargs: (optional) Keyword arguments to send to the solver.
 
@@ -306,7 +308,7 @@ def solve_only(A, b, c, cone_dict, warm_start=None,
 
 
 def solve_internal(A, b, c, cone_dict, solve_method=None,
-        warm_start=None, raise_on_error=True, **kwargs):
+        warm_start=None, raise_on_error=True, P=None, **kwargs):
 
     if solve_method is None:
         psd_cone = ('s' in cone_dict) and (cone_dict['s'] != [])
@@ -335,6 +337,7 @@ def solve_internal(A, b, c, cone_dict, solve_method=None,
                 del kwargs["eps"]
 
         data = {
+            "P": P,
             "A": A,
             "b": b,
             "c": c
@@ -349,7 +352,7 @@ def solve_internal(A, b, c, cone_dict, solve_method=None,
         result = scs.solve(data, cone_dict, **kwargs)
 
         status = result["info"]["status"]
-        inaccurate_status = {"Solved/Inaccurate", "solved (inaccurate - reached max_iters)"}
+        inaccurate_status = {"Solved/Inaccurate", "solved (inaccurate - reached max_iters)", "solved (inaccurate - reached time_limit_secs)"}
         if status in inaccurate_status and "acceleration_lookback" not in kwargs:
             # anderson acceleration is sometimes unstable
             result = scs.solve(
@@ -367,6 +370,8 @@ def solve_internal(A, b, c, cone_dict, solve_method=None,
                 return result
 
     elif solve_method == "ECOS":
+        if P is not None:
+            raise ValueError("ECOS does not support quadratic objectives.")
         if warm_start is not None:
             raise ValueError('ECOS does not support warmstart.')
         if ('s' in cone_dict) and (cone_dict['s'] != []):
@@ -449,7 +454,8 @@ def solve_internal(A, b, c, cone_dict, solve_method=None,
                           'pobj': solution['info']['pcost']}
     elif solve_method == "Clarabel":
         # for now set P to 0
-        P = sparse.csc_matrix((c.size, c.size))
+        if P is None:
+            P = sparse.csc_matrix((c.size, c.size))
 
         cones = []
         if "z" in cone_dict:
@@ -510,10 +516,10 @@ def solve_internal(A, b, c, cone_dict, solve_method=None,
     return result
 
 def solve_and_derivative_internal(A, b, c, cone_dict, solve_method=None,
-        warm_start=None, mode='lsqr', raise_on_error=True, **kwargs):
-    if mode not in ["dense", "lsqr", "lsmr"]:
+        warm_start=None, mode='lsqr', raise_on_error=True, P=None, **kwargs):
+    if mode not in ["dense", "lsqr", "lsmr", "lpgd", "lpgd_right", "lpgd_left"]:
         raise ValueError("Unsupported mode {}; the supported modes are "
-                         "'dense', 'lsqr' and 'lsmr'".format(mode))
+                         "'dense', 'lsqr', 'lsmr', 'lpgd', 'lpgd_right' and 'lpgd_left'".format(mode))
     if np.isnan(A.data).any():
         raise RuntimeError("Found a NaN in A.")
 
@@ -529,6 +535,11 @@ def solve_and_derivative_internal(A, b, c, cone_dict, solve_method=None,
     # eliminate explicit zeros in A, we no longer need them
     A.eliminate_zeros()
 
+    if "derivative_kwargs" in kwargs:
+        derivative_kwargs = kwargs.pop("derivative_kwargs")
+    else:
+        derivative_kwargs = {}
+    solver_kwargs = kwargs
     result = solve_internal(A, b, c, cone_dict, solve_method=solve_method,
         warm_start=warm_start, raise_on_error=raise_on_error, **kwargs)
     x = result["x"]
@@ -538,27 +549,29 @@ def solve_and_derivative_internal(A, b, c, cone_dict, solve_method=None,
     # pre-compute quantities for the derivative
     m, n = A.shape
     N = m + n + 1
-    cones = cone_lib.parse_cone_dict(cone_dict)
-    cones_parsed = cone_lib.parse_cone_dict_cpp(cones)
-    z = (x, y - s, np.array([1]))
-    u, v, w = z
 
-    Q = sparse.bmat([
-        [None, A.T, np.expand_dims(c, - 1)],
-        [-A, None, np.expand_dims(b, -1)],
-        [-np.expand_dims(c, -1).T, -np.expand_dims(b, -1).T, None]
-    ])
+    if "lpgd" not in mode:  # pre-compute quantities for the derivative
+        cones = cone_lib.parse_cone_dict(cone_dict)
+        cones_parsed = cone_lib.parse_cone_dict_cpp(cones)
+        z = (x, y - s, np.array([1]))
+        u, v, w = z
+
+        Q = sparse.bmat([
+            [None, A.T, np.expand_dims(c, - 1)],
+            [-A, None, np.expand_dims(b, -1)],
+            [-np.expand_dims(c, -1).T, -np.expand_dims(b, -1).T, None]
+        ])
 
-    D_proj_dual_cone = _diffcp.dprojection(v, cones_parsed, True)
-    if mode == "dense":
-        Q_dense = Q.todense()
-        M = _diffcp.M_dense(Q_dense, cones_parsed, u, v, w)
-        MT = M.T
-    elif mode in ("lsqr", "lsmr"):
-        M = _diffcp.M_operator(Q, cones_parsed, u, v, w)
-        MT = M.transpose()
+        D_proj_dual_cone = _diffcp.dprojection(v, cones_parsed, True)
+        if mode == "dense":
+            Q_dense = Q.todense()
+            M = _diffcp.M_dense(Q_dense, cones_parsed, u, v, w)
+            MT = M.T
+        elif mode in ("lsqr", "lsmr"):
+            M = _diffcp.M_operator(Q, cones_parsed, u, v, w)
+            MT = M.transpose()
 
-    pi_z = pi(z, cones)
+        pi_z = pi(z, cones)
 
     def derivative(dA, db, dc, **kwargs):
         """Applies derivative at (A, b, c) to perturbations dA, db, dc
@@ -571,27 +584,32 @@ def derivative(dA, db, dc, **kwargs):
            NumPy arrays dx, dy, ds, the result of applying the derivative
            to the perturbations.
         """
-        dQ = sparse.bmat([
-            [None, dA.T, np.expand_dims(dc, - 1)],
-            [-dA, None, np.expand_dims(db, -1)],
-            [-np.expand_dims(dc, -1).T, -np.expand_dims(db, -1).T, None]
-        ])
-        rhs = dQ @ pi_z
-        if np.allclose(rhs, 0):
-            dz = np.zeros(rhs.size)
-        elif mode == "dense":
-            dz = _diffcp._solve_derivative_dense(M, MT, rhs)
-        elif mode == "lsqr":
-            dz = _diffcp.lsqr(M, rhs).solution
-        elif mode == "lsmr":
-            M_sp = sparse.linalg.LinearOperator(dQ.shape, matvec=M.matvec, rmatvec=M.rmatvec)
-            dz, istop, itn, normr, normar, norma, conda, normx = sparse.linalg.lsmr(M_sp, rhs, maxiter=10*M_sp.shape[0], atol=1e-12, btol=1e-12)
-
-        du, dv, dw = np.split(dz, [n, n + m])
-        dx = du - x * dw
-        dy = D_proj_dual_cone.matvec(dv) - y * dw
-        ds = D_proj_dual_cone.matvec(dv) - dv - s * dw
-        return -dx, -dy, -ds
+
+        if "lpgd" in mode:
+            dx, dy, ds = derivative_lpgd(dA, db, dc, mode=mode, **derivative_kwargs, **kwargs)
+            return dx, dy, ds
+        else:
+            dQ = sparse.bmat([
+                [None, dA.T, np.expand_dims(dc, - 1)],
+                [-dA, None, np.expand_dims(db, -1)],
+                [-np.expand_dims(dc, -1).T, -np.expand_dims(db, -1).T, None]
+            ])
+            rhs = dQ @ pi_z
+            if np.allclose(rhs, 0):
+                dz = np.zeros(rhs.size)
+            elif mode == "dense":
+                dz = _diffcp._solve_derivative_dense(M, MT, rhs)
+            elif mode == "lsqr":
+                dz = _diffcp.lsqr(M, rhs).solution
+            elif mode == "lsmr":
+                M_sp = sparse.linalg.LinearOperator(dQ.shape, matvec=M.matvec, rmatvec=M.rmatvec)
+                dz, istop, itn, normr, normar, norma, conda, normx = sparse.linalg.lsmr(M_sp, rhs, maxiter=10*M_sp.shape[0], atol=1e-12, btol=1e-12)
+
+            du, dv, dw = np.split(dz, [n, n + m])
+            dx = du - x * dw
+            dy = D_proj_dual_cone.matvec(dv) - y * dw
+            ds = D_proj_dual_cone.matvec(dv) - dv - s * dw
+            return -dx, -dy, -ds
 
     def adjoint_derivative(dx, dy, ds, **kwargs):
         """Applies adjoint of derivative at (A, b, c) to perturbations dx, dy, ds
@@ -604,26 +622,261 @@ def adjoint_derivative(dx, dy, ds, **kwargs):
             perturbations; the sparsity pattern of `dA` matches that of `A`.
         """
 
-        dw = -(x @ dx + y @ dy + s @ ds)
-        dz = np.concatenate(
-            [dx, D_proj_dual_cone.rmatvec(dy + ds) - ds, np.array([dw])])
-
-        if np.allclose(dz, 0):
-            r = np.zeros(dz.shape)
-        elif mode == "dense":
-            r = _diffcp._solve_adjoint_derivative_dense(M, MT, dz)
-        elif mode == "lsqr":
-            r = _diffcp.lsqr(MT, dz).solution
-        elif mode == "lsmr":
-            MT_sp = sparse.linalg.LinearOperator(dz.shape*2, matvec=MT.matvec, rmatvec=MT.rmatvec)
-            r, istop, itn, normr, normar, norma, conda, normx = sparse.linalg.lsmr(MT_sp, dz, maxiter=10*MT_sp.shape[0], atol=1e-10, btol=1e-10)
-
-        values = pi_z[cols] * r[rows + n] - pi_z[n + rows] * r[cols]
-        dA = sparse.csc_matrix((values, (rows, cols)), shape=A.shape)
-        db = pi_z[n:n + m] * r[-1] - pi_z[-1] * r[n:n + m]
-        dc = pi_z[:n] * r[-1] - pi_z[-1] * r[:n]
+        if "lpgd" in mode:
+            dA, db, dc = adjoint_derivative_lpgd(dx, dy, ds, mode=mode, **derivative_kwargs, **kwargs)
+        else:
+            dw = -(x @ dx + y @ dy + s @ ds)
+            dz = np.concatenate(
+                [dx, D_proj_dual_cone.rmatvec(dy + ds) - ds, np.array([dw])])
+
+            if np.allclose(dz, 0):
+                r = np.zeros(dz.shape)
+            elif mode == "dense":
+                r = _diffcp._solve_adjoint_derivative_dense(M, MT, dz)
+            elif mode == "lsqr":
+                r = _diffcp.lsqr(MT, dz).solution
+            elif mode == "lsmr":
+                MT_sp = sparse.linalg.LinearOperator(dz.shape*2, matvec=MT.matvec, rmatvec=MT.rmatvec)
+                r, istop, itn, normr, normar, norma, conda, normx = sparse.linalg.lsmr(MT_sp, dz, maxiter=10*MT_sp.shape[0], atol=1e-10, btol=1e-10)
+
+            values = pi_z[cols] * r[rows + n] - pi_z[n + rows] * r[cols]
+            dA = sparse.csc_matrix((values, (rows, cols)), shape=A.shape)
+            db = pi_z[n:n + m] * r[-1] - pi_z[-1] * r[n:n + m]
+            dc = pi_z[:n] * r[-1] - pi_z[-1] * r[:n]
 
         return dA, db, dc
+    
+    def derivative_lpgd(dA, db, dc, mode, tau, rho):
+        """Computes standard finite difference derivative at (A, b, c) to perturbations dA, db, dc
+        with optional regularization rho.
+        Args:
+            dA: SciPy sparse matrix in CSC format; must have same sparsity
+                pattern as the matrix `A` from the cone program
+            db: NumPy array representing perturbation in `b`
+            dc: NumPy array representing perturbation in `c`
+            mode: Mode of the derivative computation; options are
+                ["lpgd", "lpgd_left", "lpgd_right"] corresponding to central, left, and right finite differences.
+            tau: Perturbation strength parameter
+            rho: Regularization parameter
+        Returns:
+           NumPy arrays dx, dy, ds, the result of applying the derivative
+           to the perturbations.
+        """
+
+        if tau is None or tau <= 0:
+            raise ValueError(f"LPGD mode requires tau > 0, got tau={tau}.")
+        if rho < 0:
+            raise ValueError(f"Finite difference mode requires rho >= 0, got rho={rho}.")
+        
+        if mode in ["lpgd", "lpgd_right"]:  # Perturb the problem to the right
+            x_right, y_right, s_right = compute_perturbed_solution(dA, db, dc, tau, rho, solve_method, solver_kwargs)
+
+        if mode in ["lpgd", "lpgd_left"]:  # Perturb the problem to the left
+            x_left, y_left, s_left = compute_perturbed_solution(dA, db, dc, -tau, rho, solve_method, solver_kwargs)
+
+        if mode == "lpgd":
+            dx = (x_right - x_left) / (2 * tau)
+            dy = (y_right - y_left) / (2 * tau)
+            ds = (s_right - s_left) / (2 * tau)
+        elif mode == "lpgd_right":
+            dx = (x_right - x) / tau
+            dy = (y_right - y) / tau
+            ds = (s_right - s) / tau
+        elif mode == "lpgd_left":
+            dx = (x - x_left) / tau
+            dy = (y - y_left) / tau
+            ds = (s - s_left) / tau
+        else:
+            raise ValueError(f"Only modes 'lpgd', 'lpgd_left', 'lpgd_right' can be used, got {mode}.")
+        return dx, dy, ds
+
+    def compute_perturbed_solution(dA, db, dc, tau, rho, solve_method, solver_kwargs):
+        """
+        Computes the perturbed solution x_right, y_right, s_right at (A, b, c) with 
+        perturbations dA, db, dc and optional regularization rho.
+        
+        Args:
+            dA: SciPy sparse matrix in CSC format; must have same sparsity
+                pattern as the matrix `A` from the cone program
+            db: NumPy array representing perturbation in `b`
+            dc: NumPy array representing perturbation in `c`
+            tau: Perturbation strength parameter
+            rho: Regularization parameter
+            solve_method: Name of solver to use; SCS, ECOS, or Clarabel
+            solver_kwargs: Additional keyword arguments to send to the solver
+        Returns:
+            x_pert: Perturbed solution of the primal variable x
+            y_pert: Perturbed solution of the dual variable y
+            s_pert: Perturbed solution of the slack variable s
+        """
+
+        # Perturb the problem
+        A_pert = A + tau * dA
+        b_pert = b + tau * db
+        c_pert = c + tau * dc
+
+        # Regularize
+        P_reg = regularize_P(P, rho=rho, size=n)
+        c_pert_reg = c_pert - rho * x
+
+        # Set warm start
+        warm_start = (x, y, s)
+
+        # Solve the perturbed problem
+        result_pert = solve_internal(A=A_pert, b=b_pert, c=c_pert_reg, P=P_reg, cone_dict=cone_dict, 
+                                     solve_method=solve_method, warm_start=warm_start, **solver_kwargs)
+        # Extract the solutions
+        x_pert, y_pert, s_pert = result_pert["x"], result_pert["y"], result_pert["s"]
+        return x_pert, y_pert, s_pert
+    
+    def adjoint_derivative_lpgd(dx, dy, ds, mode, tau, rho):
+        """Lagrangian Proximal Gradient Descent (LPGD) [arxiv.org/abs/2407.05920]
+        Computes informative replacements for adjoint derivatives given incoming gradients dx, dy, ds, 
+        can be regarded as an efficient adjoint finite difference method.
+        Tau controls the perturbation strength. In the limit tau -> 0, LPGD computes the exact adjoint derivative.
+        For finite tau, LPGD computes an informative replacement of the adjoint derivative, avoiding degeneracies.
+        Rho controls the regularization strength. If rho > 0, the perturbed problem is regularized.
+
+        Args:
+            dx: NumPy array representing perturbation in `x`
+            dy: NumPy array representing perturbation in `y`
+            ds: NumPy array representing perturbation in `s`
+            mode: Mode of the adjoint derivative computation; options arem ["lpgd", "lpgd_left", "lpgd_right"]
+            tau: Perturbation strength parameter
+            rho: Regularization parameter
+        Returns:
+            (`dA`, `db`, `dc`), the result of applying the LPGD to the
+            perturbations; the sparsity pattern of `dA` matches that of `A`.
+        """
+
+        if tau is None or tau <= 0:
+            raise ValueError(f"Finite difference mode requires tau > 0, got tau={tau}.")
+        if rho < 0:
+            raise ValueError(f"Finite difference mode requires rho >= 0, got rho={rho}.")
+
+        if mode in ["lpgd", "lpgd_right"]:  # Perturb the problem to the right
+            x_right, y_right, _ = compute_adjoint_perturbed_solution(dx, dy, ds, tau, rho, solve_method, solver_kwargs)
+
+        if mode in ["lpgd", "lpgd_left"]:  # Perturb the problem to the left
+            x_left, y_left, _ = compute_adjoint_perturbed_solution(dx, dy, ds, -tau, rho, solve_method, solver_kwargs)
+
+        if mode == "lpgd":
+            dc = (x_right - x_left) / (2 * tau)
+            db = - (y_right - y_left) / (2 * tau)
+            dA_data = (y_right[rows] * x_right[cols] - y_left[rows] * x_left[cols]) / (2 * tau)
+        elif mode == "lpgd_right":
+            dc = (x_right - x) / tau
+            db = - (y_right - y) / tau
+            dA_data = (y_right[rows] * x_right[cols] - y[rows] * x[cols]) / tau
+        elif mode == "lpgd_left":
+            dc = (x - x_left) / tau
+            db = - (y - y_left) / tau
+            dA_data = (y[rows] * x[cols] - y_left[rows] * x_left[cols]) / tau
+        else:
+            raise ValueError(f"Only modes 'lpgd', 'lpgd_left', 'lpgd_right' can be used, got {mode}.")
+        
+        dA = sparse.csc_matrix((dA_data, (rows, cols)), shape=A.shape)
+        return dA, db, dc
+
+    def compute_adjoint_perturbed_solution(dx, dy, ds, tau, rho, solve_method, solver_kwargs):
+        """
+        Computes the adjoint perturbed solution x_right, y_right, s_right (Lagrangian Proximal Map)
+        by solving the following perturbed problem with perturbations dx, dy, ds and perturbation parameters tau, rho:
+        argmin.     <x,Px> + <c,x> + tau (<dx,x> + <ds,s>) + rho/2 |x-x^*|^2
+        subject to  Ax + s = b-tau*dy
+                    s \in K
+
+        For ds=0 we solve
+        argmin.     <x,(P+rho*I)x> + <c+tau*dx-rho*x^*, x>
+        subject to  Ax + s = b-tau*dy
+                    s \in K
+        This is just a perturbed instance of the forward optimization problem.
+
+        For ds!=0 we rewrite the problem as an embedded problem.
+        We add a constraint x'=s, replace all appearances of s with x' and solve
+        argmin.     <[x, x'], [[P+rho*I,     0]; [x, x']> + <[c+tau*dx-rho*x^*, tau*ds-rho*s^*], [x, x']>
+                               [      0, rho*I]]
+        subject to  [[A,  I]; [x, x'] + [0; s] = [0; b-tau*dy]
+                     [0, -I]]
+                    s \in K
+        Note that we also add a regularizer on s (rho/2 |s-s^*|^2) in this case.
+        
+        Args:
+            dx: NumPy array representing perturbation in `x`
+            dy: NumPy array representing perturbation in `y`
+            ds: NumPy array representing perturbation in `s`
+            tau: Perturbation strength parameter
+            rho: Regularization parameter
+            solve_method: Name of solver to use; SCS, ECOS, or Clarabel
+            solver_kwargs: Additional keyword arguments to send to the solver
+        Returns:
+            x_pert: Perturbed solution of the primal variable x
+            y_pert: Perturbed solution of the dual variable y
+            s_pert: Perturbed solution of the slack variable s
+        """
+
+        # The cases ds = 0 and ds != 0 are handled separately, see docstring
+        if np.isclose(np.sum(np.abs(ds)), 0):
+            # Perturb problem
+            c_pert = c + tau * dx
+            b_pert = b - tau * dy
+
+            # Regularize
+            P_reg = regularize_P(P, rho=rho, size=n)
+            c_pert_reg = c_pert - rho * x
+
+            # Set warm start
+            warm_start = (x, y, s) if solve_method != "ECOS" else None
+
+            # Solve the perturbed problem
+            # TODO: If solve_method=='SCS' and rho==0, this can be sped up strongly by using solver.update
+            result_pert = solve_internal(A=A, b=b_pert, c=c_pert_reg, P=P_reg, cone_dict=cone_dict, 
+                                         solve_method=solve_method, warm_start=warm_start, **solver_kwargs)
+            # Extract the solutions
+            x_pert, y_pert, s_pert = result_pert["x"], result_pert["y"], result_pert["s"]
+        else:
+            # Embed problem
+            A_emb = sparse.bmat([
+                [A, sparse.eye(m, format=A.format)],
+                [None, -sparse.eye(m, format=A.format)]
+            ]).tocsc()
+            b_emb = np.hstack([b, np.zeros(m)])
+            c_emb = np.hstack([c, np.zeros(m)])
+            if P is not None:
+                P_emb = sparse.bmat([
+                    [P, None],
+                    [None, np.zeros((m, m))]
+                ]).tocsc()
+            else:
+                P_emb = None
+            cone_dict_emb = copy.deepcopy(cone_dict)
+            if 'z' in cone_dict_emb:
+                cone_dict_emb['z'] += m
+            else:
+                cone_dict_emb['z'] = m
+
+            # Perturb problem
+            b_emb_pert = b_emb - tau * np.hstack([dy, np.zeros(m)])
+            c_emb_pert = c_emb + tau * np.hstack([dx, ds])
+            
+            # Regularize
+            P_emb_reg = regularize_P(P_emb, rho=rho, size=n+m)
+            c_emb_pert_reg = c_emb_pert - rho * np.hstack([x, s])
+
+            # Set warm start
+            x_ws = np.hstack([x, s])
+            y_ws = np.hstack([y, y])
+            s_ws = np.hstack([s, s])
+            warm_start = (x_ws, y_ws, s_ws) if solve_method != "ECOS" else None
+
+            # Solve the embedded problem
+            result_pert = solve_internal(A=A_emb, b=b_emb_pert, c=c_emb_pert_reg, P=P_emb_reg, cone_dict=cone_dict_emb, 
+                                         solve_method=solve_method, warm_start=warm_start, **solver_kwargs)
+            # Extract the solutions
+            x_pert = result_pert['x'][:n]
+            y_pert = result_pert['y'][:m]
+            s_pert = result_pert['x'][n:n+m]
+        return x_pert, y_pert, s_pert
 
     result["D"] = derivative
     result["DT"] = adjoint_derivative
diff --git a/diffcp/utils.py b/diffcp/utils.py
index 22dc61a..40ec85e 100644
--- a/diffcp/utils.py
+++ b/diffcp/utils.py
@@ -36,3 +36,16 @@ def get_random_like(A, randomness):
     rows, cols = A.nonzero()
     values = randomness(A.nnz)
     return sparse.csc_matrix((values, (rows, cols)), shape=A.shape)
+
+
+def regularize_P(P, rho, size):
+    """Regularizes the matrix P by adding rho * I to it."""
+    if rho > 0:
+        reg = rho * sparse.eye(size, format='csc')
+        if P is None:
+            P_reg = reg
+        else:
+            P_reg = P + reg
+    else:
+        P_reg = P
+    return P_reg
diff --git a/examples/batch_LPGD.py b/examples/batch_LPGD.py
new file mode 100644
index 0000000..81fb4ab
--- /dev/null
+++ b/examples/batch_LPGD.py
@@ -0,0 +1,42 @@
+import diffcp
+import utils
+import IPython as ipy
+import time
+import numpy as np
+
+m = 100
+n = 50
+
+batch_size = 16
+n_jobs = 1
+
+As, bs, cs, Ks = [], [], [], []
+for _ in range(batch_size):
+    A, b, c, K = diffcp.utils.least_squares_eq_scs_data(m, n)
+    As += [A]
+    bs += [b]
+    cs += [c]
+    Ks += [K]
+
+def time_function(f, N=1):
+    result = []
+    for i in range(N):
+        tic = time.time()
+        f()
+        toc = time.time()
+        result += [toc-tic]
+    return np.mean(result), np.std(result)
+
+for n_jobs in range(1, 5):
+    def f_forward():
+        return diffcp.solve_and_derivative_batch(As, bs, cs, Ks,
+                n_jobs_forward=n_jobs, n_jobs_backward=n_jobs, mode='lpgd', solve_method="Clarabel")
+    xs, ys, ss, D_batch, DT_batch = diffcp.solve_and_derivative_batch(As, bs, cs, Ks,
+                n_jobs_forward=1, n_jobs_backward=n_jobs, mode='lpgd', solve_method="Clarabel")
+    def f_backward():
+        DT_batch(xs, ys, ss, tau=0.1, rho=0.1)
+
+    mean_forward, std_forward = time_function(f_forward)
+    mean_backward, std_backward = time_function(f_backward)
+    print ("%03d | %4.4f +/- %2.2f | %4.4f +/- %2.2f" %
+            (n_jobs, mean_forward, std_forward, mean_backward, std_backward))
\ No newline at end of file

From 8b0f8ac2f54f3c43edc1d142687835d421fbc95e Mon Sep 17 00:00:00 2001
From: Anselm Paulus <anselm.paulus@uni-tuebingen.de>
Date: Fri, 9 Aug 2024 13:35:48 +0200
Subject: [PATCH 03/13] Update Readme

---
 README.md | 210 +++++++++++++++++++++++++++++++++++++++++++++++++++++-
 1 file changed, 209 insertions(+), 1 deletion(-)

diff --git a/README.md b/README.md
index 3f85fb9..d0db92d 100644
--- a/README.md
+++ b/README.md
@@ -1 +1,209 @@
-This branch is currently only a fork of the original diffcp implementation. The LPGD implementation will be added soon.
+# Lagrangian Proximal Gradient Descent - diffcp implementation
+
+This fork implements [Lagrangian proximal gradient descent (LPGD)](https://arxiv.org/abs/2407.05920) as an option for the conic program in diffcp.
+It can be understood as an efficient implementation of adjoint finite differences, enabling informative derivative replacements in the case of degenerate derivatives.
+The perturbation strength is controlled by the parameter tau, the parameter rho controls th eregularization strength.
+Note: For the forward derivatives, LPGD computes standard finite differences with an optional regularization.
+
+Example usage:
+```python
+x, y, s, D, DT = solve_and_derivative(A, b, c, cone_dict, mode='LPGD')
+...
+dx, dy, ds = D(dA, db, dc, tau=0.1, rho=0.1)
+...
+dA, db, dc = DT(dx, dy, ds, tau=0.1, rho=0.1)
+```
+
+Alternatively the derivative keyword arguments can be passed directly on the forward pass:
+```python
+x, y, s, D, DT = solve_and_derivative(A, b, c, cone_dict, mode='LPGD', derivative_kwargs=dict(tau=0.1, rho=0.1))
+...
+dx, dy, ds = D(dA, db, dc)
+...
+dA, db, dc = DT(dx, dy, ds)
+```
+
+
+Below follows the original Readme:
+
+[![Build Status](http://github.com/cvxgrp/diffcp/workflows/build/badge.svg?event=push)](https://github.com/cvxgrp/diffcp/actions/workflows/build.yml)
+
+# diffcp
+
+`diffcp` is a Python package for computing the derivative of a convex cone program, with respect to its problem data. The derivative is implemented as an abstract linear map, with methods for its forward application and its adjoint. 
+
+The implementation is based on the calculations in our paper [Differentiating through a cone program](http://web.stanford.edu/~boyd/papers/diff_cone_prog.html).
+
+### Installation
+`diffcp` is available on PyPI, as a source distribution. Install it with
+
+```bash
+pip install diffcp
+```
+
+You will need a C++11-capable compiler to build `diffcp`.
+
+`diffcp` requires:
+* [NumPy](https://github.com/numpy/numpy) >= 1.15
+* [SciPy](https://github.com/scipy/scipy) >= 1.10
+* [SCS](https://github.com/bodono/scs-python) >= 2.0.2
+* [pybind11](https://github.com/pybind/pybind11/tree/stable) >= 2.4
+* [threadpoolctl](https://github.com/joblib/threadpoolctl) >= 1.1
+* [ECOS](https://github.com/embotech/ecos-python) >= 2.0.10
+* [Clarabel](https://github.com/oxfordcontrol/Clarabel.rs) >= 0.5.1
+* Python >= 3.7
+
+`diffcp` uses Eigen; Eigen operations can be automatically vectorized by compilers. To enable vectorization, install with
+
+```bash
+MARCH_NATIVE=1 pip install diffcp
+```
+
+OpenMP can be enabled by passing extra arguments to your compiler. For example, on linux, you can tell gcc to activate the OpenMP extension by specifying the flag "-fopenmp":
+
+```bash
+OPENMP_FLAG="-fopenmp" pip install diffcp
+```
+
+To enable both vectorization and OpenMP (on linux), use
+
+```bash
+MARCH_NATIVE=1 OPENMP_FLAG="-fopenmp" pip install diffcp
+```
+
+### Cone programs
+`diffcp` differentiates through a primal-dual cone program pair. The primal problem must be expressed as 
+
+```
+minimize        c'x
+subject to      Ax + s = b
+                s in K
+```
+where  `x` and `s` are variables, `A`, `b` and `c` are the user-supplied problem data, and `K` is a user-defined convex cone. The corresponding dual problem is
+
+```
+minimize        b'y
+subject to      A'y + c == 0
+                y in K^*
+```
+
+with dual variable `y`.
+
+### Usage
+
+`diffcp` exposes the function
+
+```python
+solve_and_derivative(A, b, c, cone_dict, warm_start=None, solver=None, **kwargs).
+```
+
+This function returns a primal-dual solution `x`, `y`, and `s`, along with
+functions for evaluating the derivative and its adjoint (transpose).
+These functions respectively compute right and left multiplication of the derivative
+of the solution map at `A`, `b`, and `c` by a vector.
+The `solver` argument determines which solver to use; the available solvers
+are `solver="SCS"`, `solver="ECOS"`, and `solver="Clarabel"`.
+If no solver is specified, `diffcp` will choose the solver itself.
+In the case that the problem is not solved, i.e. the solver fails for some reason, we will raise
+a `SolverError` Exception.
+
+#### Arguments
+The arguments `A`, `b`, and `c` correspond to the problem data of a cone program.
+* `A` must be a [SciPy sparse CSC matrix](https://docs.scipy.org/doc/scipy/reference/generated/scipy.sparse.csc_matrix.html).
+* `b` and `c` must be NumPy arrays.
+* `cone_dict` is a dictionary that defines the convex cone `K`.
+* `warm_start` is an optional tuple `(x, y, s)` at which to warm-start. (Note: this is only available for the SCS solver).
+* `**kwargs` are keyword arguments to forward to the solver (e.g., `verbose=False`).
+
+These inputs must conform to the [SCS convention](https://github.com/bodono/scs-python) for problem data. The keys in `cone_dict` correspond to the cones, with
+* `diffcp.ZERO` for the zero cone,
+* `diffcp.POS` for the positive orthant,
+* `diffcp.SOC` for a product of SOC cones,
+* `diffcp.PSD` for a product of PSD cones, and
+* `diffcp.EXP` for a product of exponential cones.
+
+The values in `cone_dict` denote the sizes of each cone; the values of `diffcp.SOC`, `diffcp.PSD`, and `diffcp.EXP` should be lists. The order of the rows of `A` must match the ordering of the cones given above. For more details, consult the [SCS documentation](https://github.com/cvxgrp/scs/blob/master/README.md).
+
+#### Return value
+The function `solve_and_derivative` returns a tuple
+
+```python
+(x, y, s, derivative, adjoint_derivative)
+```
+
+* `x`, `y`, and `s` are a primal-dual solution.
+
+* `derivative` is a function that applies the derivative at `(A, b, c)` to perturbations `dA`, `db`, `dc`. It has the signature 
+```derivative(dA, db, dc) -> dx, dy, ds```, where `dA` is a SciPy sparse CSC matrix with the same sparsity pattern as `A`, and `db` and `dc` are NumPy arrays. `dx`, `dy`, and `ds` are NumPy arrays, approximating the change in the primal-dual solution due to the perturbation.
+
+* `adjoint_derivative` is a function that applies the adjoint of the derivative to perturbations `dx`, `dy`, `ds`. It has the signature 
+```adjoint_derivative(dx, dy, ds) -> dA, db, dc```, where `dx`, `dy`, and `ds` are NumPy arrays.
+
+#### Example
+```python
+import numpy as np
+from scipy import sparse
+
+import diffcp
+
+cone_dict = {
+    diffcp.ZERO: 3,
+    diffcp.POS: 3,
+    diffcp.SOC: [5]
+}
+
+m = 3 + 3 + 5
+n = 5
+
+A, b, c = diffcp.utils.random_cone_prog(m, n, cone_dict)
+x, y, s, D, DT = diffcp.solve_and_derivative(A, b, c, cone_dict)
+
+# evaluate the derivative
+nonzeros = A.nonzero()
+data = 1e-4 * np.random.randn(A.size)
+dA = sparse.csc_matrix((data, nonzeros), shape=A.shape)
+db = 1e-4 * np.random.randn(m)
+dc = 1e-4 * np.random.randn(n)
+dx, dy, ds = D(dA, db, dc)
+
+# evaluate the adjoint of the derivative
+dx = c
+dy = np.zeros(m)
+ds = np.zeros(m)
+dA, db, dc = DT(dx, dy, ds)
+```
+
+For more examples, including the SDP example described in the paper, see the [`examples`](examples/) directory.
+
+### Citing
+If you wish to cite `diffcp`, please use the following BibTex:
+
+```
+@article{diffcp2019,
+    author       = {Agrawal, A. and Barratt, S. and Boyd, S. and Busseti, E. and Moursi, W.},
+    title        = {Differentiating through a Cone Program},
+    journal      = {Journal of Applied and Numerical Optimization},
+    year         = {2019},
+    volume       = {1},
+    number       = {2},
+    pages        = {107--115},
+}
+
+@misc{diffcp,
+    author       = {Agrawal, A. and Barratt, S. and Boyd, S. and Busseti, E. and Moursi, W.},
+    title        = {{diffcp}: differentiating through a cone program, version 1.0},
+    howpublished = {\url{https://github.com/cvxgrp/diffcp}},
+    year         = 2019
+}
+```
+
+The following thesis concurrently derived the mathematics behind differentiating cone programs.
+```
+@phdthesis{amos2019differentiable,
+  author       = {Brandon Amos},
+  title        = {{Differentiable Optimization-Based Modeling for Machine Learning}},
+  school       = {Carnegie Mellon University},
+  year         = 2019,
+  month        = May,
+}
+```

From 6e761ce6487e11bfa7b0c117e65259cf7d39f200 Mon Sep 17 00:00:00 2001
From: Anselm Paulus <anselm.paulus@uni-tuebingen.de>
Date: Fri, 9 Aug 2024 14:33:14 +0200
Subject: [PATCH 04/13] simplify argument passing

---
 README.md              |  6 ++--
 diffcp/cone_program.py | 63 +++++++++++++-----------------------------
 diffcp/utils.py        | 23 +++++++++++++++
 3 files changed, 45 insertions(+), 47 deletions(-)

diff --git a/README.md b/README.md
index d0db92d..934136b 100644
--- a/README.md
+++ b/README.md
@@ -2,12 +2,12 @@
 
 This fork implements [Lagrangian proximal gradient descent (LPGD)](https://arxiv.org/abs/2407.05920) as an option for the conic program in diffcp.
 It can be understood as an efficient implementation of adjoint finite differences, enabling informative derivative replacements in the case of degenerate derivatives.
-The perturbation strength is controlled by the parameter tau, the parameter rho controls th eregularization strength.
+The perturbation strength is controlled by the parameter tau, the parameter rho controls the regularization strength.
 Note: For the forward derivatives, LPGD computes standard finite differences with an optional regularization.
 
 Example usage:
 ```python
-x, y, s, D, DT = solve_and_derivative(A, b, c, cone_dict, mode='LPGD')
+x, y, s, D, DT = solve_and_derivative(A, b, c, cone_dict, mode='lpgd')
 ...
 dx, dy, ds = D(dA, db, dc, tau=0.1, rho=0.1)
 ...
@@ -16,7 +16,7 @@ dA, db, dc = DT(dx, dy, ds, tau=0.1, rho=0.1)
 
 Alternatively the derivative keyword arguments can be passed directly on the forward pass:
 ```python
-x, y, s, D, DT = solve_and_derivative(A, b, c, cone_dict, mode='LPGD', derivative_kwargs=dict(tau=0.1, rho=0.1))
+x, y, s, D, DT = solve_and_derivative(A, b, c, cone_dict, mode='lpgd', derivative_kwargs=dict(tau=0.1, rho=0.1))
 ...
 dx, dy, ds = D(dA, db, dc)
 ...
diff --git a/diffcp/cone_program.py b/diffcp/cone_program.py
index ea3fb86..5443192 100644
--- a/diffcp/cone_program.py
+++ b/diffcp/cone_program.py
@@ -12,8 +12,7 @@
 
 import _diffcp
 import diffcp.cones as cone_lib
-import copy
-from diffcp.utils import regularize_P
+from diffcp.utils import regularize_P, embed_problem
 
 
 def pi(z, cones):
@@ -586,7 +585,7 @@ def derivative(dA, db, dc, **kwargs):
         """
 
         if "lpgd" in mode:
-            dx, dy, ds = derivative_lpgd(dA, db, dc, mode=mode, **derivative_kwargs, **kwargs)
+            dx, dy, ds = derivative_lpgd(dA, db, dc, **derivative_kwargs, **kwargs)
             return dx, dy, ds
         else:
             dQ = sparse.bmat([
@@ -623,7 +622,7 @@ def adjoint_derivative(dx, dy, ds, **kwargs):
         """
 
         if "lpgd" in mode:
-            dA, db, dc = adjoint_derivative_lpgd(dx, dy, ds, mode=mode, **derivative_kwargs, **kwargs)
+            dA, db, dc = adjoint_derivative_lpgd(dx, dy, ds, **derivative_kwargs, **kwargs)
         else:
             dw = -(x @ dx + y @ dy + s @ ds)
             dz = np.concatenate(
@@ -646,7 +645,7 @@ def adjoint_derivative(dx, dy, ds, **kwargs):
 
         return dA, db, dc
     
-    def derivative_lpgd(dA, db, dc, mode, tau, rho):
+    def derivative_lpgd(dA, db, dc, tau, rho):
         """Computes standard finite difference derivative at (A, b, c) to perturbations dA, db, dc
         with optional regularization rho.
         Args:
@@ -654,8 +653,6 @@ def derivative_lpgd(dA, db, dc, mode, tau, rho):
                 pattern as the matrix `A` from the cone program
             db: NumPy array representing perturbation in `b`
             dc: NumPy array representing perturbation in `c`
-            mode: Mode of the derivative computation; options are
-                ["lpgd", "lpgd_left", "lpgd_right"] corresponding to central, left, and right finite differences.
             tau: Perturbation strength parameter
             rho: Regularization parameter
         Returns:
@@ -669,10 +666,10 @@ def derivative_lpgd(dA, db, dc, mode, tau, rho):
             raise ValueError(f"Finite difference mode requires rho >= 0, got rho={rho}.")
         
         if mode in ["lpgd", "lpgd_right"]:  # Perturb the problem to the right
-            x_right, y_right, s_right = compute_perturbed_solution(dA, db, dc, tau, rho, solve_method, solver_kwargs)
+            x_right, y_right, s_right = compute_perturbed_solution(dA, db, dc, tau, rho)
 
         if mode in ["lpgd", "lpgd_left"]:  # Perturb the problem to the left
-            x_left, y_left, s_left = compute_perturbed_solution(dA, db, dc, -tau, rho, solve_method, solver_kwargs)
+            x_left, y_left, s_left = compute_perturbed_solution(dA, db, dc, -tau, rho)
 
         if mode == "lpgd":
             dx = (x_right - x_left) / (2 * tau)
@@ -690,7 +687,7 @@ def derivative_lpgd(dA, db, dc, mode, tau, rho):
             raise ValueError(f"Only modes 'lpgd', 'lpgd_left', 'lpgd_right' can be used, got {mode}.")
         return dx, dy, ds
 
-    def compute_perturbed_solution(dA, db, dc, tau, rho, solve_method, solver_kwargs):
+    def compute_perturbed_solution(dA, db, dc, tau, rho):
         """
         Computes the perturbed solution x_right, y_right, s_right at (A, b, c) with 
         perturbations dA, db, dc and optional regularization rho.
@@ -702,8 +699,6 @@ def compute_perturbed_solution(dA, db, dc, tau, rho, solve_method, solver_kwargs
             dc: NumPy array representing perturbation in `c`
             tau: Perturbation strength parameter
             rho: Regularization parameter
-            solve_method: Name of solver to use; SCS, ECOS, or Clarabel
-            solver_kwargs: Additional keyword arguments to send to the solver
         Returns:
             x_pert: Perturbed solution of the primal variable x
             y_pert: Perturbed solution of the dual variable y
@@ -729,7 +724,7 @@ def compute_perturbed_solution(dA, db, dc, tau, rho, solve_method, solver_kwargs
         x_pert, y_pert, s_pert = result_pert["x"], result_pert["y"], result_pert["s"]
         return x_pert, y_pert, s_pert
     
-    def adjoint_derivative_lpgd(dx, dy, ds, mode, tau, rho):
+    def adjoint_derivative_lpgd(dx, dy, ds, tau, rho):
         """Lagrangian Proximal Gradient Descent (LPGD) [arxiv.org/abs/2407.05920]
         Computes informative replacements for adjoint derivatives given incoming gradients dx, dy, ds, 
         can be regarded as an efficient adjoint finite difference method.
@@ -741,7 +736,6 @@ def adjoint_derivative_lpgd(dx, dy, ds, mode, tau, rho):
             dx: NumPy array representing perturbation in `x`
             dy: NumPy array representing perturbation in `y`
             ds: NumPy array representing perturbation in `s`
-            mode: Mode of the adjoint derivative computation; options arem ["lpgd", "lpgd_left", "lpgd_right"]
             tau: Perturbation strength parameter
             rho: Regularization parameter
         Returns:
@@ -755,10 +749,10 @@ def adjoint_derivative_lpgd(dx, dy, ds, mode, tau, rho):
             raise ValueError(f"Finite difference mode requires rho >= 0, got rho={rho}.")
 
         if mode in ["lpgd", "lpgd_right"]:  # Perturb the problem to the right
-            x_right, y_right, _ = compute_adjoint_perturbed_solution(dx, dy, ds, tau, rho, solve_method, solver_kwargs)
+            x_right, y_right, _ = compute_adjoint_perturbed_solution(dx, dy, ds, tau, rho)
 
         if mode in ["lpgd", "lpgd_left"]:  # Perturb the problem to the left
-            x_left, y_left, _ = compute_adjoint_perturbed_solution(dx, dy, ds, -tau, rho, solve_method, solver_kwargs)
+            x_left, y_left, _ = compute_adjoint_perturbed_solution(dx, dy, ds, -tau, rho)
 
         if mode == "lpgd":
             dc = (x_right - x_left) / (2 * tau)
@@ -778,11 +772,11 @@ def adjoint_derivative_lpgd(dx, dy, ds, mode, tau, rho):
         dA = sparse.csc_matrix((dA_data, (rows, cols)), shape=A.shape)
         return dA, db, dc
 
-    def compute_adjoint_perturbed_solution(dx, dy, ds, tau, rho, solve_method, solver_kwargs):
+    def compute_adjoint_perturbed_solution(dx, dy, ds, tau, rho):
         """
         Computes the adjoint perturbed solution x_right, y_right, s_right (Lagrangian Proximal Map)
         by solving the following perturbed problem with perturbations dx, dy, ds and perturbation parameters tau, rho:
-        argmin.     <x,Px> + <c,x> + tau (<dx,x> + <ds,s>) + rho/2 |x-x^*|^2
+        argmin.     <x,Px> + <c,x> + tau*(<dx,x> + <ds,s>) + rho/2 |x-x^*|^2
         subject to  Ax + s = b-tau*dy
                     s \in K
 
@@ -796,9 +790,9 @@ def compute_adjoint_perturbed_solution(dx, dy, ds, tau, rho, solve_method, solve
         We add a constraint x'=s, replace all appearances of s with x' and solve
         argmin.     <[x, x'], [[P+rho*I,     0]; [x, x']> + <[c+tau*dx-rho*x^*, tau*ds-rho*s^*], [x, x']>
                                [      0, rho*I]]
-        subject to  [[A,  I]; [x, x'] + [0; s] = [0; b-tau*dy]
+        subject to  [[A,  I]; [x, x'] + [s'; s] = [0; b-tau*dy]
                      [0, -I]]
-                    s \in K
+                    (s', s) \in (0 \times K)
         Note that we also add a regularizer on s (rho/2 |s-s^*|^2) in this case.
         
         Args:
@@ -807,8 +801,6 @@ def compute_adjoint_perturbed_solution(dx, dy, ds, tau, rho, solve_method, solve
             ds: NumPy array representing perturbation in `s`
             tau: Perturbation strength parameter
             rho: Regularization parameter
-            solve_method: Name of solver to use; SCS, ECOS, or Clarabel
-            solver_kwargs: Additional keyword arguments to send to the solver
         Returns:
             x_pert: Perturbed solution of the primal variable x
             y_pert: Perturbed solution of the dual variable y
@@ -836,24 +828,7 @@ def compute_adjoint_perturbed_solution(dx, dy, ds, tau, rho, solve_method, solve
             x_pert, y_pert, s_pert = result_pert["x"], result_pert["y"], result_pert["s"]
         else:
             # Embed problem
-            A_emb = sparse.bmat([
-                [A, sparse.eye(m, format=A.format)],
-                [None, -sparse.eye(m, format=A.format)]
-            ]).tocsc()
-            b_emb = np.hstack([b, np.zeros(m)])
-            c_emb = np.hstack([c, np.zeros(m)])
-            if P is not None:
-                P_emb = sparse.bmat([
-                    [P, None],
-                    [None, np.zeros((m, m))]
-                ]).tocsc()
-            else:
-                P_emb = None
-            cone_dict_emb = copy.deepcopy(cone_dict)
-            if 'z' in cone_dict_emb:
-                cone_dict_emb['z'] += m
-            else:
-                cone_dict_emb['z'] = m
+            A_emb, b_emb, c_emb, P_emb, cone_dict_emb = embed_problem(A, b, c, P, cone_dict)
 
             # Perturb problem
             b_emb_pert = b_emb - tau * np.hstack([dy, np.zeros(m)])
@@ -864,10 +839,10 @@ def compute_adjoint_perturbed_solution(dx, dy, ds, tau, rho, solve_method, solve
             c_emb_pert_reg = c_emb_pert - rho * np.hstack([x, s])
 
             # Set warm start
-            x_ws = np.hstack([x, s])
-            y_ws = np.hstack([y, y])
-            s_ws = np.hstack([s, s])
-            warm_start = (x_ws, y_ws, s_ws) if solve_method != "ECOS" else None
+            if solve_method == "ECOS":
+                warm_start = None
+            else:
+                warm_start = (np.hstack([x, s]), np.hstack([y, y]), np.hstack([s, s]))
 
             # Solve the embedded problem
             result_pert = solve_internal(A=A_emb, b=b_emb_pert, c=c_emb_pert_reg, P=P_emb_reg, cone_dict=cone_dict_emb, 
diff --git a/diffcp/utils.py b/diffcp/utils.py
index 40ec85e..4309f98 100644
--- a/diffcp/utils.py
+++ b/diffcp/utils.py
@@ -1,6 +1,7 @@
 import diffcp.cones as cone_lib
 import numpy as np
 from scipy import sparse
+from copy import deepcopy
 
 
 def scs_data_from_cvxpy_problem(problem):
@@ -49,3 +50,25 @@ def regularize_P(P, rho, size):
     else:
         P_reg = P
     return P_reg
+
+def embed_problem(A, b, c, P, cone_dict):   
+    m = b.shape[0]         
+    A_emb = sparse.bmat([
+        [A, sparse.eye(m, format=A.format)],
+        [None, -sparse.eye(m, format=A.format)]
+    ]).tocsc()
+    b_emb = np.hstack([b, np.zeros(m)])
+    c_emb = np.hstack([c, np.zeros(m)])
+    if P is not None:
+        P_emb = sparse.bmat([
+            [P, None],
+            [None, np.zeros((m, m))]
+        ]).tocsc()
+    else:
+        P_emb = None
+    cone_dict_emb = deepcopy(cone_dict)
+    if 'z' in cone_dict_emb:
+        cone_dict_emb['z'] += m
+    else:
+        cone_dict_emb['z'] = m
+    return A_emb, b_emb, c_emb, P_emb, cone_dict_emb

From e4f8e8277e76487066b6e3a040e6492f3854e8e0 Mon Sep 17 00:00:00 2001
From: Anselm Paulus <anselm.paulus@uni-tuebingen.de>
Date: Fri, 9 Aug 2024 14:33:27 +0200
Subject: [PATCH 05/13] add more lpgd examples

---
 examples/batch_ecos.py                    |  4 +-
 examples/batch_ecos_lpgd.py               | 46 ++++++++++++++++
 examples/{batch_LPGD.py => batch_lpgd.py} |  0
 examples/dual_example.py                  |  2 +-
 examples/dual_example_lpgd.py             | 39 +++++++++++++
 examples/ecos_example.py                  |  4 +-
 examples/ecos_example_lpgd.py             | 39 +++++++++++++
 examples/sdp_lpgd.py                      | 67 +++++++++++++++++++++++
 8 files changed, 196 insertions(+), 5 deletions(-)
 create mode 100644 examples/batch_ecos_lpgd.py
 rename examples/{batch_LPGD.py => batch_lpgd.py} (100%)
 create mode 100644 examples/dual_example_lpgd.py
 create mode 100644 examples/ecos_example_lpgd.py
 create mode 100644 examples/sdp_lpgd.py

diff --git a/examples/batch_ecos.py b/examples/batch_ecos.py
index 9eca81b..0684305 100644
--- a/examples/batch_ecos.py
+++ b/examples/batch_ecos.py
@@ -31,9 +31,9 @@ def time_function(f, N=1):
 for n_jobs in range(1, 8):
     def f_forward():
         return diffcp.solve_and_derivative_batch(As, bs, cs, Ks,
-                                                 n_jobs_forward=n_jobs, n_jobs_backward=n_jobs, solver="ECOS", verbose=False)
+                                                 n_jobs_forward=n_jobs, n_jobs_backward=n_jobs, solve_method="ECOS", verbose=False)
     xs, ys, ss, D_batch, DT_batch = diffcp.solve_and_derivative_batch(As, bs, cs, Ks,
-                                                                      n_jobs_forward=1, n_jobs_backward=n_jobs, solver="ECOS", verbose=False)
+                                                                      n_jobs_forward=1, n_jobs_backward=n_jobs, solve_method="ECOS", verbose=False)
 
     def f_backward():
         DT_batch(xs, ys, ss, mode="lsqr")
diff --git a/examples/batch_ecos_lpgd.py b/examples/batch_ecos_lpgd.py
new file mode 100644
index 0000000..9f5b0a7
--- /dev/null
+++ b/examples/batch_ecos_lpgd.py
@@ -0,0 +1,46 @@
+import diffcp
+import utils
+import IPython as ipy
+import time
+import numpy as np
+
+m = 100
+n = 50
+
+batch_size = 16
+n_jobs = 1
+
+As, bs, cs, Ks = [], [], [], []
+for _ in range(batch_size):
+    A, b, c, K = diffcp.utils.least_squares_eq_scs_data(m, n)
+    As += [A]
+    bs += [b]
+    cs += [c]
+    Ks += [K]
+
+
+def time_function(f, N=1):
+    result = []
+    for i in range(N):
+        tic = time.time()
+        f()
+        toc = time.time()
+        result += [toc - tic]
+    return np.mean(result), np.std(result)
+
+for n_jobs in range(1, 8):
+    def f_forward():
+        return diffcp.solve_and_derivative_batch(As, bs, cs, Ks,
+                                                 n_jobs_forward=n_jobs, n_jobs_backward=n_jobs, solve_method="ECOS", verbose=False, 
+                                                 mode="lpgd", derivative_kwargs=dict(tau=1e-3, rho=0.0))
+    xs, ys, ss, D_batch, DT_batch = diffcp.solve_and_derivative_batch(As, bs, cs, Ks,
+                                                                      n_jobs_forward=1, n_jobs_backward=n_jobs, solve_method="ECOS", verbose=False,
+                                                                      mode="lpgd", derivative_kwargs=dict(tau=1e-3, rho=0.0))
+
+    def f_backward():
+        DT_batch(xs, ys, ss)
+
+    mean_forward, std_forward = time_function(f_forward)
+    mean_backward, std_backward = time_function(f_backward)
+    print("%03d | %4.4f +/- %2.2f | %4.4f +/- %2.2f" %
+          (n_jobs, mean_forward, std_forward, mean_backward, std_backward))
diff --git a/examples/batch_LPGD.py b/examples/batch_lpgd.py
similarity index 100%
rename from examples/batch_LPGD.py
rename to examples/batch_lpgd.py
diff --git a/examples/dual_example.py b/examples/dual_example.py
index ad0c816..07e6c1d 100644
--- a/examples/dual_example.py
+++ b/examples/dual_example.py
@@ -9,7 +9,7 @@
 # defined as a product of a 3-d fixed cone, 3-d positive orthant cone,
 # and a 5-d second order cone.
 K = {
-    'f': 3,
+    'z': 3,
     'l': 3,
     'q': [5]
 }
diff --git a/examples/dual_example_lpgd.py b/examples/dual_example_lpgd.py
new file mode 100644
index 0000000..93ced61
--- /dev/null
+++ b/examples/dual_example_lpgd.py
@@ -0,0 +1,39 @@
+import diffcp
+
+import numpy as np
+import utils
+np.set_printoptions(precision=5, suppress=True)
+
+
+# We generate a random cone program with a cone
+# defined as a product of a 3-d fixed cone, 3-d positive orthant cone,
+# and a 5-d second order cone.
+K = {
+    'z': 3,
+    'l': 3,
+    'q': [5]
+}
+
+m = 3 + 3 + 5
+n = 5
+
+np.random.seed(0)
+
+A, b, c = utils.random_cone_prog(m, n, K)
+
+# We solve the cone program and get the derivative and its adjoint
+x, y, s, derivative, adjoint_derivative = diffcp.solve_and_derivative(
+    A, b, c, K, eps=1e-10, mode="lpgd", derivative_kwargs=dict(tau=1e-3, rho=0.1))
+
+print("x =", x)
+print("y =", y)
+print("s =", s)
+
+# We evaluate the gradient of the objective with respect to A, b and c.
+dA, db, dc = adjoint_derivative(c, np.zeros(m), np.zeros(m))
+
+# The gradient of the objective with respect to b should be
+# equal to minus the dual variable y (see, e.g., page 268 of Convex Optimization by
+# Boyd & Vandenberghe).
+print("db =", db)
+print("-y =", -y)
diff --git a/examples/ecos_example.py b/examples/ecos_example.py
index 9a64182..dcec447 100644
--- a/examples/ecos_example.py
+++ b/examples/ecos_example.py
@@ -9,7 +9,7 @@
 # defined as a product of a 3-d fixed cone, 3-d positive orthant cone,
 # and a 5-d second order cone.
 K = {
-    'f': 3,
+    'z': 3,
     'l': 3,
     'q': [5]
 }
@@ -23,7 +23,7 @@
 
 # We solve the cone program and get the derivative and its adjoint
 x, y, s, derivative, adjoint_derivative = diffcp.solve_and_derivative(
-    A, b, c, K, solver="ECOS", verbose=False)
+    A, b, c, K, solve_method="ECOS", verbose=False)
 
 print("x =", x)
 print("y =", y)
diff --git a/examples/ecos_example_lpgd.py b/examples/ecos_example_lpgd.py
new file mode 100644
index 0000000..c142253
--- /dev/null
+++ b/examples/ecos_example_lpgd.py
@@ -0,0 +1,39 @@
+import diffcp
+
+import numpy as np
+import utils
+np.set_printoptions(precision=5, suppress=True)
+
+
+# We generate a random cone program with a cone
+# defined as a product of a 3-d fixed cone, 3-d positive orthant cone,
+# and a 5-d second order cone.
+K = {
+    'z': 3,
+    'l': 3,
+    'q': [5]
+}
+
+m = 3 + 3 + 5
+n = 5
+
+np.random.seed(0)
+
+A, b, c = utils.random_cone_prog(m, n, K)
+
+# We solve the cone program and get the derivative and its adjoint
+x, y, s, derivative, adjoint_derivative = diffcp.solve_and_derivative(
+    A, b, c, K, solve_method="ECOS", verbose=False, mode="lpgd", derivative_kwargs=dict(tau=0.1, rho=0.0))
+
+print("x =", x)
+print("y =", y)
+print("s =", s)
+
+# We evaluate the gradient of the objective with respect to A, b and c.
+dA, db, dc = adjoint_derivative(c, np.zeros(m), np.zeros(m))
+
+# The gradient of the objective with respect to b should be
+# equal to minus the dual variable y (see, e.g., page 268 of Convex Optimization by
+# Boyd & Vandenberghe).
+print("db =", db)
+print("-y =", -y)
diff --git a/examples/sdp_lpgd.py b/examples/sdp_lpgd.py
new file mode 100644
index 0000000..e5cec85
--- /dev/null
+++ b/examples/sdp_lpgd.py
@@ -0,0 +1,67 @@
+import cvxpy as cp
+import diffcp
+import numpy as np
+
+import time
+
+
+def scs_data_from_cvxpy_problem(problem):
+    data = problem.get_problem_data(cp.SCS)[0]
+    cone_dims = cp.reductions.solvers.conic_solvers.scs_conif.dims_to_solver_dict(data[
+                                                                                  "dims"])
+    return data["A"], data["b"], data["c"], cone_dims
+
+
+def randn_symm(n):
+    A = np.random.randn(n, n)
+    return (A + A.T) / 2
+
+
+def randn_psd(n):
+    A = 1. / 10 * np.random.randn(n, n)
+    return A@A.T
+
+
+def main(n=3, p=3):
+    # Generate problem data
+    C = randn_psd(n)
+    As = [randn_symm(n) for _ in range(p)]
+    Bs = np.random.randn(p)
+
+    # Extract problem data using cvxpy
+    X = cp.Variable((n, n), PSD=True)
+    objective = cp.trace(C@X)
+    constraints = [cp.trace(As[i]@X) == Bs[i] for i in range(p)]
+    prob = cp.Problem(cp.Minimize(objective), constraints)
+    A, b, c, cone_dims = scs_data_from_cvxpy_problem(prob)
+
+    # Print problem size
+    mn_plus_m_plus_n = A.size + b.size + c.size
+    n_plus_2n = c.size + 2 * b.size
+    entries_in_derivative = mn_plus_m_plus_n * n_plus_2n
+    print(f"""n={n}, p={p}, A.shape={A.shape}, nnz in A={A.nnz}, derivative={mn_plus_m_plus_n}x{n_plus_2n} ({entries_in_derivative} entries)""")
+
+    # Compute solution and derivative maps
+    start = time.perf_counter()
+    x, y, s, derivative, adjoint_derivative = diffcp.solve_and_derivative(
+        A, b, c, cone_dims, eps=1e-5, mode="lpgd")
+    end = time.perf_counter()
+    print("Compute solution and set up derivative: %.2f s." % (end - start))
+
+    # Derivative
+    lpgd_args = dict(tau=0.1, rho=0.1)
+    start = time.perf_counter()
+    dA, db, dc = adjoint_derivative(diffcp.cones.vec_symm(
+        C), np.zeros(y.size), np.zeros(s.size), **lpgd_args)
+    end = time.perf_counter()
+    print("Evaluate derivative: %.2f s." % (end - start))
+
+    # Adjoint of derivative
+    start = time.perf_counter()
+    dx, dy, ds = derivative(A, b, c, **lpgd_args)
+    end = time.perf_counter()
+    print("Evaluate adjoint of derivative: %.2f s." % (end - start))
+
+if __name__ == '__main__':
+    np.random.seed(0)
+    main(50, 25)

From 1fa3d05077486f018f041db8482a7f863b300a18 Mon Sep 17 00:00:00 2001
From: Anselm Paulus <anselm.paulus@uni-tuebingen.de>
Date: Fri, 9 Aug 2024 14:48:18 +0200
Subject: [PATCH 06/13] Catch error if perturbed problem infeasible

---
 diffcp/cone_program.py | 24 ++++++++++++++++++------
 1 file changed, 18 insertions(+), 6 deletions(-)

diff --git a/diffcp/cone_program.py b/diffcp/cone_program.py
index 5443192..a2e511d 100644
--- a/diffcp/cone_program.py
+++ b/diffcp/cone_program.py
@@ -718,8 +718,12 @@ def compute_perturbed_solution(dA, db, dc, tau, rho):
         warm_start = (x, y, s)
 
         # Solve the perturbed problem
-        result_pert = solve_internal(A=A_pert, b=b_pert, c=c_pert_reg, P=P_reg, cone_dict=cone_dict, 
-                                     solve_method=solve_method, warm_start=warm_start, **solver_kwargs)
+        try:
+            result_pert = solve_internal(A=A_pert, b=b_pert, c=c_pert_reg, P=P_reg, cone_dict=cone_dict, 
+                                        solve_method=solve_method, warm_start=warm_start, **solver_kwargs)
+        except SolverError as e:
+            raise ValueError(f"Failed to solve perturbed problem: {e}. If it is infeasible consider decreasing "\
+                              "tau or switching to 'lpgd_left' or 'lpgd_right' mode.")
         # Extract the solutions
         x_pert, y_pert, s_pert = result_pert["x"], result_pert["y"], result_pert["s"]
         return x_pert, y_pert, s_pert
@@ -822,8 +826,12 @@ def compute_adjoint_perturbed_solution(dx, dy, ds, tau, rho):
 
             # Solve the perturbed problem
             # TODO: If solve_method=='SCS' and rho==0, this can be sped up strongly by using solver.update
-            result_pert = solve_internal(A=A, b=b_pert, c=c_pert_reg, P=P_reg, cone_dict=cone_dict, 
-                                         solve_method=solve_method, warm_start=warm_start, **solver_kwargs)
+            try:
+                result_pert = solve_internal(A=A, b=b_pert, c=c_pert_reg, P=P_reg, cone_dict=cone_dict, 
+                                            solve_method=solve_method, warm_start=warm_start, **solver_kwargs)
+            except SolverError as e:
+                raise ValueError(f"Failed to solve adjoint perturbed problem: {e}. If it is infeasible consider decreasing "\
+                                  "tau or switching to 'lpgd_left' or 'lpgd_right' mode.")
             # Extract the solutions
             x_pert, y_pert, s_pert = result_pert["x"], result_pert["y"], result_pert["s"]
         else:
@@ -845,8 +853,12 @@ def compute_adjoint_perturbed_solution(dx, dy, ds, tau, rho):
                 warm_start = (np.hstack([x, s]), np.hstack([y, y]), np.hstack([s, s]))
 
             # Solve the embedded problem
-            result_pert = solve_internal(A=A_emb, b=b_emb_pert, c=c_emb_pert_reg, P=P_emb_reg, cone_dict=cone_dict_emb, 
-                                         solve_method=solve_method, warm_start=warm_start, **solver_kwargs)
+            try:
+                result_pert = solve_internal(A=A_emb, b=b_emb_pert, c=c_emb_pert_reg, P=P_emb_reg, cone_dict=cone_dict_emb, 
+                                            solve_method=solve_method, warm_start=warm_start, **solver_kwargs)
+            except SolverError as e:
+                raise ValueError(f"Failed to solve adjoint perturbed problem: {e}. If it is infeasible consider decreasing "\
+                                  "tau or switching to 'lpgd_left' or 'lpgd_right' mode.")
             # Extract the solutions
             x_pert = result_pert['x'][:n]
             y_pert = result_pert['y'][:m]

From 133d30d8da6f734bdfa5f71a664170d15a3808f6 Mon Sep 17 00:00:00 2001
From: Anselm Paulus <anselm.paulus@uni-tuebingen.de>
Date: Fri, 9 Aug 2024 14:59:43 +0200
Subject: [PATCH 07/13] Give better error message on infeasibility

---
 diffcp/cone_program.py | 48 +++++++++++++++++++++++-------------------
 examples/batch_lpgd.py |  4 ++--
 2 files changed, 28 insertions(+), 24 deletions(-)

diff --git a/diffcp/cone_program.py b/diffcp/cone_program.py
index a2e511d..0725725 100644
--- a/diffcp/cone_program.py
+++ b/diffcp/cone_program.py
@@ -666,10 +666,18 @@ def derivative_lpgd(dA, db, dc, tau, rho):
             raise ValueError(f"Finite difference mode requires rho >= 0, got rho={rho}.")
         
         if mode in ["lpgd", "lpgd_right"]:  # Perturb the problem to the right
-            x_right, y_right, s_right = compute_perturbed_solution(dA, db, dc, tau, rho)
+            try:
+                x_right, y_right, s_right = compute_perturbed_solution(dA, db, dc, tau, rho)
+            except SolverError as e:
+                raise ValueError(f"Computation of right-perturbed problem failed: {e}. "\
+                                  "Consider decresing tau or swiching to 'lpgd_left' mode.")
 
         if mode in ["lpgd", "lpgd_left"]:  # Perturb the problem to the left
-            x_left, y_left, s_left = compute_perturbed_solution(dA, db, dc, -tau, rho)
+            try:
+                x_left, y_left, s_left = compute_perturbed_solution(dA, db, dc, -tau, rho)
+            except SolverError as e:
+                raise ValueError(f"Computation of left-perturbed problem failed: {e}. "\
+                                  "Consider decresing tau or swiching to 'lpgd_right' mode.")
 
         if mode == "lpgd":
             dx = (x_right - x_left) / (2 * tau)
@@ -718,12 +726,8 @@ def compute_perturbed_solution(dA, db, dc, tau, rho):
         warm_start = (x, y, s)
 
         # Solve the perturbed problem
-        try:
-            result_pert = solve_internal(A=A_pert, b=b_pert, c=c_pert_reg, P=P_reg, cone_dict=cone_dict, 
-                                        solve_method=solve_method, warm_start=warm_start, **solver_kwargs)
-        except SolverError as e:
-            raise ValueError(f"Failed to solve perturbed problem: {e}. If it is infeasible consider decreasing "\
-                              "tau or switching to 'lpgd_left' or 'lpgd_right' mode.")
+        result_pert = solve_internal(A=A_pert, b=b_pert, c=c_pert_reg, P=P_reg, cone_dict=cone_dict, 
+                                    solve_method=solve_method, warm_start=warm_start, **solver_kwargs)
         # Extract the solutions
         x_pert, y_pert, s_pert = result_pert["x"], result_pert["y"], result_pert["s"]
         return x_pert, y_pert, s_pert
@@ -753,10 +757,18 @@ def adjoint_derivative_lpgd(dx, dy, ds, tau, rho):
             raise ValueError(f"Finite difference mode requires rho >= 0, got rho={rho}.")
 
         if mode in ["lpgd", "lpgd_right"]:  # Perturb the problem to the right
-            x_right, y_right, _ = compute_adjoint_perturbed_solution(dx, dy, ds, tau, rho)
+            try:
+                x_right, y_right, _ = compute_adjoint_perturbed_solution(dx, dy, ds, tau, rho)
+            except SolverError as e:
+                raise ValueError(f"Computation of right-perturbed problem failed: {e}. "\
+                                  "Consider decresing tau or swiching to 'lpgd_left' mode.")
 
         if mode in ["lpgd", "lpgd_left"]:  # Perturb the problem to the left
-            x_left, y_left, _ = compute_adjoint_perturbed_solution(dx, dy, ds, -tau, rho)
+            try:
+                x_left, y_left, _ = compute_adjoint_perturbed_solution(dx, dy, ds, -tau, rho)
+            except SolverError as e:
+                raise ValueError(f"Computation of left-perturbed problem failed: {e}. "\
+                                  "Consider decresing tau or swiching to 'lpgd_right' mode.")
 
         if mode == "lpgd":
             dc = (x_right - x_left) / (2 * tau)
@@ -826,12 +838,8 @@ def compute_adjoint_perturbed_solution(dx, dy, ds, tau, rho):
 
             # Solve the perturbed problem
             # TODO: If solve_method=='SCS' and rho==0, this can be sped up strongly by using solver.update
-            try:
-                result_pert = solve_internal(A=A, b=b_pert, c=c_pert_reg, P=P_reg, cone_dict=cone_dict, 
-                                            solve_method=solve_method, warm_start=warm_start, **solver_kwargs)
-            except SolverError as e:
-                raise ValueError(f"Failed to solve adjoint perturbed problem: {e}. If it is infeasible consider decreasing "\
-                                  "tau or switching to 'lpgd_left' or 'lpgd_right' mode.")
+            result_pert = solve_internal(A=A, b=b_pert, c=c_pert_reg, P=P_reg, cone_dict=cone_dict, 
+                                         solve_method=solve_method, warm_start=warm_start, **solver_kwargs)
             # Extract the solutions
             x_pert, y_pert, s_pert = result_pert["x"], result_pert["y"], result_pert["s"]
         else:
@@ -853,12 +861,8 @@ def compute_adjoint_perturbed_solution(dx, dy, ds, tau, rho):
                 warm_start = (np.hstack([x, s]), np.hstack([y, y]), np.hstack([s, s]))
 
             # Solve the embedded problem
-            try:
-                result_pert = solve_internal(A=A_emb, b=b_emb_pert, c=c_emb_pert_reg, P=P_emb_reg, cone_dict=cone_dict_emb, 
-                                            solve_method=solve_method, warm_start=warm_start, **solver_kwargs)
-            except SolverError as e:
-                raise ValueError(f"Failed to solve adjoint perturbed problem: {e}. If it is infeasible consider decreasing "\
-                                  "tau or switching to 'lpgd_left' or 'lpgd_right' mode.")
+            result_pert = solve_internal(A=A_emb, b=b_emb_pert, c=c_emb_pert_reg, P=P_emb_reg, cone_dict=cone_dict_emb, 
+                                        solve_method=solve_method, warm_start=warm_start, **solver_kwargs)
             # Extract the solutions
             x_pert = result_pert['x'][:n]
             y_pert = result_pert['y'][:m]
diff --git a/examples/batch_lpgd.py b/examples/batch_lpgd.py
index 81fb4ab..6992433 100644
--- a/examples/batch_lpgd.py
+++ b/examples/batch_lpgd.py
@@ -30,9 +30,9 @@ def time_function(f, N=1):
 for n_jobs in range(1, 5):
     def f_forward():
         return diffcp.solve_and_derivative_batch(As, bs, cs, Ks,
-                n_jobs_forward=n_jobs, n_jobs_backward=n_jobs, mode='lpgd', solve_method="Clarabel")
+                n_jobs_forward=n_jobs, n_jobs_backward=n_jobs)
     xs, ys, ss, D_batch, DT_batch = diffcp.solve_and_derivative_batch(As, bs, cs, Ks,
-                n_jobs_forward=1, n_jobs_backward=n_jobs, mode='lpgd', solve_method="Clarabel")
+                n_jobs_forward=1, n_jobs_backward=n_jobs, mode='lpgd_left')
     def f_backward():
         DT_batch(xs, ys, ss, tau=0.1, rho=0.1)
 

From 761b8ba4998c2175d08f741b92d64b41e6ad505b Mon Sep 17 00:00:00 2001
From: Anselm Paulus <anselm.paulus@uni-tuebingen.de>
Date: Fri, 9 Aug 2024 15:15:16 +0200
Subject: [PATCH 08/13] fix docstrings

---
 diffcp/cone_program.py | 2 +-
 diffcp/utils.py        | 9 ++++++++-
 2 files changed, 9 insertions(+), 2 deletions(-)

diff --git a/diffcp/cone_program.py b/diffcp/cone_program.py
index 0725725..bf71922 100644
--- a/diffcp/cone_program.py
+++ b/diffcp/cone_program.py
@@ -806,7 +806,7 @@ def compute_adjoint_perturbed_solution(dx, dy, ds, tau, rho):
         We add a constraint x'=s, replace all appearances of s with x' and solve
         argmin.     <[x, x'], [[P+rho*I,     0]; [x, x']> + <[c+tau*dx-rho*x^*, tau*ds-rho*s^*], [x, x']>
                                [      0, rho*I]]
-        subject to  [[A,  I]; [x, x'] + [s'; s] = [0; b-tau*dy]
+        subject to  [[A,  I]; [x, x'] + [s'; s] = [b-tau*dy; 0]
                      [0, -I]]
                     (s', s) \in (0 \times K)
         Note that we also add a regularizer on s (rho/2 |s-s^*|^2) in this case.
diff --git a/diffcp/utils.py b/diffcp/utils.py
index 4309f98..4ecf1df 100644
--- a/diffcp/utils.py
+++ b/diffcp/utils.py
@@ -51,7 +51,14 @@ def regularize_P(P, rho, size):
         P_reg = P
     return P_reg
 
-def embed_problem(A, b, c, P, cone_dict):   
+def embed_problem(A, b, c, P, cone_dict):
+    """Embeds the problem into a larger problem that allows costs on the slack variables
+    A_emb = [A, I; 0, -I]
+    b_emb = [b; 0]
+    c_emb = [c; 0]
+    P_emb = [P, 0; 0, 0]
+    cone_dict_emb = cone_dict with z shifted by m
+    """
     m = b.shape[0]         
     A_emb = sparse.bmat([
         [A, sparse.eye(m, format=A.format)],

From ca0eb6454ebcafe245a15a332c2180c93e93b8a9 Mon Sep 17 00:00:00 2001
From: Anselm Paulus <anselm.paulus@uni-tuebingen.de>
Date: Fri, 9 Aug 2024 15:28:20 +0200
Subject: [PATCH 09/13] minor changes

---
 diffcp/cone_program.py | 18 +++++++++---------
 1 file changed, 9 insertions(+), 9 deletions(-)

diff --git a/diffcp/cone_program.py b/diffcp/cone_program.py
index bf71922..3296b26 100644
--- a/diffcp/cone_program.py
+++ b/diffcp/cone_program.py
@@ -669,15 +669,15 @@ def derivative_lpgd(dA, db, dc, tau, rho):
             try:
                 x_right, y_right, s_right = compute_perturbed_solution(dA, db, dc, tau, rho)
             except SolverError as e:
-                raise ValueError(f"Computation of right-perturbed problem failed: {e}. "\
-                                  "Consider decresing tau or swiching to 'lpgd_left' mode.")
+                raise SolverError(f"Computation of right-perturbed problem failed: {e}. "\
+                                   "Consider decresing tau or swiching to 'lpgd_left' mode.")
 
         if mode in ["lpgd", "lpgd_left"]:  # Perturb the problem to the left
             try:
                 x_left, y_left, s_left = compute_perturbed_solution(dA, db, dc, -tau, rho)
             except SolverError as e:
-                raise ValueError(f"Computation of left-perturbed problem failed: {e}. "\
-                                  "Consider decresing tau or swiching to 'lpgd_right' mode.")
+                raise SolverError(f"Computation of left-perturbed problem failed: {e}. "\
+                                   "Consider decresing tau or swiching to 'lpgd_right' mode.")
 
         if mode == "lpgd":
             dx = (x_right - x_left) / (2 * tau)
@@ -760,15 +760,15 @@ def adjoint_derivative_lpgd(dx, dy, ds, tau, rho):
             try:
                 x_right, y_right, _ = compute_adjoint_perturbed_solution(dx, dy, ds, tau, rho)
             except SolverError as e:
-                raise ValueError(f"Computation of right-perturbed problem failed: {e}. "\
-                                  "Consider decresing tau or swiching to 'lpgd_left' mode.")
+                raise SolverError(f"Computation of right-perturbed problem failed: {e}. "\
+                                   "Consider decresing tau or swiching to 'lpgd_left' mode.")
 
         if mode in ["lpgd", "lpgd_left"]:  # Perturb the problem to the left
             try:
                 x_left, y_left, _ = compute_adjoint_perturbed_solution(dx, dy, ds, -tau, rho)
             except SolverError as e:
-                raise ValueError(f"Computation of left-perturbed problem failed: {e}. "\
-                                  "Consider decresing tau or swiching to 'lpgd_right' mode.")
+                raise SolverError(f"Computation of left-perturbed problem failed: {e}. "\
+                                   "Consider decresing tau or swiching to 'lpgd_right' mode.")
 
         if mode == "lpgd":
             dc = (x_right - x_left) / (2 * tau)
@@ -809,7 +809,7 @@ def compute_adjoint_perturbed_solution(dx, dy, ds, tau, rho):
         subject to  [[A,  I]; [x, x'] + [s'; s] = [b-tau*dy; 0]
                      [0, -I]]
                     (s', s) \in (0 \times K)
-        Note that we also add a regularizer on s (rho/2 |s-s^*|^2) in this case.
+        Note that we also add a regularizer on s in this case (rho/2 |s-s^*|^2).
         
         Args:
             dx: NumPy array representing perturbation in `x`

From c73b116d2c3f7240f6a71a67d0ce487262a6426c Mon Sep 17 00:00:00 2001
From: Anselm Paulus <anselm.paulus@uni-tuebingen.de>
Date: Mon, 26 Aug 2024 10:42:46 +0200
Subject: [PATCH 10/13] cosmetic changes

---
 diffcp/cone_program.py | 39 ++++++++++++++++++++-------------------
 1 file changed, 20 insertions(+), 19 deletions(-)

diff --git a/diffcp/cone_program.py b/diffcp/cone_program.py
index 3296b26..ff3415d 100644
--- a/diffcp/cone_program.py
+++ b/diffcp/cone_program.py
@@ -654,7 +654,7 @@ def derivative_lpgd(dA, db, dc, tau, rho):
             db: NumPy array representing perturbation in `b`
             dc: NumPy array representing perturbation in `c`
             tau: Perturbation strength parameter
-            rho: Regularization parameter
+            rho: Regularization strength parameter
         Returns:
            NumPy arrays dx, dy, ds, the result of applying the derivative
            to the perturbations.
@@ -670,14 +670,14 @@ def derivative_lpgd(dA, db, dc, tau, rho):
                 x_right, y_right, s_right = compute_perturbed_solution(dA, db, dc, tau, rho)
             except SolverError as e:
                 raise SolverError(f"Computation of right-perturbed problem failed: {e}. "\
-                                   "Consider decresing tau or swiching to 'lpgd_left' mode.")
+                                   "Consider decreasing tau or swiching to 'lpgd_left' mode.")
 
         if mode in ["lpgd", "lpgd_left"]:  # Perturb the problem to the left
             try:
                 x_left, y_left, s_left = compute_perturbed_solution(dA, db, dc, -tau, rho)
             except SolverError as e:
                 raise SolverError(f"Computation of left-perturbed problem failed: {e}. "\
-                                   "Consider decresing tau or swiching to 'lpgd_right' mode.")
+                                   "Consider decreasing tau or swiching to 'lpgd_right' mode.")
 
         if mode == "lpgd":
             dx = (x_right - x_left) / (2 * tau)
@@ -706,7 +706,7 @@ def compute_perturbed_solution(dA, db, dc, tau, rho):
             db: NumPy array representing perturbation in `b`
             dc: NumPy array representing perturbation in `c`
             tau: Perturbation strength parameter
-            rho: Regularization parameter
+            rho: Regularization strength parameter
         Returns:
             x_pert: Perturbed solution of the primal variable x
             y_pert: Perturbed solution of the dual variable y
@@ -718,7 +718,7 @@ def compute_perturbed_solution(dA, db, dc, tau, rho):
         b_pert = b + tau * db
         c_pert = c + tau * dc
 
-        # Regularize
+        # Regularize: Effectively adds a rho/2 |x-x^*|^2 term to the objective
         P_reg = regularize_P(P, rho=rho, size=n)
         c_pert_reg = c_pert - rho * x
 
@@ -745,7 +745,7 @@ def adjoint_derivative_lpgd(dx, dy, ds, tau, rho):
             dy: NumPy array representing perturbation in `y`
             ds: NumPy array representing perturbation in `s`
             tau: Perturbation strength parameter
-            rho: Regularization parameter
+            rho: Regularization strength parameter
         Returns:
             (`dA`, `db`, `dc`), the result of applying the LPGD to the
             perturbations; the sparsity pattern of `dA` matches that of `A`.
@@ -761,14 +761,14 @@ def adjoint_derivative_lpgd(dx, dy, ds, tau, rho):
                 x_right, y_right, _ = compute_adjoint_perturbed_solution(dx, dy, ds, tau, rho)
             except SolverError as e:
                 raise SolverError(f"Computation of right-perturbed problem failed: {e}. "\
-                                   "Consider decresing tau or swiching to 'lpgd_left' mode.")
+                                   "Consider decreasing tau or swiching to 'lpgd_left' mode.")
 
         if mode in ["lpgd", "lpgd_left"]:  # Perturb the problem to the left
             try:
                 x_left, y_left, _ = compute_adjoint_perturbed_solution(dx, dy, ds, -tau, rho)
             except SolverError as e:
                 raise SolverError(f"Computation of left-perturbed problem failed: {e}. "\
-                                   "Consider decresing tau or swiching to 'lpgd_right' mode.")
+                                   "Consider decreasing tau or swiching to 'lpgd_right' mode.")
 
         if mode == "lpgd":
             dc = (x_right - x_left) / (2 * tau)
@@ -791,7 +791,8 @@ def adjoint_derivative_lpgd(dx, dy, ds, tau, rho):
     def compute_adjoint_perturbed_solution(dx, dy, ds, tau, rho):
         """
         Computes the adjoint perturbed solution x_right, y_right, s_right (Lagrangian Proximal Map)
-        by solving the following perturbed problem with perturbations dx, dy, ds and perturbation parameters tau, rho:
+        by solving the following perturbed problem with perturbations dx, dy, ds 
+        and perturbation/regularization parameters tau/rho:
         argmin.     <x,Px> + <c,x> + tau*(<dx,x> + <ds,s>) + rho/2 |x-x^*|^2
         subject to  Ax + s = b-tau*dy
                     s \in K
@@ -804,11 +805,11 @@ def compute_adjoint_perturbed_solution(dx, dy, ds, tau, rho):
 
         For ds!=0 we rewrite the problem as an embedded problem.
         We add a constraint x'=s, replace all appearances of s with x' and solve
-        argmin.     <[x, x'], [[P+rho*I,     0]; [x, x']> + <[c+tau*dx-rho*x^*, tau*ds-rho*s^*], [x, x']>
+        argmin.     <[x, x'], [[P+rho*I,     0];  [x, x']> + <[c+tau*dx-rho*x^*, tau*ds-rho*s^*], [x, x']>
                                [      0, rho*I]]
-        subject to  [[A,  I]; [x, x'] + [s'; s] = [b-tau*dy; 0]
+        subject to  [[A,  I];  [x, x'] + [s'; s] = [b-tau*dy; 0]
                      [0, -I]]
-                    (s', s) \in (0 \times K)
+                    (s', s) \in (0 x K)
         Note that we also add a regularizer on s in this case (rho/2 |s-s^*|^2).
         
         Args:
@@ -816,7 +817,7 @@ def compute_adjoint_perturbed_solution(dx, dy, ds, tau, rho):
             dy: NumPy array representing perturbation in `y`
             ds: NumPy array representing perturbation in `s`
             tau: Perturbation strength parameter
-            rho: Regularization parameter
+            rho: Regularization strength parameter
         Returns:
             x_pert: Perturbed solution of the primal variable x
             y_pert: Perturbed solution of the dual variable y
@@ -825,11 +826,11 @@ def compute_adjoint_perturbed_solution(dx, dy, ds, tau, rho):
 
         # The cases ds = 0 and ds != 0 are handled separately, see docstring
         if np.isclose(np.sum(np.abs(ds)), 0):
-            # Perturb problem
+            # Perturb problem (Note: perturb primal and dual linear terms in different directions)
             c_pert = c + tau * dx
             b_pert = b - tau * dy
 
-            # Regularize
+            # Regularize: Effectively adds a rho/2 |x-x^*|^2 term to the objective
             P_reg = regularize_P(P, rho=rho, size=n)
             c_pert_reg = c_pert - rho * x
 
@@ -837,20 +838,20 @@ def compute_adjoint_perturbed_solution(dx, dy, ds, tau, rho):
             warm_start = (x, y, s) if solve_method != "ECOS" else None
 
             # Solve the perturbed problem
-            # TODO: If solve_method=='SCS' and rho==0, this can be sped up strongly by using solver.update
+            # Note: In special case solve_method=='SCS' and rho==0, this could be sped up strongly by using solver.update
             result_pert = solve_internal(A=A, b=b_pert, c=c_pert_reg, P=P_reg, cone_dict=cone_dict, 
                                          solve_method=solve_method, warm_start=warm_start, **solver_kwargs)
             # Extract the solutions
             x_pert, y_pert, s_pert = result_pert["x"], result_pert["y"], result_pert["s"]
         else:
-            # Embed problem
+            # Embed problem (see docstring)
             A_emb, b_emb, c_emb, P_emb, cone_dict_emb = embed_problem(A, b, c, P, cone_dict)
 
-            # Perturb problem
+            # Perturb problem (Note: perturb primal and dual linear terms in different directions)
             b_emb_pert = b_emb - tau * np.hstack([dy, np.zeros(m)])
             c_emb_pert = c_emb + tau * np.hstack([dx, ds])
             
-            # Regularize
+            # Regularize: Effectively adds a rho/2 (|x-x^*|^2 + |s-s^*|^2) term to the objective
             P_emb_reg = regularize_P(P_emb, rho=rho, size=n+m)
             c_emb_pert_reg = c_emb_pert - rho * np.hstack([x, s])
 

From ca78344d31f8a1c799735e428b641017484440d6 Mon Sep 17 00:00:00 2001
From: Anselm Paulus <anselm.paulus@uni-tuebingen.de>
Date: Mon, 26 Aug 2024 11:45:16 +0200
Subject: [PATCH 11/13] update readme

---
 README.md | 30 ++----------------------------
 1 file changed, 2 insertions(+), 28 deletions(-)

diff --git a/README.md b/README.md
index 934136b..46f00c0 100644
--- a/README.md
+++ b/README.md
@@ -1,31 +1,3 @@
-# Lagrangian Proximal Gradient Descent - diffcp implementation
-
-This fork implements [Lagrangian proximal gradient descent (LPGD)](https://arxiv.org/abs/2407.05920) as an option for the conic program in diffcp.
-It can be understood as an efficient implementation of adjoint finite differences, enabling informative derivative replacements in the case of degenerate derivatives.
-The perturbation strength is controlled by the parameter tau, the parameter rho controls the regularization strength.
-Note: For the forward derivatives, LPGD computes standard finite differences with an optional regularization.
-
-Example usage:
-```python
-x, y, s, D, DT = solve_and_derivative(A, b, c, cone_dict, mode='lpgd')
-...
-dx, dy, ds = D(dA, db, dc, tau=0.1, rho=0.1)
-...
-dA, db, dc = DT(dx, dy, ds, tau=0.1, rho=0.1)
-```
-
-Alternatively the derivative keyword arguments can be passed directly on the forward pass:
-```python
-x, y, s, D, DT = solve_and_derivative(A, b, c, cone_dict, mode='lpgd', derivative_kwargs=dict(tau=0.1, rho=0.1))
-...
-dx, dy, ds = D(dA, db, dc)
-...
-dA, db, dc = DT(dx, dy, ds)
-```
-
-
-Below follows the original Readme:
-
 [![Build Status](http://github.com/cvxgrp/diffcp/workflows/build/badge.svg?event=push)](https://github.com/cvxgrp/diffcp/actions/workflows/build.yml)
 
 # diffcp
@@ -124,6 +96,8 @@ These inputs must conform to the [SCS convention](https://github.com/bodono/scs-
 
 The values in `cone_dict` denote the sizes of each cone; the values of `diffcp.SOC`, `diffcp.PSD`, and `diffcp.EXP` should be lists. The order of the rows of `A` must match the ordering of the cones given above. For more details, consult the [SCS documentation](https://github.com/cvxgrp/scs/blob/master/README.md).
 
+To enable [Lagrangian Proximal Gradient Descent (LPGD)](https://arxiv.org/abs/2407.05920) differentiation of the conic program based on efficient finite-differences, provide the `mode=LPGD` option along with the argument `derivative_kwargs=dict(tau=0.1, rho=0.1)` to specify the perturbation and regularization strength. Alternatively, the derivative kwargs can also be passed directly to the returned `derivative` and `adjoint_derivative` function.
+
 #### Return value
 The function `solve_and_derivative` returns a tuple
 

From 3d1235c4ae070135738ff6dc28e6fcc97b7c8943 Mon Sep 17 00:00:00 2001
From: Anselm Paulus <anselm.paulus@uni-tuebingen.de>
Date: Mon, 26 Aug 2024 12:38:01 +0200
Subject: [PATCH 12/13] move perturbed solution computation to utils

---
 diffcp/cone_program.py | 132 +++--------------------------------------
 diffcp/utils.py        | 124 ++++++++++++++++++++++++++++++++++++++
 2 files changed, 133 insertions(+), 123 deletions(-)

diff --git a/diffcp/cone_program.py b/diffcp/cone_program.py
index ff3415d..99d2edc 100644
--- a/diffcp/cone_program.py
+++ b/diffcp/cone_program.py
@@ -12,7 +12,7 @@
 
 import _diffcp
 import diffcp.cones as cone_lib
-from diffcp.utils import regularize_P, embed_problem
+from diffcp.utils import compute_perturbed_solution, compute_adjoint_perturbed_solution
 
 
 def pi(z, cones):
@@ -667,14 +667,16 @@ def derivative_lpgd(dA, db, dc, tau, rho):
         
         if mode in ["lpgd", "lpgd_right"]:  # Perturb the problem to the right
             try:
-                x_right, y_right, s_right = compute_perturbed_solution(dA, db, dc, tau, rho)
+                x_right, y_right, s_right = compute_perturbed_solution(
+                    dA, db, dc, tau, rho, A, b, c, P, cone_dict, x, y, s, solver_kwargs, solve_method, solve_internal)
             except SolverError as e:
                 raise SolverError(f"Computation of right-perturbed problem failed: {e}. "\
                                    "Consider decreasing tau or swiching to 'lpgd_left' mode.")
 
         if mode in ["lpgd", "lpgd_left"]:  # Perturb the problem to the left
             try:
-                x_left, y_left, s_left = compute_perturbed_solution(dA, db, dc, -tau, rho)
+                x_left, y_left, s_left = compute_perturbed_solution(
+                    dA, db, dc, -tau, rho, A, b, c, P, cone_dict, x, y, s, solver_kwargs, solve_method, solve_internal)
             except SolverError as e:
                 raise SolverError(f"Computation of left-perturbed problem failed: {e}. "\
                                    "Consider decreasing tau or swiching to 'lpgd_right' mode.")
@@ -694,43 +696,6 @@ def derivative_lpgd(dA, db, dc, tau, rho):
         else:
             raise ValueError(f"Only modes 'lpgd', 'lpgd_left', 'lpgd_right' can be used, got {mode}.")
         return dx, dy, ds
-
-    def compute_perturbed_solution(dA, db, dc, tau, rho):
-        """
-        Computes the perturbed solution x_right, y_right, s_right at (A, b, c) with 
-        perturbations dA, db, dc and optional regularization rho.
-        
-        Args:
-            dA: SciPy sparse matrix in CSC format; must have same sparsity
-                pattern as the matrix `A` from the cone program
-            db: NumPy array representing perturbation in `b`
-            dc: NumPy array representing perturbation in `c`
-            tau: Perturbation strength parameter
-            rho: Regularization strength parameter
-        Returns:
-            x_pert: Perturbed solution of the primal variable x
-            y_pert: Perturbed solution of the dual variable y
-            s_pert: Perturbed solution of the slack variable s
-        """
-
-        # Perturb the problem
-        A_pert = A + tau * dA
-        b_pert = b + tau * db
-        c_pert = c + tau * dc
-
-        # Regularize: Effectively adds a rho/2 |x-x^*|^2 term to the objective
-        P_reg = regularize_P(P, rho=rho, size=n)
-        c_pert_reg = c_pert - rho * x
-
-        # Set warm start
-        warm_start = (x, y, s)
-
-        # Solve the perturbed problem
-        result_pert = solve_internal(A=A_pert, b=b_pert, c=c_pert_reg, P=P_reg, cone_dict=cone_dict, 
-                                    solve_method=solve_method, warm_start=warm_start, **solver_kwargs)
-        # Extract the solutions
-        x_pert, y_pert, s_pert = result_pert["x"], result_pert["y"], result_pert["s"]
-        return x_pert, y_pert, s_pert
     
     def adjoint_derivative_lpgd(dx, dy, ds, tau, rho):
         """Lagrangian Proximal Gradient Descent (LPGD) [arxiv.org/abs/2407.05920]
@@ -758,14 +723,16 @@ def adjoint_derivative_lpgd(dx, dy, ds, tau, rho):
 
         if mode in ["lpgd", "lpgd_right"]:  # Perturb the problem to the right
             try:
-                x_right, y_right, _ = compute_adjoint_perturbed_solution(dx, dy, ds, tau, rho)
+                x_right, y_right, _ = compute_adjoint_perturbed_solution(
+                    dx, dy, ds, tau, rho, A, b, c, P, cone_dict, x, y, s, solver_kwargs, solve_method, solve_internal)
             except SolverError as e:
                 raise SolverError(f"Computation of right-perturbed problem failed: {e}. "\
                                    "Consider decreasing tau or swiching to 'lpgd_left' mode.")
 
         if mode in ["lpgd", "lpgd_left"]:  # Perturb the problem to the left
             try:
-                x_left, y_left, _ = compute_adjoint_perturbed_solution(dx, dy, ds, -tau, rho)
+                x_left, y_left, _ = compute_adjoint_perturbed_solution(
+                    dx, dy, ds, -tau, rho, A, b, c, P, cone_dict, x, y, s, solver_kwargs, solve_method, solve_internal)
             except SolverError as e:
                 raise SolverError(f"Computation of left-perturbed problem failed: {e}. "\
                                    "Consider decreasing tau or swiching to 'lpgd_right' mode.")
@@ -788,87 +755,6 @@ def adjoint_derivative_lpgd(dx, dy, ds, tau, rho):
         dA = sparse.csc_matrix((dA_data, (rows, cols)), shape=A.shape)
         return dA, db, dc
 
-    def compute_adjoint_perturbed_solution(dx, dy, ds, tau, rho):
-        """
-        Computes the adjoint perturbed solution x_right, y_right, s_right (Lagrangian Proximal Map)
-        by solving the following perturbed problem with perturbations dx, dy, ds 
-        and perturbation/regularization parameters tau/rho:
-        argmin.     <x,Px> + <c,x> + tau*(<dx,x> + <ds,s>) + rho/2 |x-x^*|^2
-        subject to  Ax + s = b-tau*dy
-                    s \in K
-
-        For ds=0 we solve
-        argmin.     <x,(P+rho*I)x> + <c+tau*dx-rho*x^*, x>
-        subject to  Ax + s = b-tau*dy
-                    s \in K
-        This is just a perturbed instance of the forward optimization problem.
-
-        For ds!=0 we rewrite the problem as an embedded problem.
-        We add a constraint x'=s, replace all appearances of s with x' and solve
-        argmin.     <[x, x'], [[P+rho*I,     0];  [x, x']> + <[c+tau*dx-rho*x^*, tau*ds-rho*s^*], [x, x']>
-                               [      0, rho*I]]
-        subject to  [[A,  I];  [x, x'] + [s'; s] = [b-tau*dy; 0]
-                     [0, -I]]
-                    (s', s) \in (0 x K)
-        Note that we also add a regularizer on s in this case (rho/2 |s-s^*|^2).
-        
-        Args:
-            dx: NumPy array representing perturbation in `x`
-            dy: NumPy array representing perturbation in `y`
-            ds: NumPy array representing perturbation in `s`
-            tau: Perturbation strength parameter
-            rho: Regularization strength parameter
-        Returns:
-            x_pert: Perturbed solution of the primal variable x
-            y_pert: Perturbed solution of the dual variable y
-            s_pert: Perturbed solution of the slack variable s
-        """
-
-        # The cases ds = 0 and ds != 0 are handled separately, see docstring
-        if np.isclose(np.sum(np.abs(ds)), 0):
-            # Perturb problem (Note: perturb primal and dual linear terms in different directions)
-            c_pert = c + tau * dx
-            b_pert = b - tau * dy
-
-            # Regularize: Effectively adds a rho/2 |x-x^*|^2 term to the objective
-            P_reg = regularize_P(P, rho=rho, size=n)
-            c_pert_reg = c_pert - rho * x
-
-            # Set warm start
-            warm_start = (x, y, s) if solve_method != "ECOS" else None
-
-            # Solve the perturbed problem
-            # Note: In special case solve_method=='SCS' and rho==0, this could be sped up strongly by using solver.update
-            result_pert = solve_internal(A=A, b=b_pert, c=c_pert_reg, P=P_reg, cone_dict=cone_dict, 
-                                         solve_method=solve_method, warm_start=warm_start, **solver_kwargs)
-            # Extract the solutions
-            x_pert, y_pert, s_pert = result_pert["x"], result_pert["y"], result_pert["s"]
-        else:
-            # Embed problem (see docstring)
-            A_emb, b_emb, c_emb, P_emb, cone_dict_emb = embed_problem(A, b, c, P, cone_dict)
-
-            # Perturb problem (Note: perturb primal and dual linear terms in different directions)
-            b_emb_pert = b_emb - tau * np.hstack([dy, np.zeros(m)])
-            c_emb_pert = c_emb + tau * np.hstack([dx, ds])
-            
-            # Regularize: Effectively adds a rho/2 (|x-x^*|^2 + |s-s^*|^2) term to the objective
-            P_emb_reg = regularize_P(P_emb, rho=rho, size=n+m)
-            c_emb_pert_reg = c_emb_pert - rho * np.hstack([x, s])
-
-            # Set warm start
-            if solve_method == "ECOS":
-                warm_start = None
-            else:
-                warm_start = (np.hstack([x, s]), np.hstack([y, y]), np.hstack([s, s]))
-
-            # Solve the embedded problem
-            result_pert = solve_internal(A=A_emb, b=b_emb_pert, c=c_emb_pert_reg, P=P_emb_reg, cone_dict=cone_dict_emb, 
-                                        solve_method=solve_method, warm_start=warm_start, **solver_kwargs)
-            # Extract the solutions
-            x_pert = result_pert['x'][:n]
-            y_pert = result_pert['y'][:m]
-            s_pert = result_pert['x'][n:n+m]
-        return x_pert, y_pert, s_pert
 
     result["D"] = derivative
     result["DT"] = adjoint_derivative
diff --git a/diffcp/utils.py b/diffcp/utils.py
index 4ecf1df..588d9ac 100644
--- a/diffcp/utils.py
+++ b/diffcp/utils.py
@@ -51,6 +51,7 @@ def regularize_P(P, rho, size):
         P_reg = P
     return P_reg
 
+
 def embed_problem(A, b, c, P, cone_dict):
     """Embeds the problem into a larger problem that allows costs on the slack variables
     A_emb = [A, I; 0, -I]
@@ -79,3 +80,126 @@ def embed_problem(A, b, c, P, cone_dict):
     else:
         cone_dict_emb['z'] = m
     return A_emb, b_emb, c_emb, P_emb, cone_dict_emb
+
+
+def compute_perturbed_solution(dA, db, dc, tau, rho, A, b, c, P, cone_dict, x, y, s, solver_kwargs, solve_method, solve_internal):
+    """
+    Computes the perturbed solution x_right, y_right, s_right at (A, b, c) with 
+    perturbations dA, db, dc and optional regularization rho.
+    
+    Args:
+        dA: SciPy sparse matrix in CSC format; must have same sparsity
+            pattern as the matrix `A` from the cone program
+        db: NumPy array representing perturbation in `b`
+        dc: NumPy array representing perturbation in `c`
+        tau: Perturbation strength parameter
+        rho: Regularization strength parameter
+    Returns:
+        x_pert: Perturbed solution of the primal variable x
+        y_pert: Perturbed solution of the dual variable y
+        s_pert: Perturbed solution of the slack variable s
+    """
+    m, n = A.shape
+
+    # Perturb the problem
+    A_pert = A + tau * dA
+    b_pert = b + tau * db
+    c_pert = c + tau * dc
+
+    # Regularize: Effectively adds a rho/2 |x-x^*|^2 term to the objective
+    P_reg = regularize_P(P, rho=rho, size=n)
+    c_pert_reg = c_pert - rho * x
+
+    # Set warm start
+    warm_start = (x, y, s)
+
+    # Solve the perturbed problem
+    result_pert = solve_internal(A=A_pert, b=b_pert, c=c_pert_reg, P=P_reg, cone_dict=cone_dict, 
+                                solve_method=solve_method, warm_start=warm_start, **solver_kwargs)
+    # Extract the solutions
+    x_pert, y_pert, s_pert = result_pert["x"], result_pert["y"], result_pert["s"]
+    return x_pert, y_pert, s_pert
+
+
+def compute_adjoint_perturbed_solution(dx, dy, ds, tau, rho, A, b, c, P, cone_dict, x, y, s, solver_kwargs, solve_method, solve_internal):
+    """
+    Computes the adjoint perturbed solution x_right, y_right, s_right (Lagrangian Proximal Map)
+    by solving the following perturbed problem with perturbations dx, dy, ds 
+    and perturbation/regularization parameters tau/rho:
+    argmin.     <x,Px> + <c,x> + tau*(<dx,x> + <ds,s>) + rho/2 |x-x^*|^2
+    subject to  Ax + s = b-tau*dy
+                s \in K
+
+    For ds=0 we solve
+    argmin.     <x,(P+rho*I)x> + <c+tau*dx-rho*x^*, x>
+    subject to  Ax + s = b-tau*dy
+                s \in K
+    This is just a perturbed instance of the forward optimization problem.
+
+    For ds!=0 we rewrite the problem as an embedded problem.
+    We add a constraint x'=s, replace all appearances of s with x' and solve
+    argmin.     <[x, x'], [[P+rho*I,     0];  [x, x']> + <[c+tau*dx-rho*x^*, tau*ds-rho*s^*], [x, x']>
+                            [      0, rho*I]]
+    subject to  [[A,  I];  [x, x'] + [s'; s] = [b-tau*dy; 0]
+                    [0, -I]]
+                (s', s) \in (0 x K)
+    Note that we also add a regularizer on s in this case (rho/2 |s-s^*|^2).
+    
+    Args:
+        dx: NumPy array representing perturbation in `x`
+        dy: NumPy array representing perturbation in `y`
+        ds: NumPy array representing perturbation in `s`
+        tau: Perturbation strength parameter
+        rho: Regularization strength parameter
+    Returns:
+        x_pert: Perturbed solution of the primal variable x
+        y_pert: Perturbed solution of the dual variable y
+        s_pert: Perturbed solution of the slack variable s
+    """
+    m, n = A.shape
+
+    # The cases ds = 0 and ds != 0 are handled separately, see docstring
+    if np.isclose(np.sum(np.abs(ds)), 0):
+        # Perturb problem (Note: perturb primal and dual linear terms in different directions)
+        c_pert = c + tau * dx
+        b_pert = b - tau * dy
+
+        # Regularize: Effectively adds a rho/2 |x-x^*|^2 term to the objective
+        P_reg = regularize_P(P, rho=rho, size=n)
+        c_pert_reg = c_pert - rho * x
+
+        # Set warm start
+        warm_start = (x, y, s) if solve_method != "ECOS" else None
+
+        # Solve the perturbed problem
+        # Note: In special case solve_method=='SCS' and rho==0, this could be sped up strongly by using solver.update
+        result_pert = solve_internal(A=A, b=b_pert, c=c_pert_reg, P=P_reg, cone_dict=cone_dict, 
+                                        solve_method=solve_method, warm_start=warm_start, **solver_kwargs)
+        # Extract the solutions
+        x_pert, y_pert, s_pert = result_pert["x"], result_pert["y"], result_pert["s"]
+    else:
+        # Embed problem (see docstring)
+        A_emb, b_emb, c_emb, P_emb, cone_dict_emb = embed_problem(A, b, c, P, cone_dict)
+
+        # Perturb problem (Note: perturb primal and dual linear terms in different directions)
+        b_emb_pert = b_emb - tau * np.hstack([dy, np.zeros(m)])
+        c_emb_pert = c_emb + tau * np.hstack([dx, ds])
+        
+        # Regularize: Effectively adds a rho/2 (|x-x^*|^2 + |s-s^*|^2) term to the objective
+        P_emb_reg = regularize_P(P_emb, rho=rho, size=n+m)
+        c_emb_pert_reg = c_emb_pert - rho * np.hstack([x, s])
+
+        # Set warm start
+        if solve_method == "ECOS":
+            warm_start = None
+        else:
+            warm_start = (np.hstack([x, s]), np.hstack([y, y]), np.hstack([s, s]))
+
+        # Solve the embedded problem
+        result_pert = solve_internal(A=A_emb, b=b_emb_pert, c=c_emb_pert_reg, P=P_emb_reg, cone_dict=cone_dict_emb, 
+                                    solve_method=solve_method, warm_start=warm_start, **solver_kwargs)
+        # Extract the solutions
+        x_pert = result_pert['x'][:n]
+        y_pert = result_pert['y'][:m]
+        s_pert = result_pert['x'][n:n+m]
+    return x_pert, y_pert, s_pert

From ca1be3f09845422b0840088446c84f8b228dc18b Mon Sep 17 00:00:00 2001
From: Anselm Paulus <anselm.paulus@uni-tuebingen.de>
Date: Mon, 26 Aug 2024 12:40:56 +0200
Subject: [PATCH 13/13] cosmetics

---
 diffcp/utils.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/diffcp/utils.py b/diffcp/utils.py
index 588d9ac..affe8b3 100644
--- a/diffcp/utils.py
+++ b/diffcp/utils.py
@@ -139,9 +139,9 @@ def compute_adjoint_perturbed_solution(dx, dy, ds, tau, rho, A, b, c, P, cone_di
     For ds!=0 we rewrite the problem as an embedded problem.
     We add a constraint x'=s, replace all appearances of s with x' and solve
     argmin.     <[x, x'], [[P+rho*I,     0];  [x, x']> + <[c+tau*dx-rho*x^*, tau*ds-rho*s^*], [x, x']>
-                            [      0, rho*I]]
+                           [      0, rho*I]]
     subject to  [[A,  I];  [x, x'] + [s'; s] = [b-tau*dy; 0]
-                    [0, -I]]
+                 [0, -I]]
                 (s', s) \in (0 x K)
     Note that we also add a regularizer on s in this case (rho/2 |s-s^*|^2).