
Commit: Merge branch 'v2.2'

pzivich committed Apr 23, 2024
2 parents fc28b60 + cf805ad commit 992ade8
Showing 27 changed files with 2,244 additions and 663 deletions.
12 changes: 6 additions & 6 deletions README.md
@@ -18,10 +18,10 @@ of M-estimation.
## M-Estimation and Estimating Equations

Here, we provide a brief overview of M-estimation theory. For more detailed introductions to M-estimation, see Ross
et al. (2024) or Chapter 7 of Boos & Stefanski (2013). M-estimation is a generalization of likelihood-based methods.
*M-estimators* are solutions to estimating equations. To apply the M-estimator, we solve the estimating equations using
observed data. This is similar to other approaches, but the key advantage of M-Estimators is estimation of the variance
via the sandwich variance.
et al. (2024), Stefanski & Boos (2002), or Chapter 7 of Boos & Stefanski (2013). M-estimation is a generalization of
likelihood-based methods. *M-estimators* are solutions to estimating equations. To apply the M-estimator, we solve the
estimating equations using observed data. This is similar to other approaches, but the key advantage of M-Estimators is
variance estimation via the empirical sandwich variance estimator.
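As a refresher on the estimator named above (standard M-estimation background following Stefanski & Boos (2002); the notation below is a sketch, not text from this commit), the empirical sandwich variance estimator combines a "bread" and a "filling" term:

```latex
% Empirical sandwich variance estimator (sketch, Stefanski & Boos 2002 style notation)
V_n(\hat{\theta}) = B_n(\hat{\theta})^{-1} \; F_n(\hat{\theta}) \; \left\{ B_n(\hat{\theta})^{-1} \right\}^{T}
\quad \text{with} \quad
B_n(\hat{\theta}) = \frac{1}{n} \sum_{i=1}^{n} \left[ - \frac{\partial \psi(O_i; \hat{\theta})}{\partial \theta} \right],
\qquad
F_n(\hat{\theta}) = \frac{1}{n} \sum_{i=1}^{n} \psi(O_i; \hat{\theta}) \, \psi(O_i; \hat{\theta})^{T}
```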

While M-Estimation is a powerful tool, the derivatives and matrix algebra can quickly become unwieldy. This is where
`delicatessen` comes in. `delicatessen` takes estimating functions and data, and solves for the parameter estimates,
@@ -84,10 +84,10 @@ at [delicatessen website](https://deli.readthedocs.io/en/latest/).
Boos DD, & Stefanski LA. (2013). M-estimation (estimating equations). In Essential Statistical Inference
(pp. 297-337). Springer, New York, NY.

Stefanski LA, & Boos DD. (2002). The calculus of M-estimation. *The American Statistician*, 56(1), 29-38.

Ross RK, Zivich PN, Stringer JS, & Cole SR. (2024). M-estimation for common epidemiological measures: introduction and
applied examples. *International Journal of Epidemiology*, 53(2).

Stefanski LA, & Boos DD. (2002). The calculus of M-estimation. *The American Statistician*, 56(1), 29-38.

Zivich PN, Klose M, Cole SR, Edwards JK, & Shook-Sa BE. (2022). Delicatessen: M-Estimation in Python.
*arXiv preprint arXiv:2203.11300*.
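To make the workflow described in the README concrete, here is a minimal sketch of the intended usage: an estimating function is wrapped in a user-defined ``psi`` and handed to ``MEstimator``, which solves for the point estimates and then computes the empirical sandwich variance. The data values are made up purely for illustration.

```python
import numpy as np
from delicatessen import MEstimator
from delicatessen.estimating_equations import ee_mean_variance

y = np.array([2.1, 3.4, 1.8, 4.2, 2.9])      # toy data for illustration

def psi(theta):
    # Stacked estimating functions for the mean (theta[0]) and variance (theta[1])
    return ee_mean_variance(theta=theta, y=y)

estr = MEstimator(psi, init=[0., 1.])         # starting values for the root-finder
estr.estimate()                               # solve the estimating equations
print(estr.theta)                             # point estimates
print(estr.variance)                          # empirical sandwich variance estimate
```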
1 change: 1 addition & 0 deletions delicatessen/__init__.py
@@ -12,3 +12,4 @@
from .version import __version__

from .mestimation import MEstimator
from .sandwich import compute_sandwich
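The newly exported ``compute_sandwich`` suggests the sandwich variance can now be computed directly from a set of estimating functions and previously obtained point estimates, without running the root-finder. The sketch below assumes a call pattern of ``compute_sandwich(stacked_equations, theta)``; only the import is visible in this diff, so check the package documentation for the actual signature.

```python
import numpy as np
from delicatessen import compute_sandwich
from delicatessen.estimating_equations import ee_mean_variance

y = np.array([2.1, 3.4, 1.8, 4.2, 2.9])      # toy data for illustration

def psi(theta):
    return ee_mean_variance(theta=theta, y=y)

# Point estimates obtained elsewhere (closed forms for the mean and variance)
theta_hat = [np.mean(y), np.var(y)]

# Assumed call pattern: stacked estimating functions plus the point estimates
sandwich = compute_sandwich(stacked_equations=psi, theta=theta_hat)
print(sandwich / y.shape[0])                  # divide by n if the asymptotic sandwich is returned
```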
28 changes: 24 additions & 4 deletions delicatessen/data.py
@@ -14,7 +14,13 @@ def load_shaq_free_throws():
Returns
-------
ndarray
array :
Returns a 24-by-3 NumPy array.
References
----------
Boos DD, & Stefanski LA. (2013). M-estimation (estimating equations). In Essential Statistical Inference
(pp. 297-337). Springer, New York, NY.
"""
d = np.array([[ 1, 4, 5],
[ 2, 5, 11],
@@ -43,7 +49,7 @@ def load_shaq_free_throws():


def load_inderjit():
"""Load example data from Inderjit et al. (2002) on the dose-response of herbicide on perennial ryegrass growth
"""Load example data from Inderjit et al. (2002) on the dose-response of herbicide on perennial ryegrass growth.
Notes
-----
@@ -53,7 +59,13 @@ def load_inderjit():
Returns
-------
ndarray
array :
Returns a 24-by-2 NumPy array.
References
----------
Inderjit, Streibig JC, & Olofsdotter M. (2002). Joint action of phenolic acid mixtures and its significance in
allelopathy research. *Physiologia Plantarum*, 114(3), 422-428.
"""
d = np.array([[7.5800000, 0.00],
[8.0000000, 0.00],
@@ -83,14 +95,22 @@ def load_inderjit():


def load_robust_regress(outlier=True):
"""Load illustrative example for robust linear regression.
"""Load illustrative example of robust linear regression published in Zivich et al. (2022).
Parameters
----------
outlier : bool, optional
Whether to induce the outlier (``True``) or not (``False``).
Returns
-------
array :
Returns a 15-by-2 NumPy array.
References
----------
Zivich PN, Klose M, Cole SR, Edwards JK, & Shook-Sa BE. (2022). Delicatessen: M-estimation in Python.
*arXiv:2203.11300*.
"""
height = [168.519, 166.944, 164.327, 164.058, 166.212, 167.358,
165.244, 169.352, 159.386, 166.953, 163.876,
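A quick sketch of how the loaders documented above might be used; the column ordering (growth response first, herbicide dose second) is inferred from the arrays shown in this diff.

```python
from delicatessen.data import load_inderjit, load_robust_regress

d = load_inderjit()                           # dose-response data from Inderjit et al. (2002)
resp = d[:, 0]                                # ryegrass growth response
dose = d[:, 1]                                # herbicide dose

d_robust = load_robust_regress(outlier=True)  # illustrative regression data with an outlier
print(d.shape, d_robust.shape)
```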
135 changes: 133 additions & 2 deletions delicatessen/derivative.py
@@ -4,6 +4,137 @@
from scipy.stats import norm


def approx_differentiation(xk, f, epsilon=1e-9, method='capprox'):
r"""Numerical approximation to compute the gradient. This function implements numerical approximation methods for
derivatives generally (i.e., it provides the first-order forward, backward, and central difference approximations).
Note
----
This functionality is only intended for use behind the scenes in ``delicatessen``. Numerical approximation is
implemented from scratch to offer backward and central difference approximations (SciPy's ``approx_fprime`` only
offers the forward difference).
The forward difference approximation is
.. math::
\frac{f(x + \epsilon) - f(x)}{\epsilon}
the backward difference approximation is
.. math::
\frac{f(x) - f(x - \epsilon)}{\epsilon}
and the central difference approximation is
.. math::
\frac{f(x + \epsilon) - f(x - \epsilon)}{2\epsilon}
Here, the numerical approximation is implemented by generating matrices of the function output evaluated under
minor perturbations (determined by ``epsilon``) of each input argument. These matrices are then differenced and the
result is divided by ``epsilon`` (or :math:`2\epsilon` for the central difference).
Parameters
----------
xk : ndarray, list, shape (n, )
Point(s) or coordinate vector to evaluate the gradient at.
f : callable
Function of which to estimate the gradient of.
epsilon : float, optional
Increment to perturb the points by when computing the gradient. This should be a small value.
method : str, optional
Approximation to use to compute the gradient. Default is `capprox` which uses the central difference method.
One can also specify the forward difference (`fapprox`) or backward difference (`bapprox`) methods.
Returns
-------
numpy.array :
Corresponding array of the pairwise derivatives for all different input x values.
Examples
--------
>>> import numpy as np
>>> from delicatessen.derivative import approx_differentiation
To illustrate use, we will compute the derivative of the following function
.. math::
f(x) = x^2 - x + \sin(x + \sqrt{x})
>>> def f(x):
>>> return x**2 - x + np.sin(x + np.sqrt(x))
If you work out the derivative by hand, you will end up with the following
.. math::
2x - 1 + \left( \frac{1}{2 \sqrt{x}} + 1 \right) \cos(x + \sqrt{x})
Instead, we can use the central difference approximation to evaluate the derivative at a specific point. Here, we
will evaluate the derivative at :math:`x=1`
>>> dy = approx_differentiation(xk=[1, ], f=f)
which returns ``0.37578``, close to the value obtained by plugging :math:`x=1` into the previous equation.
The derivative of a function with multiple inputs and multiple outputs can also be evaluated. Consider the following
example with three inputs and two outputs
>>> def f(x):
>>> return [x[0]**2 - x[1], np.sin(np.sqrt(x[1]) + x[2]) + x[2]*(x[1]**2)]
>>> approx_differentiation(xk=[0.7, 1.2, -0.9], f=f, method='fapprox')
>>> approx_differentiation(xk=[0.7, 1.2, -0.9], f=f, method='bapprox')
>>> approx_differentiation(xk=[0.7, 1.2, -0.9], f=f, method='capprox')
each of which returns a 2-by-3 array of the pairwise derivatives at the given values. Here, the rows correspond to
the outputs and the columns correspond to the inputs. The three calls use the forward, backward, and central
difference approximations, respectively.
"""
# Setup parameters for call
xk = np.asarray(xk) # Convert inputs into NumPy array if not already
xp = xk.shape[0] # Get the number of parameters in the input
shift = np.identity(n=xk.shape[0]) * epsilon # Define the shift matrix for the partials

def generate_matrix(x_shift, f):
"""Internal function to generate a matrix of the outputs under the parameter shifts, defined by x_shift"""
shift_matrix = [] # Storage for matrices
for j in range(xp): # Looping over shift combinations
shift_matrix.append(f(x_shift[j, :])) # ... compute output at shifted values
return np.asarray(shift_matrix) # Return matrix under all shifts

# Computing the gradient using the corresponding method
if method == 'capprox': # Central difference
lower = (xk - shift) # ... defining lower shift
f0 = generate_matrix(x_shift=lower, f=f) # ... output for lower shift
upper = (xk + shift) # ... defining upper shift
f1 = generate_matrix(x_shift=upper, f=f) # ... output for upper shift
deriv = (f1 - f0).T / (2*epsilon) # ... central difference approximation
elif method == 'bapprox': # Backward difference
lower = (xk - shift) # ... defining lower shift
f0 = generate_matrix(x_shift=lower, f=f) # ... output for lower shift
f_eval = f(xk) # ... upper is held fixed
f1 = np.asarray([f_eval for i in range(xp)]) # ... stack upper into a matrix
deriv = (f1 - f0).T / epsilon # ... backward difference approximation
elif method == 'fapprox': # Forward difference
f_eval = f(xk) # ... lower is held fixed
f0 = np.asarray([f_eval for i in range(xp)]) # ... stack lower into a matrix
upper = (xk + shift) # ... defining upper shift
f1 = generate_matrix(x_shift=upper, f=f) # ... output for upper shift
deriv = (f1 - f0).T / epsilon # ... forward difference approximation
else: # Otherwise error
raise ValueError("Method chosen is not supported")

# Processing the final return based on parameter shape
if xp == 1:
return np.asarray([deriv, ])
else:
return deriv


def auto_differentiation(xk, f):
r"""Forward-mode automatic differentiation. Automatic differentiation offers a way to compute the exact derivative,
rather than numerically approximated (i.e., the central difference method). Automatic differentiation iteratively
@@ -22,9 +153,9 @@ def auto_differentiation(xk, f):
Parameters
----------
xk: ndarray, list, shape (n, )
xk : ndarray, list, shape (n, )
Point(s) or coordinate vector to evaluate the gradient at.
f: callable
f : callable
Function of which to estimate the gradient of.
Returns
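The numerical and automatic differentiation routines above can be cross-checked against one another. Below is a small sketch reusing the multi-input, multi-output example from the docstring; the exact numerical output is not asserted here.

```python
import numpy as np
from delicatessen.derivative import approx_differentiation, auto_differentiation

def f(x):
    # Two outputs, three inputs (same example as in the docstring above)
    return [x[0]**2 - x[1],
            np.sin(np.sqrt(x[1]) + x[2]) + x[2]*(x[1]**2)]

xk = [0.7, 1.2, -0.9]
numeric = approx_differentiation(xk=xk, f=f, method='capprox')  # central difference approximation
exact = auto_differentiation(xk=xk, f=f)                        # forward-mode automatic differentiation
print(np.allclose(numeric, exact, atol=1e-5))                   # the two 2-by-3 Jacobians should agree closely
```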
2 changes: 1 addition & 1 deletion delicatessen/estimating_equations/__init__.py
@@ -7,7 +7,7 @@
from .dose_response import (ee_4p_logistic, ee_3p_logistic, ee_2p_logistic,
ee_effective_dose_delta)

from .measurement import (ee_rogan_gladen,
from .measurement import (ee_rogan_gladen, ee_rogan_gladen_extended
)

from .regression import (ee_regression, ee_glm, ee_mlogit,
41 changes: 10 additions & 31 deletions delicatessen/estimating_equations/basic.py
@@ -20,11 +20,6 @@ def ee_mean(theta, y):
\sum_{i=1}^n (Y_i - \theta) = 0
Note
----
All provided estimating equations are meant to be wrapped inside a user-specified function. Throughout, these
user-defined functions are defined as ``psi``.
Parameters
----------
theta : ndarray, list, vector
@@ -36,7 +31,7 @@ def ee_mean(theta, y):
Returns
-------
array :
Returns a 1-by-n NumPy array evaluated for the input ``theta`` and ``y``
Returns a 1-by-`n` NumPy array evaluated for the input ``theta`` and ``y``
Examples
--------
@@ -89,11 +84,6 @@ def ee_mean_robust(theta, y, k, loss='huber', lower=None, upper=None):
Tukey's biweight, Andrew's Sine, and Hampel. See ``robust_loss_function`` for further details on the loss
functions for the robust mean.
Note
----
All provided estimating equations are meant to be wrapped inside a user-specified function. Throughout, these
user-defined functions are defined as ``psi``.
Parameters
----------
theta : ndarray, list, vector
@@ -105,18 +95,18 @@ def ee_mean_robust(theta, y, k, loss='huber', lower=None, upper=None):
Tuning or hyperparameter for the chosen loss function. Notice that the choice of hyperparameter depends on the
loss function.
loss : str, optional
Robust loss function to use. Default is 'huber'. Options include 'andrew', 'hampel', 'huber', 'tukey'.
Robust loss function to use. Default is ``'huber'``. Options include ``'andrew'``, ``'hampel'``, ``'tukey'``.
lower : int, float, None, optional
Lower parameter for the 'hampel' loss function. This parameter does not impact the other loss functions.
Lower parameter for the Hampel loss function. This parameter does not impact the other loss functions.
Default is ``None``.
upper : int, float, None, optional
Upper parameter for the 'hampel' loss function. This parameter does not impact the other loss functions.
Upper parameter for the Hampel loss function. This parameter does not impact the other loss functions.
Default is ``None``.
Returns
-------
array :
Returns a 1-by-n NumPy array evaluated for the input theta and y
Returns a 1-by-`n` NumPy array evaluated for the input ``theta`` and ``y``.
Examples
--------
@@ -186,12 +176,6 @@ def ee_mean_variance(theta, y):
Unlike ``ee_mean``, ``theta`` consists of 2 parameters. The output covariance matrix will also provide estimates
for each of the ``theta`` values.
Note
----
All provided estimating equations are meant to be wrapped inside a user-specified function. Throughout, these
user-defined functions are defined as ``psi``.
Parameters
----------
theta : ndarray, list, vector
@@ -204,7 +188,7 @@ def ee_mean_variance(theta, y):
Returns
-------
array :
Returns a 2-by-n NumPy array evaluated for the input theta and y
Returns a 2-by-`n` NumPy array evaluated for the input ``theta`` and ``y``.
Examples
--------
@@ -273,12 +257,12 @@ def ee_percentile(theta, y, q):
1-dimensional vector of n observed values. No missing data should be included (missing data may cause unexpected
behavior when attempting to calculate the percentile).
q : float
Percentile to calculate. Must be (0, 1)
Percentile to calculate. Must be :math:`(0, 1)`
Returns
-------
array :
Returns a 1-by-n NumPy array evaluated for the input theta and y
Returns a 1-by-`n` NumPy array evaluated for the input ``theta`` and ``y``.
Examples
--------
@@ -309,7 +293,7 @@ def ee_percentile(theta, y, q):
>>> estr.theta
Then displays the estimated percentile / median. In this example, there is a difference between the closed form
solution (-0.07978) and M-Estimation (-0.06022).
solution (``-0.07978``) and M-Estimation (``-0.06022``).
References
----------
@@ -352,11 +336,6 @@ def ee_positive_mean_deviation(theta, y):
sandwich) cannot be used to estimate the variance. This estimating equation is offered for completeness, but is not
generally recommended for applications.
Note
----
All provided estimating equations are meant to be wrapped inside a user-specified function. Throughout, these
user-defined functions are defined as ``psi``.
Parameters
----------
theta : ndarray, list, vector
@@ -369,7 +348,7 @@ def ee_positive_mean_deviation(theta, y):
Returns
-------
array :
Returns a 2-by-n NumPy array evaluated for the input theta and y
Returns a 2-by-`n` NumPy array evaluated for the input ``theta`` and ``y``.
Examples
--------
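All of the estimating equations above share the same usage pattern: wrap the chosen ``ee_*`` function in a user-defined ``psi`` and pass it to ``MEstimator``. A brief sketch with the robust mean and the Huber loss follows; the data and tuning value are illustrative only.

```python
import numpy as np
from delicatessen import MEstimator
from delicatessen.estimating_equations import ee_mean_robust

y = np.array([1.2, 0.8, 1.1, 0.9, 1.0, 7.5])     # toy data with one outlying observation

def psi(theta):
    # Robust mean using Huber's loss with tuning parameter k
    return ee_mean_robust(theta=theta, y=y, k=1.345, loss='huber')

estr = MEstimator(psi, init=[0., ])
estr.estimate()
print(estr.theta)        # robust mean estimate
print(estr.variance)     # empirical sandwich variance estimate
```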
