diff --git a/examples/1_feature_overview.ipynb b/examples/1_feature_overview.ipynb index dff8cd95f..62ff54f27 100644 --- a/examples/1_feature_overview.ipynb +++ b/examples/1_feature_overview.ipynb @@ -639,12 +639,48 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "### SR3" + "### STLSQ - verbose (print out optimization terms at each iteration)\n", + "The same verbose option is available with all the other optimizers. For optimizers that use the CVXPY\n", + "package, there is additional boolean flag, verbose_cvxpy, that decides whether or not CVXPY solves will also be verbose." ] }, { "cell_type": "code", "execution_count": 17, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + " Iteration ... |y - Xw|^2 ... a * |w|_2 ... |w|_0 ... Total error: |y - Xw|^2 + a * |w|_2\n", + " 0 ... 3.1040e+01 ... 4.9652e+02 ... 8 ... 5.2756e+02\n", + " 1 ... 1.1797e+00 ... 4.9674e+02 ... 7 ... 4.9792e+02\n", + " 2 ... 1.1712e+00 ... 4.9675e+02 ... 7 ... 4.9792e+02\n", + "(x0)' = -9.999 x0 + 9.999 x1\n", + "(x1)' = 27.992 x0 + -0.999 x1 + -1.000 x0 x2\n", + "(x2)' = -2.666 x2 + 1.000 x0 x1\n" + ] + } + ], + "source": [ + "stlsq_optimizer = ps.STLSQ(threshold=.01, alpha=.5, verbose=True)\n", + "\n", + "model = ps.SINDy(optimizer=stlsq_optimizer)\n", + "model.fit(x_train, t=dt)\n", + "model.print()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### SR3" + ] + }, + { + "cell_type": "code", + "execution_count": 18, "metadata": { "ExecuteTime": { "end_time": "2020-10-22T22:20:45.862190Z", @@ -680,7 +716,7 @@ }, { "cell_type": "code", - "execution_count": 18, + "execution_count": 19, "metadata": { "ExecuteTime": { "end_time": "2020-10-22T22:20:46.058222Z", @@ -732,7 +768,7 @@ }, { "cell_type": "code", - "execution_count": 19, + "execution_count": 20, "metadata": {}, "outputs": [ { @@ -781,7 +817,7 @@ }, { "cell_type": "code", - "execution_count": 20, + "execution_count": 21, "metadata": {}, "outputs": [ { @@ -827,7 +863,7 @@ }, { "cell_type": "code", - "execution_count": 21, + "execution_count": 22, "metadata": { "ExecuteTime": { "end_time": "2020-10-22T22:20:46.071020Z", @@ -853,7 +889,7 @@ }, { "cell_type": "code", - "execution_count": 22, + "execution_count": 23, "metadata": { "ExecuteTime": { "end_time": "2020-10-22T22:20:46.108252Z", @@ -893,7 +929,7 @@ }, { "cell_type": "code", - "execution_count": 23, + "execution_count": 24, "metadata": {}, "outputs": [ { @@ -919,7 +955,7 @@ }, { "cell_type": "code", - "execution_count": 24, + "execution_count": 25, "metadata": {}, "outputs": [ { @@ -977,7 +1013,7 @@ }, { "cell_type": "code", - "execution_count": 25, + "execution_count": 26, "metadata": {}, "outputs": [ { @@ -1007,7 +1043,7 @@ }, { "cell_type": "code", - "execution_count": 26, + "execution_count": 27, "metadata": {}, "outputs": [ { @@ -1036,7 +1072,7 @@ }, { "cell_type": "code", - "execution_count": 27, + "execution_count": 28, "metadata": {}, "outputs": [ { @@ -1066,7 +1102,7 @@ }, { "cell_type": "code", - "execution_count": 28, + "execution_count": 29, "metadata": {}, "outputs": [ { @@ -1095,7 +1131,7 @@ }, { "cell_type": "code", - "execution_count": 29, + "execution_count": 30, "metadata": {}, "outputs": [ { @@ -1125,7 +1161,7 @@ }, { "cell_type": "code", - "execution_count": 30, + "execution_count": 31, "metadata": { "ExecuteTime": { "end_time": "2020-10-22T22:20:46.436242Z", @@ -1161,7 +1197,7 @@ }, { "cell_type": "code", - "execution_count": 31, + "execution_count": 32, "metadata": {}, "outputs": [ { @@ -1245,7 
+1281,7 @@ }, { "cell_type": "code", - "execution_count": 32, + "execution_count": 33, "metadata": { "ExecuteTime": { "end_time": "2020-10-22T22:20:46.478998Z", @@ -1281,7 +1317,7 @@ }, { "cell_type": "code", - "execution_count": 33, + "execution_count": 34, "metadata": { "ExecuteTime": { "end_time": "2020-10-22T22:20:46.537504Z", @@ -1317,7 +1353,7 @@ }, { "cell_type": "code", - "execution_count": 34, + "execution_count": 35, "metadata": {}, "outputs": [ { @@ -1381,7 +1417,7 @@ }, { "cell_type": "code", - "execution_count": 35, + "execution_count": 36, "metadata": { "ExecuteTime": { "end_time": "2020-10-22T22:20:46.598683Z", @@ -1419,7 +1455,7 @@ }, { "cell_type": "code", - "execution_count": 36, + "execution_count": 37, "metadata": { "ExecuteTime": { "end_time": "2020-10-22T22:20:46.930345Z", @@ -1461,7 +1497,7 @@ }, { "cell_type": "code", - "execution_count": 37, + "execution_count": 38, "metadata": { "ExecuteTime": { "end_time": "2020-10-22T22:20:46.952076Z", @@ -1496,7 +1532,7 @@ }, { "cell_type": "code", - "execution_count": 38, + "execution_count": 39, "metadata": { "ExecuteTime": { "end_time": "2020-10-22T22:20:46.987955Z", @@ -1530,7 +1566,7 @@ }, { "cell_type": "code", - "execution_count": 39, + "execution_count": 40, "metadata": { "ExecuteTime": { "end_time": "2020-10-22T22:20:47.030687Z", @@ -1566,7 +1602,7 @@ }, { "cell_type": "code", - "execution_count": 40, + "execution_count": 41, "metadata": { "ExecuteTime": { "end_time": "2020-10-22T22:20:47.116505Z", @@ -1603,7 +1639,7 @@ }, { "cell_type": "code", - "execution_count": 41, + "execution_count": 42, "metadata": { "ExecuteTime": { "end_time": "2020-10-22T22:20:47.168456Z", @@ -1653,7 +1689,7 @@ }, { "cell_type": "code", - "execution_count": 42, + "execution_count": 43, "metadata": { "ExecuteTime": { "end_time": "2020-10-22T22:20:47.230682Z", @@ -1695,7 +1731,7 @@ }, { "cell_type": "code", - "execution_count": 43, + "execution_count": 44, "metadata": { "ExecuteTime": { "end_time": "2020-10-22T22:20:47.261567Z", @@ -1731,7 +1767,7 @@ }, { "cell_type": "code", - "execution_count": 44, + "execution_count": 45, "metadata": { "ExecuteTime": { "end_time": "2020-10-22T22:20:47.334576Z", @@ -1762,7 +1798,7 @@ " 'cos(1 z)']" ] }, - "execution_count": 44, + "execution_count": 45, "metadata": {}, "output_type": "execute_result" } @@ -1789,7 +1825,7 @@ }, { "cell_type": "code", - "execution_count": 45, + "execution_count": 46, "metadata": {}, "outputs": [ { @@ -1815,7 +1851,7 @@ }, { "cell_type": "code", - "execution_count": 46, + "execution_count": 47, "metadata": {}, "outputs": [ { @@ -1869,7 +1905,7 @@ }, { "cell_type": "code", - "execution_count": 47, + "execution_count": 48, "metadata": { "scrolled": false }, @@ -1935,7 +1971,7 @@ }, { "cell_type": "code", - "execution_count": 48, + "execution_count": 49, "metadata": { "ExecuteTime": { "end_time": "2020-10-22T22:20:47.495234Z", @@ -1961,7 +1997,7 @@ }, { "cell_type": "code", - "execution_count": 49, + "execution_count": 50, "metadata": { "ExecuteTime": { "end_time": "2020-10-22T22:20:47.527323Z", @@ -1995,7 +2031,7 @@ }, { "cell_type": "code", - "execution_count": 50, + "execution_count": 51, "metadata": { "ExecuteTime": { "end_time": "2020-10-22T22:20:47.700551Z", @@ -2034,7 +2070,7 @@ }, { "cell_type": "code", - "execution_count": 51, + "execution_count": 52, "metadata": { "ExecuteTime": { "end_time": "2020-10-22T22:20:48.459184Z", @@ -2083,7 +2119,7 @@ }, { "cell_type": "code", - "execution_count": 52, + "execution_count": 53, "metadata": { "ExecuteTime": { "end_time": 
"2020-10-22T22:20:51.769799Z", @@ -2151,7 +2187,7 @@ }, { "cell_type": "code", - "execution_count": 53, + "execution_count": 54, "metadata": { "ExecuteTime": { "end_time": "2020-10-22T22:20:51.775131Z", @@ -2165,7 +2201,7 @@ }, { "cell_type": "code", - "execution_count": 54, + "execution_count": 55, "metadata": { "ExecuteTime": { "end_time": "2020-10-22T22:20:55.799799Z", @@ -2234,14 +2270,24 @@ }, { "cell_type": "code", - "execution_count": 55, + "execution_count": 56, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ + "Model 0\n", + "Model 1\n", + "Model 2\n", + "Model 3\n", "Solver failed on model 3 , setting coefs to zeros\n", + "Model 4\n", + "Model 5\n", + "Model 6\n", + "Model 7\n", + "Model 8\n", + "Model 9\n", "1 = 5.000 x0 + 1.667 x0_dot + 5.556 x0x0_dot\n", "x0 = 0.200 1 + -0.333 x0_dot + -1.111 x0x0_dot\n", "x0x0 = 0.198 x0 + 0.007 x0x0x0 + -0.338 x0x0_dot + -1.099 x0x0x0_dot\n", @@ -2338,7 +2384,7 @@ }, { "cell_type": "code", - "execution_count": 56, + "execution_count": 57, "metadata": {}, "outputs": [ { @@ -2425,7 +2471,7 @@ }, { "cell_type": "code", - "execution_count": 57, + "execution_count": 58, "metadata": {}, "outputs": [], "source": [ @@ -2445,7 +2491,7 @@ }, { "cell_type": "code", - "execution_count": 58, + "execution_count": 59, "metadata": {}, "outputs": [ { @@ -2487,7 +2533,7 @@ }, { "cell_type": "code", - "execution_count": 59, + "execution_count": 60, "metadata": { "scrolled": true }, @@ -2624,7 +2670,7 @@ }, { "cell_type": "code", - "execution_count": 60, + "execution_count": 61, "metadata": {}, "outputs": [ { diff --git a/pysindy/optimizers/constrained_sr3.py b/pysindy/optimizers/constrained_sr3.py index 8723ebcbe..70015a41b 100644 --- a/pysindy/optimizers/constrained_sr3.py +++ b/pysindy/optimizers/constrained_sr3.py @@ -18,22 +18,22 @@ class ConstrainedSR3(SR3): .. math:: - 0.5\\|y-Xw\\|^2_2 + \\lambda \\times R(v) - + (0.5 / nu)\\|w-v\\|^2_2 + 0.5\\|y-Xw\\|^2_2 + \\lambda R(u) + + (0.5 / \\nu)\\|w-u\\|^2_2 \\text{subject to} .. math:: - Cw = d + CW = d - over v and w where :math:`R(v)` is a regularization function, C is a + over U and W, where :math:`R(U)` is a regularization function, C is a constraint matrix, and d is a vector of values. See the following reference for more details: Champion, Kathleen, et al. "A unified sparse optimization framework to learn parsimonious physics-informed models from data." - arXiv preprint arXiv:1906.10612 (2019). + IEEE Access 8 (2020): 169259-169271. Parameters ---------- @@ -115,6 +115,15 @@ class ConstrainedSR3(SR3): inequality_constraints : bool, optional (default False) If True, CVXPY methods are used to solve the problem. + verbose : bool, optional (default False) + If True, prints out the different error terms every + max_iter / 10 iterations. + + verbose_cvxpy : bool, optional (default False) + Boolean flag which is passed to CVXPY solve function to indicate if + output should be verbose or not. Only relevant for optimizers that + use the CVXPY package in some capabity. 
+ Attributes ---------- coef_ : array, shape (n_features,) or (n_targets, n_features) @@ -150,6 +159,8 @@ def __init__( initial_guess=None, thresholds=None, inequality_constraints=False, + verbose=False, + verbose_cvxpy=False, ): super(ConstrainedSR3, self).__init__( threshold=threshold, @@ -164,8 +175,10 @@ def __init__( fit_intercept=fit_intercept, copy_X=copy_X, normalize_columns=normalize_columns, + verbose=verbose, ) + self.verbose_cvxpy = verbose_cvxpy self.reg = get_regularization(thresholder) self.use_constraints = (constraint_lhs is not None) and ( constraint_rhs is not None @@ -243,13 +256,18 @@ def _update_coef_cvxpy(self, x, y, coef_sparse): # default solver is OSQP here but switches to ECOS for L2 try: - prob.solve(max_iter=self.max_iter, eps_abs=self.tol, eps_rel=self.tol) + prob.solve( + max_iter=self.max_iter, + eps_abs=self.tol, + eps_rel=self.tol, + verbose=self.verbose_cvxpy, + ) # Annoying error coming from L2 norm switching to use the ECOS # solver, which uses "max_iters" instead of "max_iter", and # similar semantic changes for the other variables. except TypeError: try: - prob.solve(abstol=self.tol, reltol=self.tol) + prob.solve(abstol=self.tol, reltol=self.tol, verbose=self.verbose_cvxpy) except cp.error.SolverError: print("Solver failed, setting coefs to zeros") xi.value = np.zeros(coef_sparse.shape[0] * coef_sparse.shape[1]) @@ -276,25 +294,48 @@ def _update_sparse_coef(self, coef_full): self.history_.append(coef_sparse.T) return coef_sparse - def _objective(self, x, y, coef_full, coef_sparse, trimming_array=None): + def _objective(self, x, y, q, coef_full, coef_sparse, trimming_array=None): """Objective function""" + if q != 0: + print_ind = q % (self.max_iter // 10.0) + else: + print_ind = q R2 = (y - np.dot(x, coef_full)) ** 2 D2 = (coef_full - coef_sparse) ** 2 if self.use_trimming: assert trimming_array is not None R2 *= trimming_array.reshape(x.shape[0], 1) + if self.thresholds is None: - return ( - 0.5 * np.sum(R2) - + self.reg(coef_full, 0.5 * self.threshold ** 2 / self.nu) - + 0.5 * np.sum(D2) / self.nu - ) + regularization = self.reg(coef_full, self.threshold ** 2 / self.nu) + if print_ind == 0 and self.verbose: + row = [ + q, + np.sum(R2), + np.sum(D2) / self.nu, + regularization, + np.sum(R2) + np.sum(D2) + regularization, + ] + print( + "{0:10d} ... {1:10.4e} ... {2:10.4e} ... {3:10.4e}" + " ... {4:10.4e}".format(*row) + ) + return 0.5 * np.sum(R2) + 0.5 * regularization + 0.5 * np.sum(D2) / self.nu else: - return ( - 0.5 * np.sum(R2) - + self.reg(coef_full, 0.5 * self.thresholds ** 2 / self.nu) - + 0.5 * np.sum(D2) / self.nu - ) + regularization = self.reg(coef_full, self.thresholds ** 2 / self.nu) + if print_ind == 0 and self.verbose: + row = [ + q, + np.sum(R2), + np.sum(D2) / self.nu, + regularization, + np.sum(R2) + np.sum(D2) + regularization, + ] + print( + "{0:10d} ... {1:10.4e} ... {2:10.4e} ... {3:10.4e}" + " ... {4:10.4e}".format(*row) + ) + return 0.5 * np.sum(R2) + 0.5 * regularization + 0.5 * np.sum(D2) / self.nu def _reduce(self, x, y): """ @@ -333,12 +374,25 @@ def _reduce(self, x, y): x_expanded, (n_samples * n_targets, n_targets * n_features) ) + # Print initial values for each term in the optimization + if self.verbose: + row = [ + "Iteration", + "|y - Xw|^2", + "|w-u|^2/v", + "R(u)", + "Total Error: |y - Xw|^2 + |w - u|^2 / v + R(u)", + ] + print( + "{: >10} ... {: >10} ... {: >10} ... {: >10} ... 
{: >10}".format(*row) + ) + objective_history = [] if self.inequality_constraints: coef_sparse = self._update_coef_cvxpy(x_expanded, y, coef_sparse) - objective_history.append(self._objective(x, y, coef_full, coef_sparse)) + objective_history.append(self._objective(x, y, 0, coef_full, coef_sparse)) else: - for _ in range(self.max_iter): + for k in range(self.max_iter): if self.use_trimming: x_weighted = x * trimming_array.reshape(n_samples, 1) H = np.dot(x_weighted.T, x) + np.diag( @@ -363,11 +417,11 @@ def _reduce(self, x, y): ) objective_history.append( - self._objective(x, y, coef_full, coef_sparse, trimming_array) + self._objective(x, y, k, coef_full, coef_sparse, trimming_array) ) else: objective_history.append( - self._objective(x, y, coef_full, coef_sparse) + self._objective(x, y, k, coef_full, coef_sparse) ) if self._convergence_criterion() < self.tol: # TODO: Update this for trimming/constraints diff --git a/pysindy/optimizers/frols.py b/pysindy/optimizers/frols.py index 7a7aa8427..93a6e1191 100644 --- a/pysindy/optimizers/frols.py +++ b/pysindy/optimizers/frols.py @@ -46,6 +46,10 @@ class FROLS(BaseOptimizer): ridge_kw : dict, optional (default None) Optional keyword arguments to pass to the ridge regression. + verbose : bool, optional (default False) + If True, prints out the different error terms every + iteration. + Attributes ---------- coef_ : array, shape (n_features,) or (n_targets, n_features) @@ -84,6 +88,7 @@ def __init__( max_iter=10, alpha=0.05, ridge_kw=None, + verbose=False, ): super(FROLS, self).__init__( fit_intercept=fit_intercept, @@ -96,6 +101,7 @@ def __init__( self.kappa = kappa if self.max_iter <= 0: raise ValueError("Max iteration must be > 0") + self.verbose = verbose def _normed_cov(self, a, b): return np.vdot(a, b) / np.vdot(a, a) @@ -135,6 +141,31 @@ def _reduce(self, x, y): """ n_samples, n_features = x.shape n_targets = y.shape[1] + cond_num = np.linalg.cond(x) + if self.kappa is not None: + l0_penalty = self.kappa * cond_num + else: + l0_penalty = 0.0 + + # Print initial values for each term in the optimization + if self.verbose: + row = [ + "Iteration", + "Index", + "|y - Xw|^2", + "a * |w|_2", + "|w|_0", + "b * |w|_0", + "Total: |y-Xw|^2+a*|w|_2+b*|w|_0", + ] + print( + "{: >10} ... {: >5} ... {: >10} ... {: >10} ... {: >5}" + " ... {: >10} ... {: >10}".format(*row) + ) + print( + " Note that these error metrics are related but not the same" + " as the loss function being minimized by FROLS!" + ) # History of selected functions: [iteration x output x coefficients] self.history_ = np.zeros((n_features, n_targets, n_features), dtype=x.dtype) @@ -191,14 +222,19 @@ def _reduce(self, x, y): if i >= self.max_iter: break - - if self.kappa is not None: - l0_penalty = self.kappa * np.linalg.cond(x) - else: - l0_penalty = 0.0 + if self.verbose: + coef = self.history_[i, k, :] + R2 = np.sum((y[:, k] - np.dot(x, coef).T) ** 2) + L2 = self.alpha * np.sum(coef ** 2) + L0 = np.count_nonzero(coef) + row = [i, k, R2, L2, L0, l0_penalty * L0, R2 + L2 + l0_penalty * L0] + print( + "{0:10d} ... {1:5d} ... {2:10.4e} ... {3:10.4e}" + " ... {4:5d} ... {5:10.4e} ... 
{6:10.4e}".format(*row) + ) # Function selection: L2 error for output k at iteration i is given by - # sum(ERR_global[k, :i]), and the number of nonzero coefficients is (i+1) + # sum(ERR_global[k, :i]), and the number of nonzero coefficients is (i+1) l2_err = np.cumsum(self.ERR_global, axis=1) l0_norm = np.arange(1, n_features + 1)[:, None] self.loss_ = l2_err + l0_penalty * l0_norm # Save for debugging diff --git a/pysindy/optimizers/sindy_pi.py b/pysindy/optimizers/sindy_pi.py index f8c7ab74c..c8c49bfce 100644 --- a/pysindy/optimizers/sindy_pi.py +++ b/pysindy/optimizers/sindy_pi.py @@ -15,7 +15,7 @@ class SINDyPI(SR3): .. math:: - 0.5\\|X-Xw\\|^2_2 + \\lambda \\times R(v) + 0.5\\|X-Xw\\|^2_2 + \\lambda R(w) over w where :math:`R(v)` is a regularization function. See the following reference for more details: @@ -76,6 +76,11 @@ class SINDyPI(SR3): candidate functions. This can take a long time for 4D systems or larger. + verbose_cvxpy : bool, optional (default False) + Boolean flag which is passed to CVXPY solve function to indicate if + output should be verbose or not. Only relevant for optimizers that + use the CVXPY package in some capabity. + Attributes ---------- coef_ : array, shape (n_features,) or (n_targets, n_features) @@ -101,6 +106,7 @@ def __init__( thresholds=None, model_subset=None, normalize_columns=False, + verbose_cvxpy=False, ): super(SINDyPI, self).__init__( threshold=threshold, @@ -125,6 +131,7 @@ def __init__( ) self.unbias = False + self.verbose_cvxpy = verbose_cvxpy if model_subset is not None: if not isinstance(model_subset, list): raise ValueError("Model subset must be in list format.") @@ -155,6 +162,7 @@ def _update_parallel_coef_constraints(self, x): "of features in the candidate library" ) for i in self.model_subset: + print("Model ", i) xi = cp.Variable(n_features) # Note that norm choice below must be convex, # so thresholder must be L1 or L2 @@ -183,7 +191,12 @@ def _update_parallel_coef_constraints(self, x): [xi[i] == 0.0], ) try: - prob.solve(max_iter=self.max_iter, eps_abs=self.tol, eps_rel=self.tol) + prob.solve( + max_iter=self.max_iter, + eps_abs=self.tol, + eps_rel=self.tol, + verbose=self.verbose_cvxpy, + ) if xi.value is None: warnings.warn( "Infeasible solve on iteration " + str(i) + ", try " @@ -195,7 +208,12 @@ def _update_parallel_coef_constraints(self, x): # solver, which uses "max_iters" instead of "max_iter", and # similar semantic changes for the other variables. except TypeError: - prob.solve(max_iters=self.max_iter, abstol=self.tol, reltol=self.tol) + prob.solve( + max_iters=self.max_iter, + abstol=self.tol, + reltol=self.tol, + verbose=self.verbose_cvxpy, + ) if xi.value is None: warnings.warn( "Infeasible solve on iteration " + str(i) + ", try " diff --git a/pysindy/optimizers/sr3.py b/pysindy/optimizers/sr3.py index 45763a179..499af3ede 100644 --- a/pysindy/optimizers/sr3.py +++ b/pysindy/optimizers/sr3.py @@ -7,6 +7,7 @@ from ..utils import capped_simplex_projection from ..utils import get_prox +from ..utils import get_regularization from .base import BaseOptimizer @@ -18,14 +19,14 @@ class SR3(BaseOptimizer): .. math:: - 0.5\\|y-Xw\\|^2_2 + \\lambda \\times R(v) - + (0.5 / \\nu)\\|w-v\\|^2_2 + 0.5\\|y-Xw\\|^2_2 + \\lambda R(u) + + (0.5 / \\nu)\\|w-u\\|^2_2 - where :math:`R(v)` is a regularization function. + where :math:`R(u)` is a regularization function. See the following references for more details: Zheng, Peng, et al. "A unified framework for sparse relaxed - regularized regression: Sr3." IEEE Access 7 (2018): 1404-1423. 
+ regularized regression: SR3." IEEE Access 7 (2018): 1404-1423. Champion, K., Zheng, P., Aravkin, A. Y., Brunton, S. L., & Kutz, J. N. (2020). A unified sparse optimization framework to learn parsimonious @@ -95,6 +96,10 @@ class SR3(BaseOptimizer): threshold to be used for the (j + 1)st library function in the equation for the (i + 1)st measurement variable. + verbose : bool, optional (default False) + If True, prints out the different error terms every + max_iter / 10 iterations. + Attributes ---------- coef_ : array, shape (n_features,) or (n_targets, n_features) @@ -144,6 +149,7 @@ def __init__( copy_X=True, initial_guess=None, normalize_columns=False, + verbose=False, ): super(SR3, self).__init__( max_iter=max_iter, @@ -203,6 +209,7 @@ def __init__( self.nu = nu self.tol = tol self.thresholder = thresholder + self.reg = get_regularization(thresholder) self.prox = get_prox(thresholder) if trimming_fraction == 0.0: self.use_trimming = False @@ -210,6 +217,7 @@ def __init__( self.use_trimming = True self.trimming_fraction = trimming_fraction self.trimming_step_size = trimming_step_size + self.verbose = verbose def enable_trimming(self, trimming_fraction): """ @@ -229,6 +237,48 @@ def disable_trimming(self): self.use_trimming = False self.trimming_fraction = None + def _objective(self, x, y, q, coef_full, coef_sparse, trimming_array=None): + """Objective function""" + if q != 0: + print_ind = q % (self.max_iter // 10.0) + else: + print_ind = q + R2 = (y - np.dot(x, coef_full)) ** 2 + D2 = (coef_full - coef_sparse) ** 2 + if self.use_trimming: + assert trimming_array is not None + R2 *= trimming_array.reshape(x.shape[0], 1) + if self.thresholds is None: + regularization = self.reg(coef_full, self.threshold ** 2 / self.nu) + if print_ind == 0 and self.verbose: + row = [ + q, + np.sum(R2), + np.sum(D2) / self.nu, + regularization, + np.sum(R2) + np.sum(D2) + regularization, + ] + print( + "{0:10d} ... {1:10.4e} ... {2:10.4e} ... {3:10.4e}" + " ... {4:10.4e}".format(*row) + ) + return 0.5 * np.sum(R2) + 0.5 * regularization + 0.5 * np.sum(D2) / self.nu + else: + regularization = self.reg(coef_full, self.thresholds ** 2 / self.nu) + if print_ind == 0 and self.verbose: + row = [ + q, + np.sum(R2), + np.sum(D2) / self.nu, + regularization, + np.sum(R2) + np.sum(D2) + regularization, + ] + print( + "{0:10d} ... {1:10.4e} ... {2:10.4e} ... {3:10.4e}" + " ... {4:10.4e}".format(*row) + ) + return 0.5 * np.sum(R2) + 0.5 * regularization + 0.5 * np.sum(D2) / self.nu + def _update_full_coef(self, cho, x_transpose_y, coef_sparse): """Update the unregularized weight vector""" b = x_transpose_y + coef_sparse / self.nu @@ -289,13 +339,30 @@ def _reduce(self, x, y): coef_full = coef_sparse.copy() trimming_array = np.repeat(1.0 - self.trimming_fraction, n_samples) self.history_trimming_ = [trimming_array] + else: + trimming_array = None # Precompute some objects for upcoming least-squares solves. # Assumes that self.nu is fixed throughout optimization procedure. cho = cho_factor(np.dot(x.T, x) + np.diag(np.full(x.shape[1], 1.0 / self.nu))) x_transpose_y = np.dot(x.T, y) - for _ in range(self.max_iter): + # Print initial values for each term in the optimization + if self.verbose: + row = [ + "Iteration", + "|y - Xw|^2", + "|w-u|^2/v", + "R(u)", + "Total Error: |y-Xw|^2 + |w-u|^2/v + R(u)", + ] + print( + "{: >10} ... {: >10} ... {: >10} ... {: >10}" + " ... 
{: >10}".format(*row) + ) + objective_history = [] + + for k in range(self.max_iter): if self.use_trimming: x_weighted = x * trimming_array.reshape(n_samples, 1) cho = cho_factor( @@ -307,12 +374,13 @@ def _reduce(self, x, y): coef_full = self._update_full_coef(cho, x_transpose_y, coef_sparse) coef_sparse = self._update_sparse_coef(coef_full) self.history_.append(coef_sparse.T) - if self.use_trimming: trimming_array = self._update_trimming_array( coef_full, trimming_array, trimming_grad ) - + objective_history.append( + self._objective(x, y, k, coef_full, coef_sparse, trimming_array) + ) if self._convergence_criterion() < self.tol: # Could not (further) select important features break @@ -327,3 +395,4 @@ def _reduce(self, x, y): self.coef_full_ = coef_full.T if self.use_trimming: self.trimming_array = trimming_array + self.objective_history = objective_history diff --git a/pysindy/optimizers/ssr.py b/pysindy/optimizers/ssr.py index a2ca527d7..3117dce24 100644 --- a/pysindy/optimizers/ssr.py +++ b/pysindy/optimizers/ssr.py @@ -52,6 +52,9 @@ class SSR(BaseOptimizer): ridge_kw : dict, optional (default None) Optional keyword arguments to pass to the ridge regression. + verbose : bool, optional (default False) + If True, prints out the different error terms every iteration. + Attributes ---------- coef_ : array, shape (n_features,) or (n_targets, n_features) @@ -95,6 +98,7 @@ def __init__( copy_X=True, criteria="coefficient_value", kappa=None, + verbose=False, ): super(SSR, self).__init__( max_iter=max_iter, @@ -121,6 +125,7 @@ def __init__( self.alpha = alpha self.ridge_kw = ridge_kw self.kappa = kappa + self.verbose = verbose def _coefficient_value(self, coef): """Eliminate the smallest element of the weight vector(s)""" @@ -169,9 +174,30 @@ def _reduce(self, x, y): """ n_samples, n_features = x.shape n_targets = y.shape[1] + cond_num = np.linalg.cond(x) + if self.kappa is not None: + l0_penalty = self.kappa * cond_num + else: + l0_penalty = 0 coef = self._regress(x, y) inds = np.ones((n_targets, n_features), dtype=bool) + + # Print initial values for each term in the optimization + if self.verbose: + row = [ + "Iteration", + "|y - Xw|^2", + "a * |w|_2", + "|w|_0", + "b * |w|_0", + "Total: |y-Xw|^2+a*|w|_2+b*|w|_0", + ] + print( + "{: >10} ... {: >10} ... {: >10} ... {: >10}" + " ... {: >10} ... {: >10}".format(*row) + ) + self.err_history_ = [] for k in range(self.max_iter): for i in range(n_targets): @@ -189,13 +215,18 @@ def _reduce(self, x, y): coef[i, inds[i, :]] = self._regress(x[:, inds[i, :]], y[:, i]) self.history_.append(np.copy(coef)) - if self.kappa is not None: - l0_penalty = self.kappa * np.linalg.cond(x) - self.err_history_.append( - np.sum((y - x @ coef.T) ** 2) + l0_penalty * np.count_nonzero(coef) + if self.verbose: + R2 = np.sum((y - np.dot(x, coef.T)) ** 2) + L2 = self.alpha * np.sum(coef ** 2) + L0 = np.count_nonzero(coef) + row = [k, R2, L2, L0, l0_penalty * L0, R2 + L2 + l0_penalty * L0] + print( + "{0:10d} ... {1:10.4e} ... {2:10.4e} ... {3:10d}" + " ... {4:10.4e} ... 
{5:10.4e}".format(*row) ) - else: - self.err_history_.append(np.sum((y - x @ coef.T) ** 2)) + self.err_history_.append( + np.sum((y - x @ coef.T) ** 2) + l0_penalty * np.count_nonzero(coef) + ) if np.all(np.sum(np.asarray(inds, dtype=int), axis=1) <= 1): # each equation has one last term break diff --git a/pysindy/optimizers/stlsq.py b/pysindy/optimizers/stlsq.py index a3a42a1f5..a335aa9cd 100644 --- a/pysindy/optimizers/stlsq.py +++ b/pysindy/optimizers/stlsq.py @@ -9,7 +9,8 @@ class STLSQ(BaseOptimizer): - """Sequentially thresholded least squares algorithm. + """Sequentially thresholded least squares algorithm. Defaults to doing + Sequentially thresholded Ridge regression. Attempts to minimize the objective function :math:`\\|y - Xw\\|^2_2 + \\alpha \\|w\\|^2_2` @@ -57,6 +58,9 @@ class STLSQ(BaseOptimizer): Initial guess for coefficients ``coef_``. If None, least-squares is used to obtain an initial guess. + verbose : bool, optional (default False) + If True, prints out the different error terms every iteration. + Attributes ---------- coef_ : array, shape (n_features,) or (n_targets, n_features) @@ -101,6 +105,7 @@ def __init__( fit_intercept=False, copy_X=True, initial_guess=None, + verbose=False, ): super(STLSQ, self).__init__( max_iter=max_iter, @@ -118,6 +123,7 @@ def __init__( self.alpha = alpha self.ridge_kw = ridge_kw self.initial_guess = initial_guess + self.verbose = verbose def _sparse_coefficients(self, dim, ind, coef, threshold): """Perform thresholding of the weight vector(s)""" @@ -152,14 +158,27 @@ def _reduce(self, x, y): """ if self.initial_guess is not None: self.coef_ = self.initial_guess - self.Theta = x ind = self.ind_ n_samples, n_features = x.shape n_targets = y.shape[1] n_features_selected = np.sum(ind) - for _ in range(self.max_iter): + # Print initial values for each term in the optimization + if self.verbose: + row = [ + "Iteration", + "|y - Xw|^2", + "a * |w|_2", + "|w|_0", + "Total error: |y - Xw|^2 + a * |w|_2", + ] + print( + "{: >10} ... {: >10} ... {: >10} ... {: >10}" + " ... {: >10}".format(*row) + ) + + for k in range(self.max_iter): if np.count_nonzero(ind) == 0: warnings.warn( "Sparsity parameter is too big ({}) and eliminated all " @@ -184,6 +203,15 @@ def _reduce(self, x, y): ind[i] = ind_i self.history_.append(coef) + if self.verbose: + R2 = np.sum((y - np.dot(x, coef.T)) ** 2) + L2 = self.alpha * np.sum(coef ** 2) + L0 = np.count_nonzero(coef) + row = [k, R2, L2, L0, R2 + L2] + print( + "{0:10d} ... {1:10.4e} ... {2:10.4e} ... {3:10d}" + " ... {4:10.4e}".format(*row) + ) if np.sum(ind) == n_features_selected or self._no_change(): # could not (further) select important features break diff --git a/pysindy/optimizers/trapping_sr3.py b/pysindy/optimizers/trapping_sr3.py index 3f70911c4..63e0297b6 100644 --- a/pysindy/optimizers/trapping_sr3.py +++ b/pysindy/optimizers/trapping_sr3.py @@ -16,15 +16,23 @@ class TrappingSR3(SR3): This optimizer can be used to identify systems with stable (bounded) solutions. - Attempts to minimize the objective function + Attempts to minimize one of two related objective functions .. math:: - 0.5\\|y-Xw\\|^2_2 + \\lambda \\times R(w) + 0.5\\|y-Xw\\|^2_2 + \\lambda R(w) + 0.5\\|Pw-A\\|^2_2/\\eta + \\delta_0(Cw-d) - + \\delta_{\\Lambda}(A)} + + \\delta_{\\Lambda}(A) - where :math:`R(w)` is a regularization function. + or + + .. math:: + + 0.5\\|y-Xw\\|^2_2 + \\lambda R(w) + + \\delta_0(Cw-d) + + 0.5 max_eigenvalue(A)/\\eta + + where :math:`R(w)` is a regularization function, which must be convex. 
See the following references for more details: Kaptanoglu, Alan A., et al. "Promoting global stability in @@ -134,6 +142,15 @@ class TrappingSR3(SR3): by dividing by the L2-norm. Note that the 'normalize' option in sklearn is deprecated in sklearn versions >= 1.0 and will be removed. + verbose : bool, optional (default False) + If True, prints out the different error terms every + max_iter / 10 iterations. + + verbose_cvxpy : bool, optional (default False) + Boolean flag which is passed to CVXPY solve function to indicate if + output should be verbose or not. Only relevant for optimizers that + use the CVXPY package in some capabity. + Attributes ---------- coef_ : array, shape (n_features,) or (n_targets, n_features) @@ -223,6 +240,8 @@ def __init__( constraint_lhs=None, constraint_rhs=None, constraint_order="target", + verbose=False, + verbose_cvxpy=False, ): super(TrappingSR3, self).__init__( threshold=threshold, @@ -232,6 +251,7 @@ def __init__( copy_X=copy_X, thresholder=thresholder, thresholds=thresholds, + verbose=verbose, ) if thresholder.lower() not in ("l1", "l2", "weighted_l1", "weighted_l2"): raise ValueError("Regularizer must be (weighted) L1 or L2") @@ -284,6 +304,7 @@ def __init__( self.tol = tol self.tol_m = tol_m self.accel = accel + self.verbose_cvxpy = verbose_cvxpy self.A_history_ = [] self.m_history_ = [] self.PW_history_ = [] @@ -454,6 +475,10 @@ def _m_convergence_criterion(self): def _objective(self, x, y, coef_sparse, A, PW, q): """Objective function""" + if q != 0: + print_ind = q % (self.max_iter // 10.0) + else: + print_ind = q # Compute the errors R2 = (y - np.dot(x, coef_sparse)) ** 2 @@ -461,9 +486,18 @@ def _objective(self, x, y, coef_sparse, A, PW, q): L1 = self.threshold * np.sum(np.abs(coef_sparse.flatten())) # convoluted way to print every max_iter / 10 iterations - if q % max(int(self.max_iter / 10.0), 1) == 0 or self.threshold != 0.0: - row = [q, 0.5 * np.sum(R2), 0.5 * np.sum(A2) / self.eta, L1] - print("{0:12d} {1:12.5e} {2:12.5e} {3:12.5e}".format(*row)) + if print_ind == 0 and self.verbose: + row = [ + q, + 0.5 * np.sum(R2), + 0.5 * np.sum(A2) / self.eta, + L1, + 0.5 * np.sum(R2) + 0.5 * np.sum(A2) / self.eta + L1, + ] + print( + "{0:10d} ... {1:10.4e} ... {2:10.4e} ... {3:10.4e}" + " ... {4:10.4e}".format(*row) + ) return 0.5 * np.sum(R2) + 0.5 * np.sum(A2) / self.eta + L1 def _solve_sparse_relax_and_split(self, r, N, x_expanded, y, Pmatrix, A, coef_prev): @@ -495,13 +529,21 @@ def _solve_sparse_relax_and_split(self, r, N, x_expanded, y, Pmatrix, A, coef_pr # default solver is OSQP here but switches to ECOS for L2 try: - prob.solve(eps_abs=self.eps_solver, eps_rel=self.eps_solver) + prob.solve( + eps_abs=self.eps_solver, + eps_rel=self.eps_solver, + verbose=self.verbose_cvxpy, + ) # Annoying error coming from L2 norm switching to use the ECOS # solver, which uses "max_iters" instead of "max_iter", and # similar semantic changes for the other variables. 
except TypeError: try: - prob.solve(abstol=self.eps_solver, reltol=self.eps_solver) + prob.solve( + abstol=self.eps_solver, + reltol=self.eps_solver, + verbose=self.verbose_cvxpy, + ) except cp.error.SolverError: print("Solver failed, setting coefs to zeros") xi.value = np.zeros(N * r) @@ -595,12 +637,16 @@ def _solve_direct_cvxpy(self, r, N, x_expanded, y, Pmatrix, coef_prev): # default solver is SCS here try: - prob.solve(eps=self.eps_solver) + prob.solve(eps=self.eps_solver, verbose=self.verbose_cvxpy) # Annoying error coming from L2 norm switching to use the ECOS # solver, which uses "max_iters" instead of "max_iter", and # similar semantic changes for the other variables. except TypeError: - prob.solve(abstol=self.eps_solver, reltol=self.eps_solver) + prob.solve( + abstol=self.eps_solver, + reltol=self.eps_solver, + verbose=self.verbose_cvxpy, + ) except cp.error.SolverError: print("Solver failed, setting coefs to zeros") xi.value = np.zeros(N * r) @@ -623,7 +669,7 @@ def _solve_direct_cvxpy(self, r, N, x_expanded, y, Pmatrix, coef_prev): prob_m = cp.Problem(cp.Minimize(cost_m)) # default solver is SCS here - prob_m.solve(eps=self.eps_solver) + prob_m.solve(eps=self.eps_solver, verbose=self.verbose_cvxpy) m = m_cp.value if m is None: @@ -656,8 +702,18 @@ def _reduce(self, x, y): coef_sparse = self.coef_.T # Print initial values for each term in the optimization - row = ["Iteration", "Data Error", "Stability Error", "L1 Error"] - print("{: >10} | {: >10} | {: >10} | {: >10}".format(*row)) + if self.verbose: + row = [ + "Iteration", + "Data Error", + "Stability Error", + "L1 Error", + "Total Error", + ] + print( + "{: >10} ... {: >10} ... {: >10} ... {: >10}" + " ... {: >10}".format(*row) + ) # initial A if self.A0 is not None: diff --git a/pysindy/utils/base.py b/pysindy/utils/base.py index b80f3b700..84be726ed 100644 --- a/pysindy/utils/base.py +++ b/pysindy/utils/base.py @@ -316,6 +316,8 @@ def get_regularization(regularization): return lambda x, lam: lam * np.sum(x ** 2) elif regularization.lower() == "weighted_l2": return lambda x, lam: np.sum(lam @ x ** 2) + elif regularization.lower() == "cad": # dummy function + return lambda x, lam: 0 else: raise NotImplementedError("{} has not been implemented".format(regularization)) diff --git a/test/optimizers/test_optimizers.py b/test/optimizers/test_optimizers.py index effef5e4c..f16622d25 100644 --- a/test/optimizers/test_optimizers.py +++ b/test/optimizers/test_optimizers.py @@ -587,7 +587,7 @@ def test_prox_functions(data_derivative_1d, optimizer, thresholder): def test_cad_prox_function(data_derivative_1d): x, x_dot = data_derivative_1d x = x.reshape(-1, 1) - model = SR3(thresholder="cAd") + model = SR3(thresholder="cad") model.fit(x, x_dot) check_is_fitted(model) @@ -982,3 +982,41 @@ def test_ssr_criteria(data_lorenz): model = SINDy(optimizer=opt) model.fit(x) assert np.shape(opt.coef_) == (3, 10) + + +@pytest.mark.parametrize( + "optimizer", + [ + STLSQ, + SSR, + FROLS, + SR3, + ConstrainedSR3, + TrappingSR3, + ], +) +def test_optimizers_verbose(data_lorenz, optimizer): + x, t = data_lorenz + dt = t[1] - t[0] + x_dot = FiniteDifference()._differentiate(x, t=dt) + model = optimizer(verbose=True) + model.fit(x, x_dot) + check_is_fitted(model) + + +@pytest.mark.parametrize( + "optimizer", + [ + SINDyPI, + ConstrainedSR3, + TrappingSR3, + ], +) +def test_optimizers_verbose_cvxpy(data_lorenz, optimizer): + x, t = data_lorenz + dt = t[1] - t[0] + x_dot = FiniteDifference()._differentiate(x, t=dt) + + model = optimizer(verbose_cvxpy=True) 
+ model.fit(x, x_dot) + check_is_fitted(model) diff --git a/test/property_tests/test_optimizers_complexity.py b/test/property_tests/test_optimizers_complexity.py index 88c28897b..90cc69e87 100644 --- a/test/property_tests/test_optimizers_complexity.py +++ b/test/property_tests/test_optimizers_complexity.py @@ -81,7 +81,7 @@ def test_complexity(n_samples, n_features, n_informative, random_state): n_informative=integers(min_value=3, max_value=9), random_state=integers(min_value=0, max_value=2 ** 32 - 1), ) -@settings(max_examples=20) +@settings(max_examples=20, deadline=None) def test_complexity_parameter( opt_cls, reg_name, n_samples, n_features, n_informative, random_state ): @@ -111,5 +111,4 @@ def test_complexity_parameter( opt.fit(x, y) for less_complex, more_complex in zip(optimizers, optimizers[1:]): - print(less_complex.complexity, more_complex.complexity) assert less_complex.complexity <= more_complex.complexity
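
For reference, a minimal usage sketch of the flags this patch introduces (not part of the patch itself). It assumes pysindy with this patch applied is importable as `ps`, and it regenerates Lorenz training data equivalent to the notebook's `x_train`/`dt`; the Lorenz parameters (10, 28, 8/3), initial condition, and the `threshold`/`alpha` values mirror the notebook cells above.

```python
# Usage sketch for the new verbose / verbose_cvxpy flags (assumes pysindy with
# this patch applied; x_train / dt approximate the notebook's Lorenz training data).
import numpy as np
from scipy.integrate import solve_ivp
import pysindy as ps


def lorenz(t, x, sigma=10.0, beta=8.0 / 3.0, rho=28.0):
    # Standard Lorenz system, matching the coefficients recovered in the notebook
    return [
        sigma * (x[1] - x[0]),
        x[0] * (rho - x[2]) - x[1],
        x[0] * x[1] - beta * x[2],
    ]


dt = 0.002
t_train = np.arange(0, 10, dt)
x_train = solve_ivp(
    lorenz, (t_train[0], t_train[-1]), [-8.0, 8.0, 27.0], t_eval=t_train
).y.T

# verbose=True prints the per-iteration error terms added by this patch
stlsq_optimizer = ps.STLSQ(threshold=0.01, alpha=0.5, verbose=True)
model = ps.SINDy(optimizer=stlsq_optimizer)
model.fit(x_train, t=dt)
model.print()

# CVXPY-backed optimizers additionally accept verbose_cvxpy, which is forwarded
# to the underlying prob.solve() call; for ConstrainedSR3 the CVXPY path is only
# taken when inequality constraints are used, otherwise only verbose has effect.
constrained_optimizer = ps.ConstrainedSR3(verbose=True, verbose_cvxpy=True)
model = ps.SINDy(optimizer=constrained_optimizer)
model.fit(x_train, t=dt)
model.print()
```

As described in the patch's docstrings, `verbose` prints the objective terms every iteration for STLSQ, SSR, and FROLS and every max_iter / 10 iterations for the SR3 family, while `verbose_cvxpy` is only relevant for optimizers that call CVXPY (ConstrainedSR3 with inequality constraints, SINDyPI, and TrappingSR3).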