From d4a2a4ac31d91dcc26c215bcfa8d5e255d3af3c1 Mon Sep 17 00:00:00 2001 From: lbvienna Date: Mon, 17 Apr 2023 16:36:32 -0400 Subject: [PATCH 01/25] added regularization --- src/elexsolver/QuantileRegressionSolver.py | 19 +++++++++++++------ 1 file changed, 13 insertions(+), 6 deletions(-) diff --git a/src/elexsolver/QuantileRegressionSolver.py b/src/elexsolver/QuantileRegressionSolver.py index 744710ac..3019ea9b 100644 --- a/src/elexsolver/QuantileRegressionSolver.py +++ b/src/elexsolver/QuantileRegressionSolver.py @@ -50,21 +50,28 @@ def _check_matrix_condition(self, x): return False return True - def __solve(self, x, y, weights, verbose): + def get_loss_function(self, x, y, coefficients, weights): + y_hat = x @ coefficients + residual = y - y_hat + return cp.sum(cp.multiply(weights, 0.5 * cp.abs(residual) + (self.tau.value - 0.5) * residual)) + + def get_regularizer(self, coefficients): + return cp.pnorm(coefficients, p=2) ** 2 + + def __solve(self, x, y, weights, lambda_, verbose): """ Sets up the optimization problem and solves it """ self._check_matrix_condition(x) coefficients = cp.Variable((x.shape[1], )) - y_hat = x @ coefficients - residual = y - y_hat - loss_function = cp.sum(cp.multiply(weights, 0.5 * cp.abs(residual) + (self.tau.value - 0.5) * residual)) + loss_function = self.get_loss_function(x, y, coefficients, weights) + loss_function += lambda_ * self.get_regularizer(coefficients) objective = cp.Minimize(loss_function) problem = cp.Problem(objective) problem.solve(solver=self.solver, verbose=verbose, **self.KWARGS.get(self.solver, {})) return coefficients, problem - def fit(self, x, y, tau_value=0.5, weights=None, verbose=False, save_problem=False, normalize_weights=True): + def fit(self, x, y, tau_value=0.5, weights=None, lambda_=0, verbose=False, save_problem=False, normalize_weights=True): """ Fit the (weighted) quantile regression problem. Weights should not sum to one. @@ -79,7 +86,7 @@ def fit(self, x, y, tau_value=0.5, weights=None, verbose=False, save_problem=Fal weights = weights / weights_sum self.tau.value = tau_value - coefficients, problem = self.__solve(x, y, weights, verbose) + coefficients, problem = self.__solve(x, y, weights, lambda_, verbose) self.coefficients = coefficients.value if save_problem: self.problem = problem From f8f9df03214b20a389d282e00e40cdc52dd8c5e6 Mon Sep 17 00:00:00 2001 From: lbvienna Date: Mon, 17 Apr 2023 16:36:48 -0400 Subject: [PATCH 02/25] added unit tests' --- tests/test_quantile.py | 19 +++++++++++++++++++ 1 file changed, 19 insertions(+) diff --git a/tests/test_quantile.py b/tests/test_quantile.py index e10ccfee..48f562c5 100644 --- a/tests/test_quantile.py +++ b/tests/test_quantile.py @@ -210,6 +210,25 @@ def test_weight_normalization_same_fit(random_data_weights): preds = quantreg.predict(x) assert all(np.abs(preds - [9, 9, 9, 15]) <= TOL) +######################## +# Test regularization # +######################## + +def test_regularization(random_data_no_weights): + tau = 0.5 + x = random_data_no_weights[['x0', 'x1', 'x2', 'x3', 'x4']].values + y = random_data_no_weights['y'].values + + quantreg = QuantileRegressionSolver() + lambda_ = 1e8 + quantreg.fit(x, y, tau, lambda_=lambda_, save_problem=True) + coefficients_w_reg = quantreg.coefficients + assert all(np.abs(coefficients_w_reg - [0, 0, 0, 0, 0]) <= TOL) + + objective_w_reg = quantreg.problem.value + quantreg.fit(x, y, tau, save_problem=True) + assert quantreg.problem.value < objective_w_reg + ######################## # Test checking matrix # ######################## From 0d1cb7bd4ddcb5d882f7851f29f01680ea449413 Mon Sep 17 00:00:00 2001 From: lbvienna Date: Mon, 17 Apr 2023 21:53:03 -0400 Subject: [PATCH 03/25] added requirements to run pre-commit --- requirements-dev.txt | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/requirements-dev.txt b/requirements-dev.txt index df3cf973..392cdbe0 100644 --- a/requirements-dev.txt +++ b/requirements-dev.txt @@ -6,4 +6,7 @@ autopep8 betamax betamax-serializers pylint -tox \ No newline at end of file +tox +black +isort +pre-commit \ No newline at end of file From 455967de06a87b06c30135512d79cb1397d462e0 Mon Sep 17 00:00:00 2001 From: lbvienna Date: Mon, 17 Apr 2023 21:53:12 -0400 Subject: [PATCH 04/25] added pre-commit config --- .pre-commit-config.yaml | 38 ++++++++++++++++++++++++++++++++++++++ 1 file changed, 38 insertions(+) create mode 100644 .pre-commit-config.yaml diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml new file mode 100644 index 00000000..102787d2 --- /dev/null +++ b/.pre-commit-config.yaml @@ -0,0 +1,38 @@ +files: \.py$ +repos: + - repo: https://github.com/pre-commit/pre-commit-hooks + rev: v4.0.1 + hooks: + - id: end-of-file-fixer + - id: trailing-whitespace + + - repo: https://github.com/pycqa/isort + rev: 5.11.5 + hooks: + - id: isort + name: isort (python) + args: ["--profile", "black", --line-length=120] + + # black + - repo: https://github.com/ambv/black + rev: 22.3.0 + hooks: + - id: black + args: # arguments to configure black + - --line-length=120 + language_version: python3 + + # flake8 + - repo: https://github.com/pre-commit/pre-commit-hooks + rev: v2.3.0 + hooks: + - id: flake8 + args: # arguments to configure flake8 + # making flake8 line length compatible with black + - "--max-line-length=120" + - "--max-complexity=18" + - "--select=B,C,E,F,W,T4,B9" + # these are errors that will be ignored by flake8 + # definitions here + # https://flake8.pycqa.org/en/latest/user/error-codes.html + - "--ignore=E266,E501,W503" \ No newline at end of file From 230d6e26e822a866907898bddc05eabcb3d6f533 Mon Sep 17 00:00:00 2001 From: lbvienna Date: Mon, 17 Apr 2023 21:53:26 -0400 Subject: [PATCH 05/25] ran pre-commit against all files --- setup.py | 49 +++++---- src/elexsolver/QuantileRegressionSolver.py | 27 +++-- src/elexsolver/TransitionMatrixSolver.py | 5 +- src/elexsolver/logging.py | 2 +- tests/conftest.py | 29 ++--- tests/test_quantile.py | 117 ++++++++++++--------- 6 files changed, 128 insertions(+), 101 deletions(-) diff --git a/setup.py b/setup.py index d8818d59..81d74eda 100644 --- a/setup.py +++ b/setup.py @@ -1,52 +1,51 @@ -from codecs import open import os +from codecs import open + from setuptools import find_packages, setup -INSTALL_REQUIRES = ( - 'cvxpy<=1.2.0' -) +INSTALL_REQUIRES = "cvxpy<=1.2.0" THIS_FILE_DIR = os.path.dirname(__file__) -LONG_DESCRIPTION = '' +LONG_DESCRIPTION = "" # Get the long description from the README file -with open(os.path.join(THIS_FILE_DIR, 'README.md'), encoding='utf-8') as f: +with open(os.path.join(THIS_FILE_DIR, "README.md"), encoding="utf-8") as f: LONG_DESCRIPTION = f.read() # The full version, including alpha/beta/rc tags -RELEASE = '1.0.3' +RELEASE = "1.0.3" # The short X.Y version -VERSION = '.'.join(RELEASE.split('.')[:2]) +VERSION = ".".join(RELEASE.split(".")[:2]) -PROJECT = 'elex-solver' -AUTHOR = 'The Wapo Newsroom Engineering Team' -COPYRIGHT = '2021, {}'.format(AUTHOR) +PROJECT = "elex-solver" +AUTHOR = "The Wapo Newsroom Engineering Team" +COPYRIGHT = "2021, {}".format(AUTHOR) setup( name=PROJECT, version=RELEASE, classifiers=[ - 'Intended Audience :: Developers', - 'License :: OSI Approved :: MIT License', - 'Programming Language :: Python', - 'Programming Language :: Python :: 3.7' + "Intended Audience :: Developers", + "License :: OSI Approved :: MIT License", + "Programming Language :: Python", + "Programming Language :: Python :: 3.7", ], - description='A package for optimization solvers', + description="A package for optimization solvers", long_description=LONG_DESCRIPTION, long_description_content_type="text/markdown", - license='MIT', - packages=find_packages('src', exclude=['docs', 'tests']), - package_dir={'': 'src'}, + license="MIT", + packages=find_packages("src", exclude=["docs", "tests"]), + package_dir={"": "src"}, include_package_data=True, zip_safe=False, install_requires=INSTALL_REQUIRES, command_options={ - 'build_sphinx': { - 'project': ('setup.py', PROJECT), - 'version': ('setup.py', VERSION), - 'release': ('setup.py', RELEASE) + "build_sphinx": { + "project": ("setup.py", PROJECT), + "version": ("setup.py", VERSION), + "release": ("setup.py", RELEASE), } }, - py_modules=['elexsolver'] -) \ No newline at end of file + py_modules=["elexsolver"], +) diff --git a/src/elexsolver/QuantileRegressionSolver.py b/src/elexsolver/QuantileRegressionSolver.py index 744710ac..6837909c 100644 --- a/src/elexsolver/QuantileRegressionSolver.py +++ b/src/elexsolver/QuantileRegressionSolver.py @@ -9,25 +9,24 @@ LOG = logging.getLogger(__name__) + class QuantileRegressionSolverException(Exception): pass + class IllConditionedMatrixException(QuantileRegressionSolverException): pass -class QuantileRegressionSolver(): - VALID_SOLVERS = {'SCS', 'ECOS', 'MOSEK', 'OSQP', 'CVXOPT', 'GLPK'} - KWARGS = { - "ECOS": { - "max_iters": 10000 - } - } +class QuantileRegressionSolver: - CONDITION_WARNING_MIN = 50 # arbitrary - CONDITION_ERROR_MIN = 1e+8 # based on scipy + VALID_SOLVERS = {"SCS", "ECOS", "MOSEK", "OSQP", "CVXOPT", "GLPK"} + KWARGS = {"ECOS": {"max_iters": 10000}} - def __init__(self, solver='ECOS'): + CONDITION_WARNING_MIN = 50 # arbitrary + CONDITION_ERROR_MIN = 1e8 # based on scipy + + def __init__(self, solver="ECOS"): if solver not in self.VALID_SOLVERS: raise ValueError(f"solver must be in {self.VALID_SOLVERS}") self.tau = cp.Parameter() @@ -46,7 +45,7 @@ def _check_matrix_condition(self, x): f"Ill-conditioned matrix detected. Matrix condition number >= {self.CONDITION_ERROR_MIN}" ) elif condition_number >= self.CONDITION_WARNING_MIN: - LOG.warn(f"Ill-conditioned matrix detected. result is not guaranteed to be accurate") + LOG.warn("Ill-conditioned matrix detected. result is not guaranteed to be accurate") return False return True @@ -55,7 +54,7 @@ def __solve(self, x, y, weights, verbose): Sets up the optimization problem and solves it """ self._check_matrix_condition(x) - coefficients = cp.Variable((x.shape[1], )) + coefficients = cp.Variable((x.shape[1],)) y_hat = x @ coefficients residual = y - y_hat loss_function = cp.sum(cp.multiply(weights, 0.5 * cp.abs(residual) + (self.tau.value - 0.5) * residual)) @@ -69,7 +68,7 @@ def fit(self, x, y, tau_value=0.5, weights=None, verbose=False, save_problem=Fal Fit the (weighted) quantile regression problem. Weights should not sum to one. """ - if weights is None: # if weights are none, give unit weights + if weights is None: # if weights are none, give unit weights weights = [1] * x.shape[0] if normalize_weights: weights_sum = np.sum(weights) @@ -77,7 +76,7 @@ def fit(self, x, y, tau_value=0.5, weights=None, verbose=False, save_problem=Fal # This should not happen raise ZeroDivisionError weights = weights / weights_sum - + self.tau.value = tau_value coefficients, problem = self.__solve(x, y, weights, verbose) self.coefficients = coefficients.value diff --git a/src/elexsolver/TransitionMatrixSolver.py b/src/elexsolver/TransitionMatrixSolver.py index da38076e..8e29cad3 100644 --- a/src/elexsolver/TransitionMatrixSolver.py +++ b/src/elexsolver/TransitionMatrixSolver.py @@ -1,6 +1,7 @@ import cvxpy as cp -class TransitionMatrixSolver(): + +class TransitionMatrixSolver: def __init__(self): self.transition_matrix = None @@ -12,7 +13,7 @@ def __get_constraint(X, strict): def __solve(self, A, B, strict): transition_matrix = cp.Variable((A.shape[1], B.shape[1])) - loss_function = cp.norm(A @ transition_matrix - B, 'fro') + loss_function = cp.norm(A @ transition_matrix - B, "fro") objective = cp.Minimize(loss_function) constraint = TransitionMatrixSolver.__get_constraint(transition_matrix, strict) problem = cp.Problem(objective, constraint) diff --git a/src/elexsolver/logging.py b/src/elexsolver/logging.py index cf1425c6..fdee5c5f 100644 --- a/src/elexsolver/logging.py +++ b/src/elexsolver/logging.py @@ -31,4 +31,4 @@ def initialize_logging(logging_config=None): app_log_level = os.getenv("APP_LOG_LEVEL", "INFO") LOGGING_CONFIG["loggers"]["elexsolver"]["level"] = app_log_level logging_config = LOGGING_CONFIG - logging.config.dictConfig(logging_config) \ No newline at end of file + logging.config.dictConfig(logging_config) diff --git a/tests/conftest.py b/tests/conftest.py index cb39f879..ca274686 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -1,23 +1,25 @@ -import os -import sys import json import logging -import pytest +import os +import sys import pandas as pd +import pytest _TEST_FOLDER = os.path.dirname(__file__) -FIXTURE_DIR = os.path.join(_TEST_FOLDER, 'fixtures') +FIXTURE_DIR = os.path.join(_TEST_FOLDER, "fixtures") -@pytest.fixture(autouse=True, scope='session') + +@pytest.fixture(autouse=True, scope="session") def setup_logging(): - LOG = logging.getLogger('elexsolver') + LOG = logging.getLogger("elexsolver") handler = logging.StreamHandler(sys.stdout) handler.setLevel(logging.DEBUG) - handler.setFormatter(logging.Formatter(fmt='%(asctime)s %(levelname)s %(name)s %(message)s')) + handler.setFormatter(logging.Formatter(fmt="%(asctime)s %(levelname)s %(name)s %(message)s")) LOG.addHandler(handler) -@pytest.fixture(scope='session') + +@pytest.fixture(scope="session") def get_fixture(): def _get_fixture(filename, load=False, csv=True): fileobj = open(os.path.join(FIXTURE_DIR, filename)) @@ -26,12 +28,15 @@ def _get_fixture(filename, load=False, csv=True): elif csv: return pd.read_csv(os.path.join(FIXTURE_DIR, filename)) return fileobj + return _get_fixture -@pytest.fixture(scope='session') + +@pytest.fixture(scope="session") def random_data_no_weights(get_fixture): - return get_fixture('random_data_n100_p5_17554.csv') + return get_fixture("random_data_n100_p5_17554.csv") + -@pytest.fixture(scope='session') +@pytest.fixture(scope="session") def random_data_weights(get_fixture): - return get_fixture('random_data_n100_p5_12549_weights.csv') \ No newline at end of file + return get_fixture("random_data_n100_p5_12549_weights.csv") diff --git a/tests/test_quantile.py b/tests/test_quantile.py index e10ccfee..80f2700f 100644 --- a/tests/test_quantile.py +++ b/tests/test_quantile.py @@ -1,7 +1,7 @@ - import numpy as np import pytest -from elexsolver.QuantileRegressionSolver import QuantileRegressionSolver, IllConditionedMatrixException + +from elexsolver.QuantileRegressionSolver import IllConditionedMatrixException, QuantileRegressionSolver # relatively high tolerance, since different implementation. TOL = 1e-3 @@ -12,6 +12,7 @@ # Basic tests # ############### + def test_basic_median_1(): quantreg = QuantileRegressionSolver() tau = 0.5 @@ -23,6 +24,7 @@ def test_basic_median_1(): # has to do with missing intercept assert all(np.abs(preds - [7.5, 7.5, 7.5, 15]) <= TOL) + def test_basic_median_2(): quantreg = QuantileRegressionSolver() tau = 0.5 @@ -32,6 +34,7 @@ def test_basic_median_2(): preds = quantreg.predict(x) assert all(np.abs(preds - [8, 8, 8, 15]) <= TOL) + def test_basic_lower(): quantreg = QuantileRegressionSolver() tau = 0.1 @@ -41,6 +44,7 @@ def test_basic_lower(): preds = quantreg.predict(x) assert all(np.abs(preds - [3, 3, 3, 15]) <= TOL) + def test_basic_upper(): quantreg = QuantileRegressionSolver() tau = 0.9 @@ -50,41 +54,47 @@ def test_basic_upper(): preds = quantreg.predict(x) assert all(np.abs(preds - [9, 9, 9, 15]) <= TOL) + ###################### # Intermediate tests # ###################### + def test_random_median(random_data_no_weights): quantreg = QuantileRegressionSolver() tau = 0.5 - x = random_data_no_weights[['x0', 'x1', 'x2', 'x3', 'x4']].values - y = random_data_no_weights['y'].values + x = random_data_no_weights[["x0", "x1", "x2", "x3", "x4"]].values + y = random_data_no_weights["y"].values quantreg.fit(x, y, tau) - preds = quantreg.predict(x) + quantreg.predict(x) assert all(np.abs(quantreg.coefficients - [1.57699, 6.74906, 4.40175, 4.85346, 4.51814]) <= TOL) + def test_random_lower(random_data_no_weights): quantreg = QuantileRegressionSolver() tau = 0.1 - x = random_data_no_weights[['x0', 'x1', 'x2', 'x3', 'x4']].values - y = random_data_no_weights['y'].values + x = random_data_no_weights[["x0", "x1", "x2", "x3", "x4"]].values + y = random_data_no_weights["y"].values quantreg.fit(x, y, tau) - preds = quantreg.predict(x) + quantreg.predict(x) assert all(np.abs(quantreg.coefficients - [0.17759, 6.99588, 4.18896, 4.83906, 3.22546]) <= TOL) + def test_random_upper(random_data_no_weights): quantreg = QuantileRegressionSolver() tau = 0.9 - x = random_data_no_weights[['x0', 'x1', 'x2', 'x3', 'x4']].values - y = random_data_no_weights['y'].values + x = random_data_no_weights[["x0", "x1", "x2", "x3", "x4"]].values + y = random_data_no_weights["y"].values quantreg.fit(x, y, tau) - preds = quantreg.predict(x) + quantreg.predict(x) assert all(np.abs(quantreg.coefficients - [1.85617, 6.81286, 6.05586, 5.51965, 4.19864]) <= TOL) + ###################### # Tests with weights # ###################### + def test_basic_median_weights(): quantreg = QuantileRegressionSolver() tau = 0.5 @@ -95,76 +105,84 @@ def test_basic_median_weights(): preds = quantreg.predict(x) assert all(np.abs(preds - [9, 9, 9, 15]) <= TOL) + def test_random_median_weights(random_data_weights): quantreg = QuantileRegressionSolver() tau = 0.5 - x = random_data_weights[['x0', 'x1', 'x2', 'x3', 'x4']].values - y = random_data_weights['y'].values - weights = random_data_weights['weights'].values + x = random_data_weights[["x0", "x1", "x2", "x3", "x4"]].values + y = random_data_weights["y"].values + weights = random_data_weights["weights"].values quantreg.fit(x, y, tau, weights=weights) - preds = quantreg.predict(x) + quantreg.predict(x) assert all(np.abs(quantreg.coefficients - [1.59521, 2.17864, 4.68050, 3.10920, 9.63739]) <= TOL) + def test_random_lower_weights(random_data_weights): quantreg = QuantileRegressionSolver() tau = 0.1 - x = random_data_weights[['x0', 'x1', 'x2', 'x3', 'x4']].values - y = random_data_weights['y'].values - weights = random_data_weights['weights'].values + x = random_data_weights[["x0", "x1", "x2", "x3", "x4"]].values + y = random_data_weights["y"].values + weights = random_data_weights["weights"].values quantreg.fit(x, y, tau, weights=weights) - preds = quantreg.predict(x) + quantreg.predict(x) assert all(np.abs(quantreg.coefficients - [0.63670, 1.27028, 4.81500, 3.08055, 8.69929]) <= TOL) + def test_random_upper_weights(random_data_weights): quantreg = QuantileRegressionSolver() tau = 0.9 - x = random_data_weights[['x0', 'x1', 'x2', 'x3', 'x4']].values - y = random_data_weights['y'].values - weights = random_data_weights['weights'].values + x = random_data_weights[["x0", "x1", "x2", "x3", "x4"]].values + y = random_data_weights["y"].values + weights = random_data_weights["weights"].values quantreg.fit(x, y, tau, weights=weights) - preds = quantreg.predict(x) + quantreg.predict(x) assert all(np.abs(quantreg.coefficients - [3.47742, 2.07360, 4.51754, 4.15237, 9.58856]) <= TOL) + ######################## # Test changing solver # ######################## + def test_changing_solver(random_data_no_weights): tau = 0.5 - x = random_data_no_weights[['x0', 'x1', 'x2', 'x3', 'x4']].values - y = random_data_no_weights['y'].values - - quantreg_scs = QuantileRegressionSolver(solver='SCS') - quantreg_ecos = QuantileRegressionSolver(solver='ECOS') + x = random_data_no_weights[["x0", "x1", "x2", "x3", "x4"]].values + y = random_data_no_weights["y"].values + + quantreg_scs = QuantileRegressionSolver(solver="SCS") + quantreg_ecos = QuantileRegressionSolver(solver="ECOS") quantreg_scs.fit(x, y, tau, save_problem=True) quantreg_ecos.fit(x, y, tau, save_problem=True) - + assert quantreg_scs.problem.value == pytest.approx(quantreg_ecos.problem.value, TOL) + def test_changing_solver_weights(random_data_weights): tau = 0.5 - x = random_data_weights[['x0', 'x1', 'x2', 'x3', 'x4']].values - y = random_data_weights['y'].values - weights = random_data_weights['weights'].values - - quantreg_scs = QuantileRegressionSolver(solver='SCS') - quantreg_ecos = QuantileRegressionSolver(solver='ECOS') + x = random_data_weights[["x0", "x1", "x2", "x3", "x4"]].values + y = random_data_weights["y"].values + weights = random_data_weights["weights"].values + + quantreg_scs = QuantileRegressionSolver(solver="SCS") + quantreg_ecos = QuantileRegressionSolver(solver="ECOS") quantreg_scs.fit(x, y, tau, weights=weights, save_problem=True) quantreg_ecos.fit(x, y, tau, weights=weights, save_problem=True) assert quantreg_scs.problem.value == pytest.approx(quantreg_ecos.problem.value, TOL) + ####################### # Test saving problem # ####################### + def test_saving_problem(random_data_no_weights): tau = 0.5 - x = random_data_no_weights[['x0', 'x1', 'x2', 'x3', 'x4']].values - y = random_data_no_weights['y'].values - - quantreg = QuantileRegressionSolver(solver='ECOS') - + x = random_data_no_weights[["x0", "x1", "x2", "x3", "x4"]].values + y = random_data_no_weights["y"].values + + quantreg = QuantileRegressionSolver(solver="ECOS") + quantreg.fit(x, y, tau, save_problem=False) assert quantreg.problem is None @@ -175,17 +193,19 @@ def test_saving_problem(random_data_no_weights): quantreg.fit(x, y, tau, save_problem=False) assert quantreg.problem is None + ############################# # Test weight normalization # ############################# + def test_weight_normalization_divide_by_zero(random_data_no_weights): tau = 0.5 - x = random_data_no_weights[['x0', 'x1', 'x2', 'x3', 'x4']].values - y = random_data_no_weights['y'].values + x = random_data_no_weights[["x0", "x1", "x2", "x3", "x4"]].values + y = random_data_no_weights["y"].values weights = [0] * x.shape[0] # all zero weights - - quantreg = QuantileRegressionSolver(solver='ECOS') + + quantreg = QuantileRegressionSolver(solver="ECOS") # Will succeed without weight normalization quantreg.fit(x, y, tau, normalize_weights=False, weights=weights) @@ -194,6 +214,7 @@ def test_weight_normalization_divide_by_zero(random_data_no_weights): with pytest.raises(ZeroDivisionError): quantreg.fit(x, y, tau, normalize_weights=True, weights=weights) + def test_weight_normalization_same_fit(random_data_weights): quantreg = QuantileRegressionSolver() tau = 0.5 @@ -210,17 +231,20 @@ def test_weight_normalization_same_fit(random_data_weights): preds = quantreg.predict(x) assert all(np.abs(preds - [9, 9, 9, 15]) <= TOL) + ######################## # Test checking matrix # ######################## + def test_ill_conditioned_error(): quantreg = QuantileRegressionSolver() x = np.asarray([[1, 0, 1], [4, 3, 4], [5, 2, 5]]) - with pytest.raises(IllConditionedMatrixException) as e_info: + with pytest.raises(IllConditionedMatrixException): quantreg._check_matrix_condition(x) + def test_ill_conditioned_warning(): quantreg = QuantileRegressionSolver() @@ -231,7 +255,6 @@ def test_ill_conditioned_warning(): matrix_check = quantreg._check_matrix_condition(x) assert not matrix_check -def test_ill_conditioned_warning(): quantreg = QuantileRegressionSolver() random_number_generator = np.random.RandomState(42) @@ -239,4 +262,4 @@ def test_ill_conditioned_warning(): sigma = np.asarray([[1, 0, 0], [0, 1, 0], [0, 0, 1]]) x = random_number_generator.multivariate_normal(mu, sigma, size=3) matrix_check = quantreg._check_matrix_condition(x) - assert matrix_check \ No newline at end of file + assert matrix_check From 1534aa3cdf294c61949d4020786cd606cdad16f1 Mon Sep 17 00:00:00 2001 From: lbvienna Date: Mon, 17 Apr 2023 21:54:56 -0400 Subject: [PATCH 06/25] updated readme --- README.md | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/README.md b/README.md index aee86fc1..a59d0739 100644 --- a/README.md +++ b/README.md @@ -24,6 +24,12 @@ Set up a virtual environment and run: > pip install -r requirements-dev.txt ``` +## Precommit +To run pre-commit for linting, run: +``` +pre-commit run --all-files +``` + ## Testing ``` > tox From 96bcc38f3adffe4727eba31202d6ae01d1d32956 Mon Sep 17 00:00:00 2001 From: lbvienna Date: Mon, 17 Apr 2023 21:56:29 -0400 Subject: [PATCH 07/25] added github actions --- .github/workflows/pre-commit.yml | 14 ++++++++++++++ 1 file changed, 14 insertions(+) create mode 100644 .github/workflows/pre-commit.yml diff --git a/.github/workflows/pre-commit.yml b/.github/workflows/pre-commit.yml new file mode 100644 index 00000000..b13f7c95 --- /dev/null +++ b/.github/workflows/pre-commit.yml @@ -0,0 +1,14 @@ +name: pre-commit +on: + pull_request: + branches-ignore: + - main +jobs: + pre-commit: + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v2 + - uses: actions/setup-python@v2 + with: + python-version: '3.9' + - uses: pre-commit/action@v2.0.3 \ No newline at end of file From c78ac406161b8031d112c9265049fea545535f42 Mon Sep 17 00:00:00 2001 From: lbvienna Date: Tue, 18 Apr 2023 13:52:44 -0400 Subject: [PATCH 08/25] ran pre-commit --- src/elexsolver/QuantileRegressionSolver.py | 6 ++++-- tests/test_quantile.py | 6 ++++-- 2 files changed, 8 insertions(+), 4 deletions(-) diff --git a/src/elexsolver/QuantileRegressionSolver.py b/src/elexsolver/QuantileRegressionSolver.py index c720a69e..3087ed10 100644 --- a/src/elexsolver/QuantileRegressionSolver.py +++ b/src/elexsolver/QuantileRegressionSolver.py @@ -62,7 +62,7 @@ def __solve(self, x, y, weights, lambda_, verbose): Sets up the optimization problem and solves it """ self._check_matrix_condition(x) - coefficients = cp.Variable((x.shape[1], )) + coefficients = cp.Variable((x.shape[1],)) loss_function = self.get_loss_function(x, y, coefficients, weights) loss_function += lambda_ * self.get_regularizer(coefficients) objective = cp.Minimize(loss_function) @@ -70,7 +70,9 @@ def __solve(self, x, y, weights, lambda_, verbose): problem.solve(solver=self.solver, verbose=verbose, **self.KWARGS.get(self.solver, {})) return coefficients, problem - def fit(self, x, y, tau_value=0.5, weights=None, lambda_=0, verbose=False, save_problem=False, normalize_weights=True): + def fit( + self, x, y, tau_value=0.5, weights=None, lambda_=0, verbose=False, save_problem=False, normalize_weights=True + ): """ Fit the (weighted) quantile regression problem. Weights should not sum to one. diff --git a/tests/test_quantile.py b/tests/test_quantile.py index 41a08b8a..d0c66634 100644 --- a/tests/test_quantile.py +++ b/tests/test_quantile.py @@ -236,10 +236,11 @@ def test_weight_normalization_same_fit(random_data_weights): # Test regularization # ######################## + def test_regularization(random_data_no_weights): tau = 0.5 - x = random_data_no_weights[['x0', 'x1', 'x2', 'x3', 'x4']].values - y = random_data_no_weights['y'].values + x = random_data_no_weights[["x0", "x1", "x2", "x3", "x4"]].values + y = random_data_no_weights["y"].values quantreg = QuantileRegressionSolver() lambda_ = 1e8 @@ -251,6 +252,7 @@ def test_regularization(random_data_no_weights): quantreg.fit(x, y, tau, save_problem=True) assert quantreg.problem.value < objective_w_reg + ######################## # Test checking matrix # ######################## From 961d98fdd6e501bb19780a5df77156831de9185a Mon Sep 17 00:00:00 2001 From: lbvienna Date: Tue, 18 Apr 2023 18:46:22 -0400 Subject: [PATCH 09/25] no longer throws warnings --- requirements-dev.txt | 3 ++- src/elexsolver/QuantileRegressionSolver.py | 2 +- 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/requirements-dev.txt b/requirements-dev.txt index 392cdbe0..cb4325a1 100644 --- a/requirements-dev.txt +++ b/requirements-dev.txt @@ -9,4 +9,5 @@ pylint tox black isort -pre-commit \ No newline at end of file +pre-commit +pytest \ No newline at end of file diff --git a/src/elexsolver/QuantileRegressionSolver.py b/src/elexsolver/QuantileRegressionSolver.py index 3087ed10..2bc5c013 100644 --- a/src/elexsolver/QuantileRegressionSolver.py +++ b/src/elexsolver/QuantileRegressionSolver.py @@ -45,7 +45,7 @@ def _check_matrix_condition(self, x): f"Ill-conditioned matrix detected. Matrix condition number >= {self.CONDITION_ERROR_MIN}" ) elif condition_number >= self.CONDITION_WARNING_MIN: - LOG.warn("Ill-conditioned matrix detected. result is not guaranteed to be accurate") + LOG.warning("Ill-conditioned matrix detected. result is not guaranteed to be accurate") return False return True From e1f3ae68a6d65b2674cf5e3fd3ce894de91f0700 Mon Sep 17 00:00:00 2001 From: Dara Gold Date: Wed, 19 Apr 2023 18:29:18 -0400 Subject: [PATCH 10/25] nan tests moved --- src/elexsolver/QuantileRegressionSolver.py | 10 ++++++++++ tests/test_quantile.py | 21 ++++++++++++++++++++- 2 files changed, 30 insertions(+), 1 deletion(-) diff --git a/src/elexsolver/QuantileRegressionSolver.py b/src/elexsolver/QuantileRegressionSolver.py index 2bc5c013..a9cf09ba 100644 --- a/src/elexsolver/QuantileRegressionSolver.py +++ b/src/elexsolver/QuantileRegressionSolver.py @@ -77,6 +77,13 @@ def fit( Fit the (weighted) quantile regression problem. Weights should not sum to one. """ + + if np.any(np.isnan(x)): + LOG.warning("Warning: NaN values in reporting_units_features") + + if np.any(np.isnan(y)): + LOG.warning("Warning: NaN values in reporting_units_residuals") + if weights is None: # if weights are none, give unit weights weights = [1] * x.shape[0] if normalize_weights: @@ -98,4 +105,7 @@ def predict(self, x): """ Returns predictions """ + if np.any(np.isnan(x)): + LOG.warning("Warning: NaN values in nonreporting_units_features") + return self.coefficients @ x.T diff --git a/tests/test_quantile.py b/tests/test_quantile.py index d0c66634..70e3ec09 100644 --- a/tests/test_quantile.py +++ b/tests/test_quantile.py @@ -139,11 +139,30 @@ def test_random_upper_weights(random_data_weights): assert all(np.abs(quantreg.coefficients - [3.47742, 2.07360, 4.51754, 4.15237, 9.58856]) <= TOL) +def test_nan_warnings(random_data_weights): + quantreg = QuantileRegressionSolver() + tau = 0.9 + x = random_data_weights[["x0", "x1", "x2", "x3", "x4"]].values + y = random_data_weights["y"].values + + with pytest.warns(None): + quantreg.fit(x, y, tau) + + x = np.vstack([x, [4,2,6,8,3]]) + y = np.append(y, np.nan) + with pytest.warns(UserWarning): + quantreg.fit(x, y, tau) + + quantreg.coefficients = [4,32, 4,24,7] + x = np.vstack([x, [4,2,6,np.nan,3]]) + with pytest.warns(UserWarning): + quantreg.predict(np.vstack([x, [4,2,6,np.nan,3]])) + + ######################## # Test changing solver # ######################## - def test_changing_solver(random_data_no_weights): tau = 0.5 x = random_data_no_weights[["x0", "x1", "x2", "x3", "x4"]].values From 5784f2686dd9d7f48d5c5f808444196f854b216d Mon Sep 17 00:00:00 2001 From: Dara Gold Date: Wed, 19 Apr 2023 18:33:04 -0400 Subject: [PATCH 11/25] remove test temp --- tests/test_quantile.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tests/test_quantile.py b/tests/test_quantile.py index 70e3ec09..8502b91e 100644 --- a/tests/test_quantile.py +++ b/tests/test_quantile.py @@ -155,8 +155,8 @@ def test_nan_warnings(random_data_weights): quantreg.coefficients = [4,32, 4,24,7] x = np.vstack([x, [4,2,6,np.nan,3]]) - with pytest.warns(UserWarning): - quantreg.predict(np.vstack([x, [4,2,6,np.nan,3]])) + # with pytest.warns(UserWarning): + # quantreg.predict(np.vstack([x, [4,2,6,np.nan,3]])) ######################## From 9f0c8607676de1df0d3c6bc911629317db1cdeb5 Mon Sep 17 00:00:00 2001 From: Dara Gold Date: Wed, 19 Apr 2023 18:33:27 -0400 Subject: [PATCH 12/25] formatted --- src/elexsolver/QuantileRegressionSolver.py | 6 +++--- tests/test_quantile.py | 15 ++++++++------- 2 files changed, 11 insertions(+), 10 deletions(-) diff --git a/src/elexsolver/QuantileRegressionSolver.py b/src/elexsolver/QuantileRegressionSolver.py index a9cf09ba..ea689af7 100644 --- a/src/elexsolver/QuantileRegressionSolver.py +++ b/src/elexsolver/QuantileRegressionSolver.py @@ -78,12 +78,12 @@ def fit( Weights should not sum to one. """ - if np.any(np.isnan(x)): + if np.any(np.isnan(x)): LOG.warning("Warning: NaN values in reporting_units_features") - if np.any(np.isnan(y)): + if np.any(np.isnan(y)): LOG.warning("Warning: NaN values in reporting_units_residuals") - + if weights is None: # if weights are none, give unit weights weights = [1] * x.shape[0] if normalize_weights: diff --git a/tests/test_quantile.py b/tests/test_quantile.py index 8502b91e..b3b4deff 100644 --- a/tests/test_quantile.py +++ b/tests/test_quantile.py @@ -144,25 +144,26 @@ def test_nan_warnings(random_data_weights): tau = 0.9 x = random_data_weights[["x0", "x1", "x2", "x3", "x4"]].values y = random_data_weights["y"].values - + with pytest.warns(None): - quantreg.fit(x, y, tau) + quantreg.fit(x, y, tau) - x = np.vstack([x, [4,2,6,8,3]]) + x = np.vstack([x, [4, 2, 6, 8, 3]]) y = np.append(y, np.nan) with pytest.warns(UserWarning): quantreg.fit(x, y, tau) - - quantreg.coefficients = [4,32, 4,24,7] - x = np.vstack([x, [4,2,6,np.nan,3]]) + + quantreg.coefficients = [4, 32, 4, 24, 7] + x = np.vstack([x, [4, 2, 6, np.nan, 3]]) # with pytest.warns(UserWarning): # quantreg.predict(np.vstack([x, [4,2,6,np.nan,3]])) - + ######################## # Test changing solver # ######################## + def test_changing_solver(random_data_no_weights): tau = 0.5 x = random_data_no_weights[["x0", "x1", "x2", "x3", "x4"]].values From ac67ca824038976eea4343fe7b8d66806d35ff22 Mon Sep 17 00:00:00 2001 From: lbvienna Date: Thu, 20 Apr 2023 11:29:30 -0400 Subject: [PATCH 13/25] moved to function, changed warning to throw exception if inf/nan is passed. should be caught where solver is used if necessary --- src/elexsolver/QuantileRegressionSolver.py | 17 ++++++++++------- 1 file changed, 10 insertions(+), 7 deletions(-) diff --git a/src/elexsolver/QuantileRegressionSolver.py b/src/elexsolver/QuantileRegressionSolver.py index ea689af7..b17c7494 100644 --- a/src/elexsolver/QuantileRegressionSolver.py +++ b/src/elexsolver/QuantileRegressionSolver.py @@ -49,6 +49,13 @@ def _check_matrix_condition(self, x): return False return True + def _check_any_element_nan_or_inf(self, x): + """ + Check whether any element in a matrix or vector is NaN or infinity + """ + if np.any(np.isnan(x)) or np.any(np.isinf(x)): + raise ValueError("Array contains NaN or Infinity") + def get_loss_function(self, x, y, coefficients, weights): y_hat = x @ coefficients residual = y - y_hat @@ -78,11 +85,8 @@ def fit( Weights should not sum to one. """ - if np.any(np.isnan(x)): - LOG.warning("Warning: NaN values in reporting_units_features") - - if np.any(np.isnan(y)): - LOG.warning("Warning: NaN values in reporting_units_residuals") + self._check_any_element_nan_or_inf(x) + self._check_any_element_nan_or_inf(y) if weights is None: # if weights are none, give unit weights weights = [1] * x.shape[0] @@ -105,7 +109,6 @@ def predict(self, x): """ Returns predictions """ - if np.any(np.isnan(x)): - LOG.warning("Warning: NaN values in nonreporting_units_features") + self._check_any_element_nan_or_inf(x) return self.coefficients @ x.T From 74752afd1521dd952e166d7042f51736b2ec55fc Mon Sep 17 00:00:00 2001 From: lbvienna Date: Thu, 20 Apr 2023 11:29:41 -0400 Subject: [PATCH 14/25] updated unit tests --- tests/test_quantile.py | 58 +++++++++++++++++++++++++++--------------- 1 file changed, 38 insertions(+), 20 deletions(-) diff --git a/tests/test_quantile.py b/tests/test_quantile.py index b3b4deff..b46c8a93 100644 --- a/tests/test_quantile.py +++ b/tests/test_quantile.py @@ -1,5 +1,6 @@ import numpy as np import pytest +import warnings from elexsolver.QuantileRegressionSolver import IllConditionedMatrixException, QuantileRegressionSolver @@ -139,26 +140,6 @@ def test_random_upper_weights(random_data_weights): assert all(np.abs(quantreg.coefficients - [3.47742, 2.07360, 4.51754, 4.15237, 9.58856]) <= TOL) -def test_nan_warnings(random_data_weights): - quantreg = QuantileRegressionSolver() - tau = 0.9 - x = random_data_weights[["x0", "x1", "x2", "x3", "x4"]].values - y = random_data_weights["y"].values - - with pytest.warns(None): - quantreg.fit(x, y, tau) - - x = np.vstack([x, [4, 2, 6, 8, 3]]) - y = np.append(y, np.nan) - with pytest.warns(UserWarning): - quantreg.fit(x, y, tau) - - quantreg.coefficients = [4, 32, 4, 24, 7] - x = np.vstack([x, [4, 2, 6, np.nan, 3]]) - # with pytest.warns(UserWarning): - # quantreg.predict(np.vstack([x, [4,2,6,np.nan,3]])) - - ######################## # Test changing solver # ######################## @@ -304,3 +285,40 @@ def test_ill_conditioned_warning(): x = random_number_generator.multivariate_normal(mu, sigma, size=3) matrix_check = quantreg._check_matrix_condition(x) assert matrix_check + + +######################## +# Test checking NaN/Inf # +######################## + +def test_no_nan_inf_error(random_data_weights): + quantreg = QuantileRegressionSolver() + tau = 0.9 + x = random_data_weights[["x0", "x1", "x2", "x3", "x4"]].values + y = random_data_weights["y"].values + + x[0, 0] = np.nan + with pytest.raises(ValueError): + quantreg.fit(x, y, tau) + + x[0, 0] = np.inf + with pytest.raises(ValueError): + quantreg.fit(x, y, tau) + + x = random_data_weights[["x0", "x1", "x2", "x3", "x4"]].values + y[5] = np.nan + with pytest.raises(ValueError): + quantreg.fit(x, y, tau) + + y[5] = np.inf + with pytest.raises(ValueError): + quantreg.fit(x, y, tau) + + quantreg.coefficients = [4, 32, 4, 24, 7] + x = np.vstack([x, [4, 2, 6, np.nan, 3]]) + with pytest.raises(ValueError): + quantreg.predict(x) + + x = np.vstack([x, [4, 2, 6, np.inf, 3]]) + with pytest.raises(ValueError): + quantreg.predict(x) \ No newline at end of file From ee84668a7a0ef46768395bf7bb5e95a6152c7c4c Mon Sep 17 00:00:00 2001 From: lbvienna Date: Thu, 20 Apr 2023 11:34:15 -0400 Subject: [PATCH 15/25] ran pre-commit --- tests/test_quantile.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/tests/test_quantile.py b/tests/test_quantile.py index b46c8a93..99feb76b 100644 --- a/tests/test_quantile.py +++ b/tests/test_quantile.py @@ -1,6 +1,5 @@ import numpy as np import pytest -import warnings from elexsolver.QuantileRegressionSolver import IllConditionedMatrixException, QuantileRegressionSolver @@ -291,6 +290,7 @@ def test_ill_conditioned_warning(): # Test checking NaN/Inf # ######################## + def test_no_nan_inf_error(random_data_weights): quantreg = QuantileRegressionSolver() tau = 0.9 @@ -300,7 +300,7 @@ def test_no_nan_inf_error(random_data_weights): x[0, 0] = np.nan with pytest.raises(ValueError): quantreg.fit(x, y, tau) - + x[0, 0] = np.inf with pytest.raises(ValueError): quantreg.fit(x, y, tau) @@ -309,7 +309,7 @@ def test_no_nan_inf_error(random_data_weights): y[5] = np.nan with pytest.raises(ValueError): quantreg.fit(x, y, tau) - + y[5] = np.inf with pytest.raises(ValueError): quantreg.fit(x, y, tau) @@ -321,4 +321,4 @@ def test_no_nan_inf_error(random_data_weights): x = np.vstack([x, [4, 2, 6, np.inf, 3]]) with pytest.raises(ValueError): - quantreg.predict(x) \ No newline at end of file + quantreg.predict(x) From 172a878af482547c696a5f3e9297e2759365c933 Mon Sep 17 00:00:00 2001 From: lbvienna Date: Thu, 20 Apr 2023 22:28:18 -0400 Subject: [PATCH 16/25] added fit intercept parameter, if true, don't regularize intercept --- src/elexsolver/QuantileRegressionSolver.py | 19 ++++++++++++------- 1 file changed, 12 insertions(+), 7 deletions(-) diff --git a/src/elexsolver/QuantileRegressionSolver.py b/src/elexsolver/QuantileRegressionSolver.py index b17c7494..e7979e34 100644 --- a/src/elexsolver/QuantileRegressionSolver.py +++ b/src/elexsolver/QuantileRegressionSolver.py @@ -61,24 +61,29 @@ def get_loss_function(self, x, y, coefficients, weights): residual = y - y_hat return cp.sum(cp.multiply(weights, 0.5 * cp.abs(residual) + (self.tau.value - 0.5) * residual)) - def get_regularizer(self, coefficients): - return cp.pnorm(coefficients, p=2) ** 2 - - def __solve(self, x, y, weights, lambda_, verbose): + def get_regularizer(self, coefficients, fit_intercept): + # coefficient for intercept should not get regularized + # NOTE: this now assumes that the first column is an intercept + coefficients_to_regularize = coefficients + if fit_intercept: + coefficients_to_regularize = coefficients[1:] + return cp.pnorm(coefficients_to_regularize, p=2) ** 2 + + def __solve(self, x, y, weights, lambda_, fit_intercept, verbose): """ Sets up the optimization problem and solves it """ self._check_matrix_condition(x) coefficients = cp.Variable((x.shape[1],)) loss_function = self.get_loss_function(x, y, coefficients, weights) - loss_function += lambda_ * self.get_regularizer(coefficients) + loss_function += lambda_ * self.get_regularizer(coefficients, fit_intercept) objective = cp.Minimize(loss_function) problem = cp.Problem(objective) problem.solve(solver=self.solver, verbose=verbose, **self.KWARGS.get(self.solver, {})) return coefficients, problem def fit( - self, x, y, tau_value=0.5, weights=None, lambda_=0, verbose=False, save_problem=False, normalize_weights=True + self, x, y, tau_value=0.5, weights=None, lambda_=0, fit_intercept=True, verbose=False, save_problem=False, normalize_weights=True ): """ Fit the (weighted) quantile regression problem. @@ -98,7 +103,7 @@ def fit( weights = weights / weights_sum self.tau.value = tau_value - coefficients, problem = self.__solve(x, y, weights, lambda_, verbose) + coefficients, problem = self.__solve(x, y, weights, lambda_, fit_intercept, verbose) self.coefficients = coefficients.value if save_problem: self.problem = problem From 92d16d128927d9b0d8e3bd957204fafc01bcd986 Mon Sep 17 00:00:00 2001 From: lbvienna Date: Thu, 20 Apr 2023 22:28:28 -0400 Subject: [PATCH 17/25] updated unit tests --- tests/test_quantile.py | 22 ++++++++++++++++++---- 1 file changed, 18 insertions(+), 4 deletions(-) diff --git a/tests/test_quantile.py b/tests/test_quantile.py index 99feb76b..b10e8f1a 100644 --- a/tests/test_quantile.py +++ b/tests/test_quantile.py @@ -237,21 +237,35 @@ def test_weight_normalization_same_fit(random_data_weights): ######################## -def test_regularization(random_data_no_weights): +def test_regularization_with_intercept(random_data_no_weights): tau = 0.5 x = random_data_no_weights[["x0", "x1", "x2", "x3", "x4"]].values y = random_data_no_weights["y"].values quantreg = QuantileRegressionSolver() - lambda_ = 1e8 - quantreg.fit(x, y, tau, lambda_=lambda_, save_problem=True) + lambda_ = 1e6 + quantreg.fit(x, y, tau, lambda_=lambda_, fit_intercept=True, save_problem=True) coefficients_w_reg = quantreg.coefficients - assert all(np.abs(coefficients_w_reg - [0, 0, 0, 0, 0]) <= TOL) + assert all(np.abs(coefficients_w_reg[1:] - [0, 0, 0, 0]) <= TOL) + assert np.abs(coefficients_w_reg[0]) > TOL objective_w_reg = quantreg.problem.value quantreg.fit(x, y, tau, save_problem=True) assert quantreg.problem.value < objective_w_reg +def test_regularization_without_intercept(random_data_no_weights): + tau = 0.5 + x = random_data_no_weights[["x0", "x1", "x2", "x3", "x4"]].values + y = random_data_no_weights["y"].values + + quantreg = QuantileRegressionSolver() + lambda_ = 1e6 + quantreg.fit(x, y, tau, lambda_=lambda_, fit_intercept=False, save_problem=True) + coefficients_w_reg = quantreg.coefficients + assert all(np.abs(coefficients_w_reg - [0, 0, 0, 0, 0]) <= TOL) + + + ######################## # Test checking matrix # From d8ca52f03211b382a0a2d26099d94511e0dfa436 Mon Sep 17 00:00:00 2001 From: lbvienna Date: Thu, 20 Apr 2023 22:31:46 -0400 Subject: [PATCH 18/25] updated comments --- src/elexsolver/QuantileRegressionSolver.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/src/elexsolver/QuantileRegressionSolver.py b/src/elexsolver/QuantileRegressionSolver.py index e7979e34..c10e4b76 100644 --- a/src/elexsolver/QuantileRegressionSolver.py +++ b/src/elexsolver/QuantileRegressionSolver.py @@ -62,8 +62,8 @@ def get_loss_function(self, x, y, coefficients, weights): return cp.sum(cp.multiply(weights, 0.5 * cp.abs(residual) + (self.tau.value - 0.5) * residual)) def get_regularizer(self, coefficients, fit_intercept): - # coefficient for intercept should not get regularized - # NOTE: this now assumes that the first column is an intercept + # if we are fitting an intercept in the model, then that coefficient should not be regularized. + # NOTE: assumes that if fit_intercept=True, that the intercept is in the first column coefficients_to_regularize = coefficients if fit_intercept: coefficients_to_regularize = coefficients[1:] @@ -88,6 +88,7 @@ def fit( """ Fit the (weighted) quantile regression problem. Weights should not sum to one. + If fit_intercept=True then intercept is assumed to be the first column in `x` """ self._check_any_element_nan_or_inf(x) From c32e1ab07275e7e89d16dbaf2ebff3c41dfc9ea7 Mon Sep 17 00:00:00 2001 From: lbvienna Date: Thu, 20 Apr 2023 22:32:04 -0400 Subject: [PATCH 19/25] ran pre-commit --- src/elexsolver/QuantileRegressionSolver.py | 11 ++++++++++- tests/test_quantile.py | 3 +-- 2 files changed, 11 insertions(+), 3 deletions(-) diff --git a/src/elexsolver/QuantileRegressionSolver.py b/src/elexsolver/QuantileRegressionSolver.py index c10e4b76..e081fb55 100644 --- a/src/elexsolver/QuantileRegressionSolver.py +++ b/src/elexsolver/QuantileRegressionSolver.py @@ -83,7 +83,16 @@ def __solve(self, x, y, weights, lambda_, fit_intercept, verbose): return coefficients, problem def fit( - self, x, y, tau_value=0.5, weights=None, lambda_=0, fit_intercept=True, verbose=False, save_problem=False, normalize_weights=True + self, + x, + y, + tau_value=0.5, + weights=None, + lambda_=0, + fit_intercept=True, + verbose=False, + save_problem=False, + normalize_weights=True, ): """ Fit the (weighted) quantile regression problem. diff --git a/tests/test_quantile.py b/tests/test_quantile.py index b10e8f1a..db49dd88 100644 --- a/tests/test_quantile.py +++ b/tests/test_quantile.py @@ -253,6 +253,7 @@ def test_regularization_with_intercept(random_data_no_weights): quantreg.fit(x, y, tau, save_problem=True) assert quantreg.problem.value < objective_w_reg + def test_regularization_without_intercept(random_data_no_weights): tau = 0.5 x = random_data_no_weights[["x0", "x1", "x2", "x3", "x4"]].values @@ -265,8 +266,6 @@ def test_regularization_without_intercept(random_data_no_weights): assert all(np.abs(coefficients_w_reg - [0, 0, 0, 0, 0]) <= TOL) - - ######################## # Test checking matrix # ######################## From 4764963bfad188e7c83122388215a728814ff616 Mon Sep 17 00:00:00 2001 From: lbvienna Date: Fri, 21 Apr 2023 16:16:03 -0400 Subject: [PATCH 20/25] warnings emitted by warnings now logged properly --- src/elexsolver/QuantileRegressionSolver.py | 13 +++++++++---- src/elexsolver/logging.py | 1 + 2 files changed, 10 insertions(+), 4 deletions(-) diff --git a/src/elexsolver/QuantileRegressionSolver.py b/src/elexsolver/QuantileRegressionSolver.py index e081fb55..b5cf5065 100644 --- a/src/elexsolver/QuantileRegressionSolver.py +++ b/src/elexsolver/QuantileRegressionSolver.py @@ -1,5 +1,5 @@ import logging - +import warnings import cvxpy as cp import numpy as np @@ -45,9 +45,7 @@ def _check_matrix_condition(self, x): f"Ill-conditioned matrix detected. Matrix condition number >= {self.CONDITION_ERROR_MIN}" ) elif condition_number >= self.CONDITION_WARNING_MIN: - LOG.warning("Ill-conditioned matrix detected. result is not guaranteed to be accurate") - return False - return True + warnings.warn("Warning: Ill-conditioned matrix detected. result is not guaranteed to be accurate") def _check_any_element_nan_or_inf(self, x): """ @@ -56,6 +54,10 @@ def _check_any_element_nan_or_inf(self, x): if np.any(np.isnan(x)) or np.any(np.isinf(x)): raise ValueError("Array contains NaN or Infinity") + def _check_intercept(self, x): + if ~np.all(x[:,0] == 1): + warnings.warn("Warning: fit_intercept=True and not all elements of the first columns are 1s") + def get_loss_function(self, x, y, coefficients, weights): y_hat = x @ coefficients residual = y - y_hat @@ -103,6 +105,9 @@ def fit( self._check_any_element_nan_or_inf(x) self._check_any_element_nan_or_inf(y) + if fit_intercept: + self._check_intercept(x) + if weights is None: # if weights are none, give unit weights weights = [1] * x.shape[0] if normalize_weights: diff --git a/src/elexsolver/logging.py b/src/elexsolver/logging.py index fdee5c5f..a2274d8b 100644 --- a/src/elexsolver/logging.py +++ b/src/elexsolver/logging.py @@ -31,4 +31,5 @@ def initialize_logging(logging_config=None): app_log_level = os.getenv("APP_LOG_LEVEL", "INFO") LOGGING_CONFIG["loggers"]["elexsolver"]["level"] = app_log_level logging_config = LOGGING_CONFIG + logging.captureWarnings(True) logging.config.dictConfig(logging_config) From 8ea2d3c9e7b76feb1e3ee8a21bee0d0cd1476989 Mon Sep 17 00:00:00 2001 From: lbvienna Date: Fri, 21 Apr 2023 16:16:12 -0400 Subject: [PATCH 21/25] updated unittests --- tests/test_quantile.py | 64 ++++++++++++++++++++++-------------------- 1 file changed, 33 insertions(+), 31 deletions(-) diff --git a/tests/test_quantile.py b/tests/test_quantile.py index db49dd88..5965e938 100644 --- a/tests/test_quantile.py +++ b/tests/test_quantile.py @@ -18,7 +18,7 @@ def test_basic_median_1(): tau = 0.5 x = np.asarray([[1], [1], [1], [2]]) y = np.asarray([3, 8, 9, 15]) - quantreg.fit(x, y, tau) + quantreg.fit(x, y, tau, fit_intercept=False) preds = quantreg.predict(x) # you'd think it would be 8 instead of 7.5, but run quantreg in R to confirm # has to do with missing intercept @@ -65,7 +65,7 @@ def test_random_median(random_data_no_weights): tau = 0.5 x = random_data_no_weights[["x0", "x1", "x2", "x3", "x4"]].values y = random_data_no_weights["y"].values - quantreg.fit(x, y, tau) + quantreg.fit(x, y, tau, fit_intercept=False) quantreg.predict(x) assert all(np.abs(quantreg.coefficients - [1.57699, 6.74906, 4.40175, 4.85346, 4.51814]) <= TOL) @@ -75,7 +75,7 @@ def test_random_lower(random_data_no_weights): tau = 0.1 x = random_data_no_weights[["x0", "x1", "x2", "x3", "x4"]].values y = random_data_no_weights["y"].values - quantreg.fit(x, y, tau) + quantreg.fit(x, y, tau, fit_intercept=False) quantreg.predict(x) assert all(np.abs(quantreg.coefficients - [0.17759, 6.99588, 4.18896, 4.83906, 3.22546]) <= TOL) @@ -85,7 +85,7 @@ def test_random_upper(random_data_no_weights): tau = 0.9 x = random_data_no_weights[["x0", "x1", "x2", "x3", "x4"]].values y = random_data_no_weights["y"].values - quantreg.fit(x, y, tau) + quantreg.fit(x, y, tau, fit_intercept=False) quantreg.predict(x) assert all(np.abs(quantreg.coefficients - [1.85617, 6.81286, 6.05586, 5.51965, 4.19864]) <= TOL) @@ -112,7 +112,7 @@ def test_random_median_weights(random_data_weights): x = random_data_weights[["x0", "x1", "x2", "x3", "x4"]].values y = random_data_weights["y"].values weights = random_data_weights["weights"].values - quantreg.fit(x, y, tau, weights=weights) + quantreg.fit(x, y, tau, weights=weights, fit_intercept=False) quantreg.predict(x) assert all(np.abs(quantreg.coefficients - [1.59521, 2.17864, 4.68050, 3.10920, 9.63739]) <= TOL) @@ -123,7 +123,7 @@ def test_random_lower_weights(random_data_weights): x = random_data_weights[["x0", "x1", "x2", "x3", "x4"]].values y = random_data_weights["y"].values weights = random_data_weights["weights"].values - quantreg.fit(x, y, tau, weights=weights) + quantreg.fit(x, y, tau, weights=weights, fit_intercept=False) quantreg.predict(x) assert all(np.abs(quantreg.coefficients - [0.63670, 1.27028, 4.81500, 3.08055, 8.69929]) <= TOL) @@ -134,7 +134,7 @@ def test_random_upper_weights(random_data_weights): x = random_data_weights[["x0", "x1", "x2", "x3", "x4"]].values y = random_data_weights["y"].values weights = random_data_weights["weights"].values - quantreg.fit(x, y, tau, weights=weights) + quantreg.fit(x, y, tau, weights=weights, fit_intercept=False) quantreg.predict(x) assert all(np.abs(quantreg.coefficients - [3.47742, 2.07360, 4.51754, 4.15237, 9.58856]) <= TOL) @@ -151,8 +151,8 @@ def test_changing_solver(random_data_no_weights): quantreg_scs = QuantileRegressionSolver(solver="SCS") quantreg_ecos = QuantileRegressionSolver(solver="ECOS") - quantreg_scs.fit(x, y, tau, save_problem=True) - quantreg_ecos.fit(x, y, tau, save_problem=True) + quantreg_scs.fit(x, y, tau, save_problem=True, fit_intercept=False) + quantreg_ecos.fit(x, y, tau, save_problem=True, fit_intercept=False) assert quantreg_scs.problem.value == pytest.approx(quantreg_ecos.problem.value, TOL) @@ -165,8 +165,8 @@ def test_changing_solver_weights(random_data_weights): quantreg_scs = QuantileRegressionSolver(solver="SCS") quantreg_ecos = QuantileRegressionSolver(solver="ECOS") - quantreg_scs.fit(x, y, tau, weights=weights, save_problem=True) - quantreg_ecos.fit(x, y, tau, weights=weights, save_problem=True) + quantreg_scs.fit(x, y, tau, weights=weights, save_problem=True, fit_intercept=False) + quantreg_ecos.fit(x, y, tau, weights=weights, save_problem=True, fit_intercept=False) assert quantreg_scs.problem.value == pytest.approx(quantreg_ecos.problem.value, TOL) @@ -183,14 +183,14 @@ def test_saving_problem(random_data_no_weights): quantreg = QuantileRegressionSolver(solver="ECOS") - quantreg.fit(x, y, tau, save_problem=False) + quantreg.fit(x, y, tau, save_problem=False, fit_intercept=False) assert quantreg.problem is None - quantreg.fit(x, y, tau, save_problem=True) + quantreg.fit(x, y, tau, save_problem=True, fit_intercept=False) assert quantreg.problem is not None # testing whether overwrite works - quantreg.fit(x, y, tau, save_problem=False) + quantreg.fit(x, y, tau, save_problem=False, fit_intercept=False) assert quantreg.problem is None @@ -208,11 +208,11 @@ def test_weight_normalization_divide_by_zero(random_data_no_weights): quantreg = QuantileRegressionSolver(solver="ECOS") # Will succeed without weight normalization - quantreg.fit(x, y, tau, normalize_weights=False, weights=weights) + quantreg.fit(x, y, tau, normalize_weights=False, weights=weights, fit_intercept=False) # Will fail with weight normalization with pytest.raises(ZeroDivisionError): - quantreg.fit(x, y, tau, normalize_weights=True, weights=weights) + quantreg.fit(x, y, tau, normalize_weights=True, weights=weights, fit_intercept=False) def test_weight_normalization_same_fit(random_data_weights): @@ -240,6 +240,7 @@ def test_weight_normalization_same_fit(random_data_weights): def test_regularization_with_intercept(random_data_no_weights): tau = 0.5 x = random_data_no_weights[["x0", "x1", "x2", "x3", "x4"]].values + x[:,0] = 1 y = random_data_no_weights["y"].values quantreg = QuantileRegressionSolver() @@ -253,6 +254,16 @@ def test_regularization_with_intercept(random_data_no_weights): quantreg.fit(x, y, tau, save_problem=True) assert quantreg.problem.value < objective_w_reg +def test_regularization_with_intercept_warning(random_data_no_weights, caplog): + caplog.clear() + tau = 0.5 + x = random_data_no_weights[["x0", "x1", "x2", "x3", "x4"]].values + y = random_data_no_weights["y"].values + + quantreg = QuantileRegressionSolver() + lambda_ = 1e6 + with pytest.warns(UserWarning): + quantreg.fit(x, y, tau, lambda_=lambda_, fit_intercept=True, save_problem=True) def test_regularization_without_intercept(random_data_no_weights): tau = 0.5 @@ -286,17 +297,8 @@ def test_ill_conditioned_warning(): mu = np.asarray([1, 3, 5]) sigma = np.asarray([[1, 0.9, 0], [0.9, 1, 0], [0, 0, 1]]) x = random_number_generator.multivariate_normal(mu, sigma, size=3) - matrix_check = quantreg._check_matrix_condition(x) - assert not matrix_check - - quantreg = QuantileRegressionSolver() - - random_number_generator = np.random.RandomState(42) - mu = np.asarray([1, 3, 5]) - sigma = np.asarray([[1, 0, 0], [0, 1, 0], [0, 0, 1]]) - x = random_number_generator.multivariate_normal(mu, sigma, size=3) - matrix_check = quantreg._check_matrix_condition(x) - assert matrix_check + with pytest.warns(UserWarning): + matrix_check = quantreg._check_matrix_condition(x) ######################## @@ -312,20 +314,20 @@ def test_no_nan_inf_error(random_data_weights): x[0, 0] = np.nan with pytest.raises(ValueError): - quantreg.fit(x, y, tau) + quantreg.fit(x, y, tau, fit_intercept=False) x[0, 0] = np.inf with pytest.raises(ValueError): - quantreg.fit(x, y, tau) + quantreg.fit(x, y, tau, fit_intercept=False) x = random_data_weights[["x0", "x1", "x2", "x3", "x4"]].values y[5] = np.nan with pytest.raises(ValueError): - quantreg.fit(x, y, tau) + quantreg.fit(x, y, tau, fit_intercept=False) y[5] = np.inf with pytest.raises(ValueError): - quantreg.fit(x, y, tau) + quantreg.fit(x, y, tau, fit_intercept=False) quantreg.coefficients = [4, 32, 4, 24, 7] x = np.vstack([x, [4, 2, 6, np.nan, 3]]) From 72198ebfbf1104b83846dcfdd5539a2af3544153 Mon Sep 17 00:00:00 2001 From: lbvienna Date: Fri, 21 Apr 2023 16:16:55 -0400 Subject: [PATCH 22/25] linting --- src/elexsolver/QuantileRegressionSolver.py | 3 ++- tests/test_quantile.py | 8 +++++--- 2 files changed, 7 insertions(+), 4 deletions(-) diff --git a/src/elexsolver/QuantileRegressionSolver.py b/src/elexsolver/QuantileRegressionSolver.py index b5cf5065..14657796 100644 --- a/src/elexsolver/QuantileRegressionSolver.py +++ b/src/elexsolver/QuantileRegressionSolver.py @@ -1,5 +1,6 @@ import logging import warnings + import cvxpy as cp import numpy as np @@ -55,7 +56,7 @@ def _check_any_element_nan_or_inf(self, x): raise ValueError("Array contains NaN or Infinity") def _check_intercept(self, x): - if ~np.all(x[:,0] == 1): + if ~np.all(x[:, 0] == 1): warnings.warn("Warning: fit_intercept=True and not all elements of the first columns are 1s") def get_loss_function(self, x, y, coefficients, weights): diff --git a/tests/test_quantile.py b/tests/test_quantile.py index 5965e938..72c44e28 100644 --- a/tests/test_quantile.py +++ b/tests/test_quantile.py @@ -240,7 +240,7 @@ def test_weight_normalization_same_fit(random_data_weights): def test_regularization_with_intercept(random_data_no_weights): tau = 0.5 x = random_data_no_weights[["x0", "x1", "x2", "x3", "x4"]].values - x[:,0] = 1 + x[:, 0] = 1 y = random_data_no_weights["y"].values quantreg = QuantileRegressionSolver() @@ -254,6 +254,7 @@ def test_regularization_with_intercept(random_data_no_weights): quantreg.fit(x, y, tau, save_problem=True) assert quantreg.problem.value < objective_w_reg + def test_regularization_with_intercept_warning(random_data_no_weights, caplog): caplog.clear() tau = 0.5 @@ -262,9 +263,10 @@ def test_regularization_with_intercept_warning(random_data_no_weights, caplog): quantreg = QuantileRegressionSolver() lambda_ = 1e6 - with pytest.warns(UserWarning): + with pytest.warns(UserWarning): quantreg.fit(x, y, tau, lambda_=lambda_, fit_intercept=True, save_problem=True) + def test_regularization_without_intercept(random_data_no_weights): tau = 0.5 x = random_data_no_weights[["x0", "x1", "x2", "x3", "x4"]].values @@ -298,7 +300,7 @@ def test_ill_conditioned_warning(): sigma = np.asarray([[1, 0.9, 0], [0.9, 1, 0], [0, 0, 1]]) x = random_number_generator.multivariate_normal(mu, sigma, size=3) with pytest.warns(UserWarning): - matrix_check = quantreg._check_matrix_condition(x) + quantreg._check_matrix_condition(x) ######################## From ca31f131c5fff0fb86924d8a89468fce4e382793 Mon Sep 17 00:00:00 2001 From: lbvienna Date: Fri, 21 Apr 2023 16:53:59 -0400 Subject: [PATCH 23/25] added comment --- src/elexsolver/QuantileRegressionSolver.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/src/elexsolver/QuantileRegressionSolver.py b/src/elexsolver/QuantileRegressionSolver.py index 14657796..ae519d4a 100644 --- a/src/elexsolver/QuantileRegressionSolver.py +++ b/src/elexsolver/QuantileRegressionSolver.py @@ -56,6 +56,9 @@ def _check_any_element_nan_or_inf(self, x): raise ValueError("Array contains NaN or Infinity") def _check_intercept(self, x): + """ + Check whether the first column is all 1s (normal intercept) otherwise raises a warning. + """ if ~np.all(x[:, 0] == 1): warnings.warn("Warning: fit_intercept=True and not all elements of the first columns are 1s") From 081b2fce755befd39f56ee18f434a48c60b0b451 Mon Sep 17 00:00:00 2001 From: lbvienna Date: Fri, 21 Apr 2023 16:56:45 -0400 Subject: [PATCH 24/25] updated some comments --- src/elexsolver/QuantileRegressionSolver.py | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/src/elexsolver/QuantileRegressionSolver.py b/src/elexsolver/QuantileRegressionSolver.py index ae519d4a..3fe12560 100644 --- a/src/elexsolver/QuantileRegressionSolver.py +++ b/src/elexsolver/QuantileRegressionSolver.py @@ -63,11 +63,17 @@ def _check_intercept(self, x): warnings.warn("Warning: fit_intercept=True and not all elements of the first columns are 1s") def get_loss_function(self, x, y, coefficients, weights): + """ + Get the quantile regression loss function + """ y_hat = x @ coefficients residual = y - y_hat return cp.sum(cp.multiply(weights, 0.5 * cp.abs(residual) + (self.tau.value - 0.5) * residual)) def get_regularizer(self, coefficients, fit_intercept): + """ + Get regularization component of the loss function. Note that this is L2 (ridge) regularization. + """ # if we are fitting an intercept in the model, then that coefficient should not be regularized. # NOTE: assumes that if fit_intercept=True, that the intercept is in the first column coefficients_to_regularize = coefficients From 8e9cca3bd47098572962fed5450a855c920f66d3 Mon Sep 17 00:00:00 2001 From: lbvienna Date: Fri, 21 Apr 2023 17:52:01 -0400 Subject: [PATCH 25/25] updated setup.py and changelog --- CHANGELOG.md | 7 +++++++ setup.py | 2 +- 2 files changed, 8 insertions(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 80097200..1580391f 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,5 +1,12 @@ # Changelog +### 1.1.0 - 2023-04-21 +- fix: Not regularizing intercept coefficient + better warning handling [#8](https://github.com/washingtonpost/elex-solver/pull/8) +- feat: Throw error when encountering NaN/Inf [#7](https://github.com/washingtonpost/elex-solver/pull/7) +- fix: fix deprecated warning [#6](https://github.com/washingtonpost/elex-solver/pull/6) +- chore: Add pre-commit linting and hook [#5](https://github.com/washingtonpost/elex-solver/pull/5) +- feat: Add regularization [#4](https://github.com/washingtonpost/elex-solver/pull/4) + ### 1.0.3 - 2022-11-07 - Add gitignore, codeowners, PR template, unit test workflow diff --git a/setup.py b/setup.py index 81d74eda..a1e5824e 100644 --- a/setup.py +++ b/setup.py @@ -13,7 +13,7 @@ LONG_DESCRIPTION = f.read() # The full version, including alpha/beta/rc tags -RELEASE = "1.0.3" +RELEASE = "1.1.0" # The short X.Y version VERSION = ".".join(RELEASE.split(".")[:2])