Skip to content

Commit

Permalink
Merge pull request #9 from washingtonpost/release/1.1.0
Browse files Browse the repository at this point in the history
updated setup.py and changelog
  • Loading branch information
lennybronner authored Apr 21, 2023
2 parents ec202c2 + 8e9cca3 commit 717ad24
Show file tree
Hide file tree
Showing 11 changed files with 359 additions and 134 deletions.
14 changes: 14 additions & 0 deletions .github/workflows/pre-commit.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,14 @@
name: pre-commit
on:
pull_request:
branches-ignore:
- main
jobs:
pre-commit:
runs-on: ubuntu-latest
steps:
- uses: actions/checkout@v2
- uses: actions/setup-python@v2
with:
python-version: '3.9'
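- uses: pre-commit/action@v2.0.3  # NOTE(review): the "action@<tag>" text was mangled by e-mail obfuscation in this capture; v2.0.3 is assumed — confirm against the repository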
38 changes: 38 additions & 0 deletions .pre-commit-config.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,38 @@
files: \.py$
repos:
- repo: https://github.com/pre-commit/pre-commit-hooks
rev: v4.0.1
hooks:
- id: end-of-file-fixer
- id: trailing-whitespace

- repo: https://github.com/pycqa/isort
rev: 5.11.5
hooks:
- id: isort
name: isort (python)
args: ["--profile", "black", --line-length=120]

# black
- repo: https://github.com/ambv/black
rev: 22.3.0
hooks:
- id: black
args: # arguments to configure black
- --line-length=120
language_version: python3

# flake8
- repo: https://github.com/pre-commit/pre-commit-hooks
rev: v2.3.0
hooks:
- id: flake8
args: # arguments to configure flake8
# making flake8 line length compatible with black
- "--max-line-length=120"
- "--max-complexity=18"
- "--select=B,C,E,F,W,T4,B9"
# these are errors that will be ignored by flake8
# definitions here
# https://flake8.pycqa.org/en/latest/user/error-codes.html
- "--ignore=E266,E501,W503"
7 changes: 7 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
@@ -1,5 +1,12 @@
# Changelog

### 1.1.0 - 2023-04-21
- fix: Not regularizing intercept coefficient + better warning handling [#8](https://github.com/washingtonpost/elex-solver/pull/8)
- feat: Throw error when encountering NaN/Inf [#7](https://github.com/washingtonpost/elex-solver/pull/7)
- fix: Fix deprecation warning [#6](https://github.com/washingtonpost/elex-solver/pull/6)
- chore: Add pre-commit linting and hook [#5](https://github.com/washingtonpost/elex-solver/pull/5)
- feat: Add regularization [#4](https://github.com/washingtonpost/elex-solver/pull/4)

### 1.0.3 - 2022-11-07
- Add gitignore, codeowners, PR template, unit test workflow

Expand Down
6 changes: 6 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,12 @@ Set up a virtual environment and run:
> pip install -r requirements-dev.txt
```

## Pre-commit
To run pre-commit for linting, run:
```
pre-commit run --all-files
```

## Testing
```
> tox
Expand Down
6 changes: 5 additions & 1 deletion requirements-dev.txt
Original file line number Diff line number Diff line change
Expand Up @@ -6,4 +6,8 @@ autopep8
betamax
betamax-serializers
pylint
tox
tox
black
isort
pre-commit
pytest
49 changes: 24 additions & 25 deletions setup.py
Original file line number Diff line number Diff line change
@@ -1,52 +1,51 @@
from codecs import open
import os
from codecs import open

from setuptools import find_packages, setup

INSTALL_REQUIRES = (
'cvxpy<=1.2.0'
)
INSTALL_REQUIRES = "cvxpy<=1.2.0"

THIS_FILE_DIR = os.path.dirname(__file__)

LONG_DESCRIPTION = ''
LONG_DESCRIPTION = ""
# Get the long description from the README file
with open(os.path.join(THIS_FILE_DIR, 'README.md'), encoding='utf-8') as f:
with open(os.path.join(THIS_FILE_DIR, "README.md"), encoding="utf-8") as f:
LONG_DESCRIPTION = f.read()

# The full version, including alpha/beta/rc tags
RELEASE = '1.0.3'
RELEASE = "1.1.0"
# The short X.Y version
VERSION = '.'.join(RELEASE.split('.')[:2])
VERSION = ".".join(RELEASE.split(".")[:2])

PROJECT = 'elex-solver'
AUTHOR = 'The Wapo Newsroom Engineering Team'
COPYRIGHT = '2021, {}'.format(AUTHOR)
PROJECT = "elex-solver"
AUTHOR = "The Wapo Newsroom Engineering Team"
COPYRIGHT = "2021, {}".format(AUTHOR)


setup(
name=PROJECT,
version=RELEASE,
classifiers=[
'Intended Audience :: Developers',
'License :: OSI Approved :: MIT License',
'Programming Language :: Python',
'Programming Language :: Python :: 3.7'
"Intended Audience :: Developers",
"License :: OSI Approved :: MIT License",
"Programming Language :: Python",
"Programming Language :: Python :: 3.7",
],
description='A package for optimization solvers',
description="A package for optimization solvers",
long_description=LONG_DESCRIPTION,
long_description_content_type="text/markdown",
license='MIT',
packages=find_packages('src', exclude=['docs', 'tests']),
package_dir={'': 'src'},
license="MIT",
packages=find_packages("src", exclude=["docs", "tests"]),
package_dir={"": "src"},
include_package_data=True,
zip_safe=False,
install_requires=INSTALL_REQUIRES,
command_options={
'build_sphinx': {
'project': ('setup.py', PROJECT),
'version': ('setup.py', VERSION),
'release': ('setup.py', RELEASE)
"build_sphinx": {
"project": ("setup.py", PROJECT),
"version": ("setup.py", VERSION),
"release": ("setup.py", RELEASE),
}
},
py_modules=['elexsolver']
)
py_modules=["elexsolver"],
)
95 changes: 73 additions & 22 deletions src/elexsolver/QuantileRegressionSolver.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
import logging
import warnings

import cvxpy as cp
import numpy as np
Expand All @@ -9,25 +10,24 @@

LOG = logging.getLogger(__name__)


class QuantileRegressionSolverException(Exception):
    """Base class for exceptions specific to the quantile regression solver."""

    pass


class IllConditionedMatrixException(QuantileRegressionSolverException):
    """
    Signals an ill-conditioned input matrix.

    NOTE(review): presumably raised by the matrix condition check when the
    condition number reaches CONDITION_ERROR_MIN -- the raise site is not
    visible in this view; confirm.
    """

    pass

class QuantileRegressionSolver():

VALID_SOLVERS = {'SCS', 'ECOS', 'MOSEK', 'OSQP', 'CVXOPT', 'GLPK'}
KWARGS = {
"ECOS": {
"max_iters": 10000
}
}
class QuantileRegressionSolver:

VALID_SOLVERS = {"SCS", "ECOS", "MOSEK", "OSQP", "CVXOPT", "GLPK"}
KWARGS = {"ECOS": {"max_iters": 10000}}

CONDITION_WARNING_MIN = 50 # arbitrary
CONDITION_ERROR_MIN = 1e+8 # based on scipy
CONDITION_WARNING_MIN = 50 # arbitrary
CONDITION_ERROR_MIN = 1e8 # based on scipy

def __init__(self, solver='ECOS'):
def __init__(self, solver="ECOS"):
if solver not in self.VALID_SOLVERS:
raise ValueError(f"solver must be in {self.VALID_SOLVERS}")
self.tau = cp.Parameter()
Expand All @@ -46,40 +46,89 @@ def _check_matrix_condition(self, x):
f"Ill-conditioned matrix detected. Matrix condition number >= {self.CONDITION_ERROR_MIN}"
)
elif condition_number >= self.CONDITION_WARNING_MIN:
LOG.warn(f"Ill-conditioned matrix detected. result is not guaranteed to be accurate")
return False
return True
warnings.warn("Warning: Ill-conditioned matrix detected. result is not guaranteed to be accurate")

def __solve(self, x, y, weights, verbose):
def _check_any_element_nan_or_inf(self, x):
"""
Sets up the optimization problem and solves it
Check whether any element in a matrix or vector is NaN or infinity
"""
if np.any(np.isnan(x)) or np.any(np.isinf(x)):
raise ValueError("Array contains NaN or Infinity")

def _check_intercept(self, x):
"""
Check whether the first column is all 1s (normal intercept) otherwise raises a warning.
"""
if ~np.all(x[:, 0] == 1):
warnings.warn("Warning: fit_intercept=True and not all elements of the first columns are 1s")

def get_loss_function(self, x, y, coefficients, weights):
"""
Get the quantile regression loss function
"""
self._check_matrix_condition(x)
coefficients = cp.Variable((x.shape[1], ))
y_hat = x @ coefficients
residual = y - y_hat
loss_function = cp.sum(cp.multiply(weights, 0.5 * cp.abs(residual) + (self.tau.value - 0.5) * residual))
return cp.sum(cp.multiply(weights, 0.5 * cp.abs(residual) + (self.tau.value - 0.5) * residual))

def get_regularizer(self, coefficients, fit_intercept):
    """
    Build the L2 (ridge) penalty term: the squared 2-norm of the coefficients.

    When fit_intercept is truthy the first coefficient is left out of the
    penalty, so the intercept is not shrunk.
    NOTE: this assumes the intercept is the first column / first coefficient.
    """
    # Drop the leading (intercept) coefficient from the penalty when one is fitted.
    penalized = coefficients[1:] if fit_intercept else coefficients
    l2_norm = cp.pnorm(penalized, p=2)
    return l2_norm**2

def __solve(self, x, y, weights, lambda_, fit_intercept, verbose):
    """
    Build the regularized quantile regression problem and solve it.

    The objective is the weighted quantile loss plus `lambda_` times the
    regularizer. One coefficient is created per column of `x`.

    Returns a tuple (coefficients, problem): the cvxpy variable holding the
    solution and the solved cvxpy problem.
    """
    # Check the conditioning of the design matrix before building anything.
    self._check_matrix_condition(x)

    n_features = x.shape[1]
    coefficients = cp.Variable((n_features,))

    data_loss = self.get_loss_function(x, y, coefficients, weights)
    penalty = lambda_ * self.get_regularizer(coefficients, fit_intercept)
    problem = cp.Problem(cp.Minimize(data_loss + penalty))

    # Solver-specific keyword arguments; empty when none are configured.
    solver_options = self.KWARGS.get(self.solver, {})
    problem.solve(solver=self.solver, verbose=verbose, **solver_options)
    return coefficients, problem

def fit(self, x, y, tau_value=0.5, weights=None, verbose=False, save_problem=False, normalize_weights=True):
def fit(
self,
x,
y,
tau_value=0.5,
weights=None,
lambda_=0,
fit_intercept=True,
verbose=False,
save_problem=False,
normalize_weights=True,
):
"""
Fit the (weighted) quantile regression problem.
Weights do not need to sum to one; when `normalize_weights=True` they are divided by their sum before fitting.
If fit_intercept=True then intercept is assumed to be the first column in `x`
"""
if weights is None: # if weights are none, give unit weights

self._check_any_element_nan_or_inf(x)
self._check_any_element_nan_or_inf(y)

if fit_intercept:
self._check_intercept(x)

if weights is None: # if weights are none, give unit weights
weights = [1] * x.shape[0]
if normalize_weights:
weights_sum = np.sum(weights)
if weights_sum == 0:
# This should not happen
raise ZeroDivisionError
weights = weights / weights_sum

self.tau.value = tau_value
coefficients, problem = self.__solve(x, y, weights, verbose)
coefficients, problem = self.__solve(x, y, weights, lambda_, fit_intercept, verbose)
self.coefficients = coefficients.value
if save_problem:
self.problem = problem
Expand All @@ -90,4 +139,6 @@ def predict(self, x):
"""
Returns predictions
"""
self._check_any_element_nan_or_inf(x)

return self.coefficients @ x.T
5 changes: 3 additions & 2 deletions src/elexsolver/TransitionMatrixSolver.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
import cvxpy as cp

class TransitionMatrixSolver():

class TransitionMatrixSolver:
def __init__(self):
self.transition_matrix = None

Expand All @@ -12,7 +13,7 @@ def __get_constraint(X, strict):

def __solve(self, A, B, strict):
transition_matrix = cp.Variable((A.shape[1], B.shape[1]))
loss_function = cp.norm(A @ transition_matrix - B, 'fro')
loss_function = cp.norm(A @ transition_matrix - B, "fro")
objective = cp.Minimize(loss_function)
constraint = TransitionMatrixSolver.__get_constraint(transition_matrix, strict)
problem = cp.Problem(objective, constraint)
Expand Down
3 changes: 2 additions & 1 deletion src/elexsolver/logging.py
Original file line number Diff line number Diff line change
Expand Up @@ -31,4 +31,5 @@ def initialize_logging(logging_config=None):
app_log_level = os.getenv("APP_LOG_LEVEL", "INFO")
LOGGING_CONFIG["loggers"]["elexsolver"]["level"] = app_log_level
logging_config = LOGGING_CONFIG
logging.config.dictConfig(logging_config)
logging.captureWarnings(True)
logging.config.dictConfig(logging_config)
Loading

0 comments on commit 717ad24

Please sign in to comment.