Skip to content

Commit

Permalink
remove all var importance stuff
Browse files: browse the repository at this point in the history
  • Loading branch information
trentmc committed Jan 29, 2025
1 parent 86a8419 commit d2caa2b
Show file tree
Hide file tree
Showing 4 changed files with 3 additions and 165 deletions.
121 changes: 0 additions & 121 deletions pdr_backend/aimodel/aimodel.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,8 +2,6 @@

from enforce_typing import enforce_types
import numpy as np
from sklearn.calibration import CalibratedClassifierCV
from sklearn.inspection import permutation_importance


class Aimodel:
Expand All @@ -20,7 +18,6 @@ def __init__(
self._sk_regrs = sk_regrs # list of sklearn regressor model
self._y_thr = y_thr # threshold value for True vs False
self._sk_classif = sk_classif # sklearn classifier model
self._imps_tup = None # tuple of (imps_avg, imps_stddev)
self._ycont_offset = 0.0 # offset to the output of regression

@property
Expand Down Expand Up @@ -118,123 +115,5 @@ def _predict_Ycont(self, X):
def set_ycont_offset(self, ycont_offset: float):
self._ycont_offset = ycont_offset

@enforce_types
def importance_per_var(self, include_stddev: bool = False):
    """
    @description
      Look up the relative importance of each input variable, as
      previously stored in self._imps_tup.

    @arguments
      include_stddev -- if True, return the whole stored tuple
        (imps_avg, imps_stddev); otherwise return only imps_avg

    @return
      imps_avg - 1d array of [var_i]: rel_importance_float
      (optional) imps_stddev -- array [var_i]: rel_stddev_float
    """
    stored = self._imps_tup
    # importances must have been set before they can be reported
    assert stored is not None
    return stored if include_stddev else stored[0]

@enforce_types
def set_importance_per_var(self, X: np.ndarray, y: np.ndarray):
    """
    @description
      Compute the per-variable importances once and keep them on the model.
      Calling this a second time trips an assertion.

    @arguments
      X -- 2d array of [sample_i, var_i]:cont_value -- model inputs
      y -- 1d array of [sample_i]:value -- model outputs,
        where value is bool for classif (ytrue), or float for regr (ycont)

    @return
      <<sets self._imps_tup>>
    """
    already_set = bool(self._imps_tup)
    assert not already_set, "have already set importances"

    computed = self._calc_importance_per_var(X, y)
    self._imps_tup = computed

@enforce_types
def _calc_importance_per_var(self, X, y) -> tuple:
    """
    @description
      Estimate the relative importance of each input variable.

      - If every underlying sklearn model exposes `coef_`, importances are
        the normalized mean absolute coefficients, with stddev of zero.
      - Otherwise, importances come from sklearn's permutation_importance().
      - If y is constant, or no variable shows positive importance, a flat
        fallback is returned: 1/n for every entry of both arrays.

    @arguments
      X -- 2d array of [sample_i, var_i]:cont_value -- model inputs
      y -- 1d array of [sample_i]:value -- model outputs

    @return
      imps_avg -- 1d array of [var_i]: rel_importance_float
      imps_stddev -- array [var_i]: rel_stddev_float
    """
    n = X.shape[1]

    # fallback result: treat all variables as equally important
    flat_imps_avg = np.ones(n, dtype=float) / n
    flat_imps_stddev = np.ones(n, dtype=float) / n

    # a constant target gives nothing to attribute to any variable
    if np.min(y) == np.max(y):
        return flat_imps_avg, flat_imps_stddev

    # gather the underlying sklearn models
    if self.do_regr:
        assert self._sk_regrs is not None, "should have _sk_regrs"
        models = self._sk_regrs
    elif isinstance(self._sk_classif, CalibratedClassifierCV):
        # look through the calibration wrapper to the fitted estimators
        models = [cc.estimator for cc in self._sk_classif.calibrated_classifiers_]
    else:
        models = [self._sk_classif]

    if all(hasattr(model, "coef_") for model in models):
        # regressors use coef_ as-is; classifiers use the first row of coef_
        if self.do_regr:
            per_model = [np.abs(model.coef_) for model in models]
        else:
            per_model = [np.abs(model.coef_[0]) for model in models]
        coefs = np.mean(per_model, axis=0)

        # collapse a 2d coefficient matrix to one value per variable
        if coefs.ndim > 1:
            coefs = np.mean(coefs, axis=0)

        # all-zero (or NaN) coefficients can't be normalized; dividing
        # would give NaNs that fail the postconditions below
        coef_sum = np.sum(coefs)
        if not coef_sum > 0.0:
            return flat_imps_avg, flat_imps_stddev

        imps_avg = coefs / coef_sum
        imps_stddev = np.zeros_like(imps_avg)
    else:
        # for regression, pass this object itself as the estimator;
        # for classification, pass the sklearn classifier
        if self.do_regr:
            skm, scoring = self, "neg_root_mean_squared_error"
        else:
            skm, scoring = self._sk_classif, "f1"

        imps_bunch = permutation_importance(
            skm,
            X,
            y,
            scoring=scoring,
            n_repeats=30,  # magic number
        )
        imps_avg = imps_bunch.importances_mean

        if max(imps_avg) <= 0:  # all vars have negligible importance
            return flat_imps_avg, flat_imps_stddev

        imps_avg[imps_avg < 0.0] = 0.0  # some vars have negligible importance
        assert max(imps_avg) > 0.0, "should have some vars with imp > 0"

        imps_stddev = imps_bunch.importances_std

        # normalize, scaling stddev by the same factor as the averages
        _sum = sum(imps_avg)
        imps_avg = np.array(imps_avg) / _sum
        imps_stddev = np.array(imps_stddev) / _sum

    # postconditions
    assert imps_avg.shape == (n,)
    assert imps_stddev.shape == (n,)
    assert 1.0 - 1e-6 <= sum(imps_avg) <= 1.0 + 1e-6
    assert min(imps_avg) >= 0.0
    assert max(imps_avg) > 0
    assert min(imps_stddev) >= 0.0

    # return
    return (imps_avg, imps_stddev)

# for permutation_importance()
def fit(self):
    """
    @description
      No-op: takes no data, changes nothing, and returns None.
    """
    return None

def predict(self, X):
    """
    @description
      Alias for predict_ycont(): forwards X and returns its result unchanged.

    @arguments
      X -- model inputs, passed straight to self.predict_ycont()

    @return
      whatever self.predict_ycont(X) returns
    """
    ycont = self.predict_ycont(X)
    return ycont
8 changes: 0 additions & 8 deletions pdr_backend/aimodel/aimodel_factory.py
Original file line number Diff line number Diff line change
Expand Up @@ -126,10 +126,6 @@ def _build_wrapped_regr(
ycont_offset = current_yval - current_yvalhat
model.set_ycont_offset(ycont_offset)

# variable importances
if self.ss.calc_imps:
model.set_importance_per_var(X, ycont)

# return
return model

Expand Down Expand Up @@ -217,10 +213,6 @@ def _build_direct_classif(
# model
model = Aimodel(scaler, None, None, sk_classif)

# variable importances
if self.ss.calc_imps:
model.set_importance_per_var(X, ytrue)

# return
return model

Expand Down
34 changes: 3 additions & 31 deletions pdr_backend/aimodel/test/test_aimodel_factory_general.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,6 @@
from enforce_typing import enforce_types
from numpy.testing import assert_array_equal
import pytest
from pytest import approx

from pdr_backend.aimodel.aimodel_factory import AimodelFactory
from pdr_backend.aimodel.ycont_to_ytrue import ycont_to_ytrue
Expand Down Expand Up @@ -42,11 +41,7 @@ def test_aimodel_1var(approach: str, func: str):
ytrue = ycont > y_thr

# build model
model = factory.build(X, ytrue, ycont, y_thr, show_warnings=False)

# test variable importances
imps = model.importance_per_var()
assert_array_equal(imps, np.array([1.0]))
_ = factory.build(X, ytrue, ycont, y_thr, show_warnings=False)


@enforce_types
Expand Down Expand Up @@ -88,12 +83,6 @@ def test_aimodel_2vars(approach: str):
else:
_ = factory.build(X, ytrue, None, None, show_warnings=False)

# test variable importances
imps = model.importance_per_var()
assert sum(imps) == approx(1.0, 0.01)
assert imps[0] == approx(0.333, abs=0.3)
assert imps[1] == approx(0.667, abs=0.3)

# test predict_ycont()
if not model.do_regr:
return
Expand Down Expand Up @@ -212,12 +201,7 @@ def test_aimodel_5varmodel(approach: str):
ytrue = ycont > y_thr

# build model
model = factory.build(X, ytrue, ycont, y_thr, show_warnings=False)

# test variable importances
imps = model.importance_per_var()
assert len(imps) == 5
assert imps[0] < imps[1] < imps[2] < imps[3] < imps[4]
_ = factory.build(X, ytrue, ycont, y_thr, show_warnings=False)


@enforce_types
Expand Down Expand Up @@ -250,19 +234,7 @@ def test_aimodel_4vars_response(approach: str, target_n_classes: int):
ytrue = ycont > y_thr

# build model
model = factory.build(X, ytrue, ycont, y_thr, show_warnings=False)

# test variable importances
imps = model.importance_per_var()
if model.do_regr or target_n_classes == 2: # expect sane var impacts
assert imps[0] > imps[1] > imps[2] > imps[3] > 0.0
assert sum(imps) == approx(1.0, 0.01)
assert imps[0] == approx(4.0 / 10.0, abs=0.2)
assert imps[1] == approx(3.0 / 10.0, abs=0.2)
assert imps[2] == approx(2.0 / 10.0, abs=0.2)
assert imps[3] == approx(1.0 / 10.0, abs=0.2)
else:
assert min(imps) == max(imps) == 0.25
_ = factory.build(X, ytrue, ycont, y_thr, show_warnings=False)


@enforce_types
Expand Down
5 changes: 0 additions & 5 deletions pdr_backend/ppss/aimodel_ss.py
Original file line number Diff line number Diff line change
Expand Up @@ -96,11 +96,6 @@ def calibrate_probs(self) -> str:
def seed(self) -> Optional[int]:
return self.d.get("seed", None)

@property
def calc_imps(self) -> bool:
    """Calc feature importances? Reads "calc_imps" from self.d; True if absent."""
    key, fallback = "calc_imps", True
    return self.d.get(key, fallback)

def calibrate_probs_skmethod(self, N: int) -> str:
"""
@description
Expand Down

0 comments on commit d2caa2b

Please sign in to comment.