Skip to content

Commit

Permalink
fix: Tanhscaler nan output for constant feature (#153)
Browse files Browse the repository at this point in the history
Signed-off-by: Avik Basu <[email protected]>
  • Loading branch information
ab93 authored Mar 27, 2023
1 parent 69006eb commit b61ac1f
Show file tree
Hide file tree
Showing 3 changed files with 36 additions and 4 deletions.
18 changes: 15 additions & 3 deletions numalogic/preprocess/transformer.py
Original file line number Diff line number Diff line change
Expand Up @@ -54,7 +54,7 @@ def inverse_transform(self, X) -> ArrayLike:

class TanhScaler(OneToOneFeatureMixin, TransformerMixin):
r"""
Tanh Estimator applies tanh normalization to the Z-score,
Tanh Estimator applies column-wise tanh normalization to the Z-score,
and scales the values between 0 and 1.
After scaling, the data has a mean of 0.5.
Expand All @@ -63,21 +63,29 @@ class TanhScaler(OneToOneFeatureMixin, TransformerMixin):
The higher the value, the steeper the linear portion of the curve,
but the earlier it reaches the asymptote (flattens out).
Args:
coeff: float value determining the spread of the scores
eps: threshold on a feature's range (max - min); a feature whose range
falls below it is treated as constant. To avoid division by zero or by
a very small number, the standard deviation is set to 1 for that feature.
References:
Nandakumar, Jain, Ross. 2005. Score Normalization in
Multimodal Biometric Systems, Pattern Recognition 38, 2270-2285.
https://web.cse.msu.edu/~rossarun/pubs/RossScoreNormalization_PR05.pdf
"""
__slots__ = ("_coeff", "_std", "_mean")
__slots__ = ("_coeff", "_std", "_mean", "_eps")

def __init__(self, coeff: float = 0.2):
def __init__(self, coeff: float = 0.2, eps: float = 1e-10):
    """
    Store the hyperparameters; the fitted statistics stay unset until ``fit`` runs.

    Args:
        coeff: float value determining the spread of the scores
        eps: threshold used by the constant-feature check that ``fit`` performs
    """
    # Hyperparameters supplied by the caller.
    self._coeff, self._eps = coeff, eps
    # Fitted statistics; populated by fit().
    self._mean = self._std = None

def fit(self, x: npt.NDArray[float]) -> Self:
self._mean = np.mean(x, axis=0)
self._std = np.std(x, axis=0)
self._check_if_constant(x)
return self

def transform(self, x: npt.NDArray[float]) -> npt.NDArray[float]:
Expand All @@ -86,3 +94,7 @@ def transform(self, x: npt.NDArray[float]) -> npt.NDArray[float]:

def fit_transform(self, x: npt.NDArray[float], y=None, **_) -> npt.NDArray[float]:
    """
    Fit the scaler on ``x`` and return the transformed version of that same array.

    Args:
        x: input array used both for fitting and for transforming
        y: accepted but never read
        **_: extra keyword arguments, accepted but never read

    Returns:
        The result of ``transform`` applied to ``x`` after fitting on it.
    """
    fitted = self.fit(x)
    return fitted.transform(x)

def _check_if_constant(self, x: npt.NDArray[float]) -> None:
    """
    Set the standard deviation of (near-)constant features to 1.

    A feature is treated as constant when its range (max - min along axis 0)
    is strictly below ``self._eps``. Its stored standard deviation is then
    replaced with 1.0 so that later division by it is safe. With ``eps=0.0``
    no feature qualifies and the stored values are left as computed.

    Args:
        x: the array the scaler is being fitted on
    """
    delta = np.max(x, axis=0) - np.min(x, axis=0)
    # np.where rather than in-place masked assignment: when x is 1-D,
    # np.std(x, axis=0) is a NumPy scalar, which does not support item
    # assignment and would raise TypeError.
    self._std = np.where(delta < self._eps, 1.0, self._std)
2 changes: 1 addition & 1 deletion pyproject.toml
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
[tool.poetry]
name = "numalogic"
version = "0.3.6"
version = "0.3.7"
description = "Collection of operational Machine Learning models and tools."
authors = ["Numalogic Developers"]
packages = [{ include = "numalogic" }]
Expand Down
20 changes: 20 additions & 0 deletions tests/preprocess/test_transformer.py
Original file line number Diff line number Diff line change
Expand Up @@ -42,6 +42,26 @@ def test_tanh_scaler_2(self):
assert_array_less(x_scaled, np.ones_like(x_scaled))
assert_array_less(np.zeros_like(x_scaled), x_scaled)

def test_tanh_scaler_3(self):
    """A constant (all-zero) column must not yield any NaN with the default eps."""
    x = np.random.randn(5, 3)
    x[:, 1] = np.zeros(5)

    scaler = TanhScaler()

    x_scaled = scaler.fit_transform(x)
    # any(), not all(): a single NaN in the constant column is already a failure.
    self.assertFalse(np.isnan(x_scaled[:, 1]).any())
    assert_array_less(x_scaled, np.ones_like(x_scaled))
    assert_array_less(np.zeros_like(x_scaled), x_scaled)

def test_tanh_scaler_nan(self):
    """With eps=0.0 the constant column is expected to come out entirely NaN."""
    features = np.random.randn(5, 3)
    # Make the middle column constant.
    features[:, 1] = 0.0

    scaled = TanhScaler(eps=0.0).fit_transform(features)

    constant_col_is_nan = np.isnan(scaled[:, 1])
    self.assertTrue(constant_col_is_nan.all())


# Run the test suite when this module is executed directly as a script.
if __name__ == "__main__":
    unittest.main()

0 comments on commit b61ac1f

Please sign in to comment.