diff --git a/CHANGELOG.rst b/CHANGELOG.rst index 3ca6c7d..3fa6068 100644 --- a/CHANGELOG.rst +++ b/CHANGELOG.rst @@ -8,6 +8,17 @@ This document records all notable changes to This project adheres to `PEP 440 -- Version Identification and Dependency Specification `_. +0.3.3 () +--------- + +- Providing sklearn-like features + - Added fit_transform method that acts like the `sklearn NMF fit_transform + method `_. + - Returns C_ + - Added components_ attribute, which is synonymous with ST_ + - Added fit_kwargs parameter to McrAR that will pass forward to the fit and fit_transform methods + - One can, e.g., set the ST or C guess from instantiation instead of calling fit or fit_transform + 0.3.2 (19-06-25) ---------------- diff --git a/README.rst b/README.rst index 087e833..90617b7 100644 --- a/README.rst +++ b/README.rst @@ -1,14 +1,14 @@ .. -*- mode: rst -*- -.. image:: https://travis-ci.com/CCampJr/pyMCR.svg?branch=master +.. image:: https://travis-ci.com/CCampJr/pyMCR.svg?branch=0.3.X :alt: Travis CI Status :target: https://travis-ci.com/CCampJr/pyMCR -.. image:: https://ci.appveyor.com/api/projects/status/ajld1bj7jo4oweio/branch/master?svg=true +.. image:: https://ci.appveyor.com/api/projects/status/ajld1bj7jo4oweio/branch/0.3.X?svg=true :alt: AppVeyor CI Status :target: https://ci.appveyor.com/project/CCampJr/pyMCR -.. image:: https://codecov.io/gh/CCampJr/pyMCR/branch/master/graph/badge.svg +.. 
image:: https://codecov.io/gh/CCampJr/pyMCR/branch/0.3.X/graph/badge.svg :alt: Codecov :target: https://codecov.io/gh/CCampJr/pyMCR diff --git a/pymcr/mcr.py b/pymcr/mcr.py index 0c30ff1..8f11206 100644 --- a/pymcr/mcr.py +++ b/pymcr/mcr.py @@ -1,5 +1,6 @@ """ MCR Main Class for Computation""" import sys as _sys +import copy as _copy import numpy as _np import logging as _logging @@ -30,6 +31,9 @@ class McrAR: Instantiated regression class (or string, see Notes) for calculating the S^T matrix + fit_kwargs : dict + kwargs sent to fit and fit_transform methods + c_fit_kwargs : dict kwargs sent to c_regr.fit method @@ -78,6 +82,9 @@ class McrAR: Most recently calculated S^T matrix (that did not cause a tolerance failure) + components_ : ndarray [n_targets, n_features] + Synonym for ST_, providing sklearn-like compatibility + C_opt_ : ndarray [n_samples, n_targets] [Optimal] C matrix for lowest err attribute @@ -131,8 +138,8 @@ class McrAR: """ - def __init__(self, c_regr=OLS(), st_regr=OLS(), c_fit_kwargs={}, - st_fit_kwargs={}, c_constraints=[ConstraintNonneg()], + def __init__(self, c_regr=OLS(), st_regr=OLS(), fit_kwargs={}, + c_fit_kwargs={}, st_fit_kwargs={}, c_constraints=[ConstraintNonneg()], st_constraints=[ConstraintNonneg()], max_iter=50, err_fcn=mse, tol_increase=0.0, tol_n_increase=10, tol_err_change=None, @@ -142,6 +149,8 @@ def __init__(self, c_regr=OLS(), st_regr=OLS(), c_fit_kwargs={}, Multivariate Curve Resolution - Alternating Regression """ + self.fit_kwargs = fit_kwargs + self.max_iter = max_iter self.tol_increase = tol_increase @@ -282,6 +291,13 @@ def fit(self, D, C=None, ST=None, st_fix=None, c_fix=None, c_first=True, Notes ----- + - Parameters to fit will SUPERSEDE anything in fit_kwargs, if provided during McrAR + instantiation. + - Note that providing C (or ST) to fit_kwargs and providing ST (or C) to fit or + fit_transform will raise an error. 
+ - When in doubt, clear fit_kwargs via self.fit_kwargs = {} + - Does not affect verbose or c_first parameters + - pyMCR (>= 0.3.1) uses the native Python logging module rather than print statements; thus, to see the messages, one will need to log-to-file or stream to stdout. More info is available in @@ -294,6 +310,31 @@ def fit(self, D, C=None, ST=None, st_fix=None, c_fix=None, c_first=True, else: _logger.setLevel(_logging.INFO) + if self.fit_kwargs: + temp = self.fit_kwargs.get('C') + if (temp is not None) & (C is None): + C = temp + + temp = self.fit_kwargs.get('ST') + if (temp is not None) & (ST is None): + ST = temp + + temp = self.fit_kwargs.get('st_fix') + if (temp is not None) & (st_fix is None): + st_fix = temp + + temp = self.fit_kwargs.get('c_fix') + if (temp is not None) & (c_fix is None): + c_fix = temp + + temp = self.fit_kwargs.get('post_iter_fcn') + if (temp is not None) & (post_iter_fcn is None): + post_iter_fcn = temp + + temp = self.fit_kwargs.get('post_half_fcn') + if (temp is not None) & (post_half_fcn is None): + post_half_fcn = temp + # Ensure only C or ST provided if (C is None) & (ST is None): raise TypeError('C or ST estimate must be provided') @@ -513,6 +554,32 @@ def fit(self, D, C=None, ST=None, st_fix=None, c_fix=None, c_first=True, self.exit_tol_err_change = True break + def fit_transform(self, D, **kwargs): + """ + This performs the same purpose as the fit method, but returns the C_ matrix. + Really, it's just to enable sklearn-expectant APIs compatible with pyMCR. 
+ + It is recommended to use the fit method and retrieve your results from C_ and ST_ + + See documentation for the fit method + + Returns + -------- + + C_ : ndarray + C-matrix is returned + + """ + + self.fit(D, **kwargs) + + return self.C_ + + @property + def components_(self): + """ This is just provided for sklearn-like functionality """ + + return self.ST_ if __name__ == '__main__': # pragma: no cover # PyMCR uses the Logging facility to capture messaging diff --git a/pymcr/tests/test_mcr_sklearn_like.py b/pymcr/tests/test_mcr_sklearn_like.py new file mode 100644 index 0000000..cefdf54 --- /dev/null +++ b/pymcr/tests/test_mcr_sklearn_like.py @@ -0,0 +1,290 @@ +import numpy as np + + +from numpy.testing import assert_allclose, assert_equal, assert_array_less + +import pytest + +import pymcr +from pymcr.mcr import McrAR +from pymcr.metrics import mse +from pymcr.constraints import ConstraintNonneg, ConstraintNorm + +@pytest.fixture(scope="function") +def dataset(): + """ Setups dataset """ + + M = 21 + N = 21 + P = 101 + n_components = 2 + + C_img = np.zeros((M,N,n_components)) + C_img[...,0] = np.dot(np.ones((M,1)),np.linspace(0,1,N)[None,:]) + C_img[...,1] = 1 - C_img[...,0] + + St_known = np.zeros((n_components, P)) + St_known[0,40:60] = 1 + St_known[1,60:80] = 2 + + C_known = C_img.reshape((-1, n_components)) + + D_known = np.dot(C_known, St_known) + + yield C_known, D_known, St_known + +def test_sklearn_mcr_ideal_default(dataset): + """ Provides C/St_known so optimal should be 1 iteration """ + + C_known, D_known, St_known = dataset + + mcrar = McrAR(fit_kwargs={'ST':St_known}) + mcrar.fit(D_known) + assert_equal(1, mcrar.n_iter_opt) + assert ((mcrar.D_ - D_known)**2).mean() < 1e-10 + assert ((mcrar.D_opt_ - D_known)**2).mean() < 1e-10 + + mcrar = McrAR(fit_kwargs={'C':C_known}) + mcrar.fit(D_known) + assert_equal(2, mcrar.n_iter_opt) + assert ((mcrar.D_ - D_known)**2).mean() < 1e-10 + assert ((mcrar.D_opt_ - D_known)**2).mean() < 1e-10 + +def 
test_sklearn_mcr_ideal_str_regressors(dataset): + """ Test MCR with string-provided regressors""" + + C_known, D_known, St_known = dataset + + mcrar = McrAR(c_regr='OLS', st_regr='OLS', fit_kwargs={'ST':St_known}) + mcrar.fit(D_known, verbose=True) + assert_equal(1, mcrar.n_iter_opt) + assert isinstance(mcrar.c_regressor, pymcr.regressors.OLS) + assert isinstance(mcrar.st_regressor, pymcr.regressors.OLS) + + mcrar = McrAR(c_regr='NNLS', st_regr='NNLS', fit_kwargs={'ST':St_known}) + mcrar.fit(D_known) + assert_equal(1, mcrar.n_iter_opt) + assert isinstance(mcrar.c_regressor, pymcr.regressors.NNLS) + assert isinstance(mcrar.st_regressor, pymcr.regressors.NNLS) + assert ((mcrar.D_ - D_known)**2).mean() < 1e-10 + assert ((mcrar.D_opt_ - D_known)**2).mean() < 1e-10 + + # Provided C_known this time + mcrar = McrAR(c_regr='OLS', st_regr='OLS', fit_kwargs={'C':C_known}) + mcrar.fit(D_known) + + # Turns out some systems get it in 1 iteration, some in 2 + # assert_equal(1, mcrar.n_iter_opt) + assert_equal(True, mcrar.n_iter_opt<=2) + + assert ((mcrar.D_ - D_known)**2).mean() < 1e-10 + assert ((mcrar.D_opt_ - D_known)**2).mean() < 1e-10 + +def test_sklearn_mcr_max_iterations(dataset): + """ Test MCR exits at max_iter""" + + C_known, D_known, St_known = dataset + + # Seeding with a constant of 0.1 for C, actually leads to a bad local + # minimum; thus, the err_change gets really small with a relatively bad + # error. The tol_err_change is set to None, so it makes it to max_iter. 
+ mcrar = McrAR(max_iter=50, c_regr='OLS', st_regr='OLS', + st_constraints=[ConstraintNonneg()], + c_constraints=[ConstraintNonneg(), ConstraintNorm()], + tol_increase=None, tol_n_increase=None, + tol_err_change=None, tol_n_above_min=None, fit_kwargs={'C':C_known*0 + 0.1}) + mcrar.fit(D_known) + assert mcrar.exit_max_iter_reached + +def test_sklearn_mcr_tol_increase(dataset): + """ Test MCR exits due to error increasing above a tolerance fraction""" + + C_known, D_known, St_known = dataset + + # Seeding with a constant of 0.1 for C, actually leads to a bad local + # minimum; thus, the err_change gets really small with a relatively bad + # error. + mcrar = McrAR(max_iter=50, c_regr='OLS', st_regr='OLS', + st_constraints=[ConstraintNonneg()], + c_constraints=[ConstraintNonneg(), ConstraintNorm()], + tol_increase=0, tol_n_increase=None, + tol_err_change=None, tol_n_above_min=None, fit_kwargs={'C':C_known*0 + 0.1}) + mcrar.fit(D_known) + assert mcrar.exit_tol_increase + +def test_sklearn_mcr_tol_n_increase(dataset): + """ + Test MCR exits due to iterating n times with an increase in error + + Note: On some CI systems, the minimum err bottoms out; thus, tol_n_above_min + needed to be set to 0 to trigger a break. 
+ """ + + C_known, D_known, St_known = dataset + + mcrar = McrAR(max_iter=50, c_regr='OLS', st_regr='OLS', + st_constraints=[ConstraintNonneg()], + c_constraints=[ConstraintNonneg(), ConstraintNorm()], + tol_increase=None, tol_n_increase=0, + tol_err_change=None, tol_n_above_min=None, fit_kwargs={'C':C_known*0 + 0.1}) + mcrar.fit(D_known) + assert mcrar.exit_tol_n_increase + +def test_sklearn_mcr_tol_err_change(dataset): + """ Test MCR exits due to error increasing by a value """ + + C_known, D_known, St_known = dataset + + mcrar = McrAR(max_iter=50, c_regr='OLS', st_regr='OLS', + st_constraints=[ConstraintNonneg()], + c_constraints=[ConstraintNonneg(), ConstraintNorm()], + tol_increase=None, tol_n_increase=None, + tol_err_change=1e-20, tol_n_above_min=None, fit_kwargs={'C':C_known}) + mcrar.fit(D_known) + assert mcrar.exit_tol_err_change + +def test_sklearn_mcr_tol_n_above_min(dataset): + """ + Test MCR exits due to half-iterating n times with error above the minimum error. + + Note: On some CI systems, the minimum err bottoms out; thus, tol_n_above_min + needed to be set to 0 to trigger a break. 
+ """ + + C_known, D_known, St_known = dataset + + mcrar = McrAR(max_iter=50, c_regr='OLS', st_regr='OLS', + st_constraints=[ConstraintNonneg()], + c_constraints=[ConstraintNonneg(), ConstraintNorm()], + tol_increase=None, tol_n_increase=None, + tol_err_change=None, tol_n_above_min=0, fit_kwargs={'C':C_known*0 + 0.1}) + mcrar.fit(D_known) + assert mcrar.exit_tol_n_above_min + + +def test_sklearn_mcr_st_semilearned(): + """ Test when St items are fixed, i.e., enforced to be the same as the input, always """ + + M = 21 + N = 21 + P = 101 + n_components = 3 + + C_img = np.zeros((M,N,n_components)) + C_img[...,0] = np.dot(np.ones((M,1)),np.linspace(0,1,N)[None,:]) + C_img[...,1] = np.dot(np.linspace(0,1,M)[:, None], np.ones((1,N))) + C_img[...,2] = 1 - C_img[...,0] - C_img[...,1] + C_img = C_img / C_img.sum(axis=-1)[:,:,None] + + St_known = np.zeros((n_components, P)) + St_known[0,30:50] = 1 + St_known[1,50:70] = 2 + St_known[2,70:90] = 3 + St_known += 1 + + C_known = C_img.reshape((-1, n_components)) + + D_known = np.dot(C_known, St_known) + + ST_guess = 1 * St_known + ST_guess[2, :] = np.random.randn(P) + + mcrar = McrAR(max_iter=50, tol_increase=100, tol_n_increase=10, + st_constraints=[ConstraintNonneg()], + c_constraints=[ConstraintNonneg(), ConstraintNorm()], + tol_err_change=1e-10, fit_kwargs={'ST':ST_guess, 'st_fix':[0,1]}) + + mcrar.fit(D_known) + assert_equal(mcrar.ST_[0,:], St_known[0,:]) + assert_equal(mcrar.ST_[1,:], St_known[1,:]) + +def test_sklearn_mcr_c_semilearned(): + """ Test when C items are fixed, i.e., enforced to be the same as the input, always """ + + M = 21 + N = 21 + P = 101 + n_components = 3 + + C_img = np.zeros((M,N,n_components)) + C_img[...,0] = np.dot(np.ones((M,1)),np.linspace(0,1,N)[None,:]) + C_img[...,1] = np.dot(np.linspace(0,1,M)[:, None], np.ones((1,N))) + C_img[...,2] = 1 - C_img[...,0] - C_img[...,1] + C_img = C_img / C_img.sum(axis=-1)[:,:,None] + + St_known = np.zeros((n_components, P)) + St_known[0,30:50] = 1 + 
St_known[1,50:70] = 2 + St_known[2,70:90] = 3 + St_known += 1 + + C_known = C_img.reshape((-1, n_components)) + + D_known = np.dot(C_known, St_known) + + C_guess = 1 * C_known + C_guess[:, 2] = np.abs(np.random.randn(int(M*N))+0.1) + + mcrar = McrAR(max_iter=50, tol_increase=100, tol_n_increase=10, + st_constraints=[ConstraintNonneg()], + c_constraints=[ConstraintNonneg(), ConstraintNorm()], + tol_err_change=1e-10, fit_kwargs={'C':C_guess, 'c_fix':[0,1]}) + + mcrar.fit(D_known) + assert_equal(mcrar.C_[:, 0], C_known[:, 0]) + assert_equal(mcrar.C_[:, 1], C_known[:, 1]) + +def test_sklearn_mcr_semilearned_both_c_st(): + """ + Test the special case when C & ST are provided, requiring C-fix ST-fix to + be provided + """ + + M = 21 + N = 21 + P = 101 + n_components = 3 + + C_img = np.zeros((M,N,n_components)) + C_img[...,0] = np.dot(np.ones((M,1)),np.linspace(0.1,1,N)[None,:]) + C_img[...,1] = np.dot(np.linspace(0.1,1,M)[:, None], np.ones((1,N))) + C_img[...,2] = 1 - C_img[...,0] - C_img[...,1] + C_img = C_img / C_img.sum(axis=-1)[:,:,None] + + St_known = np.zeros((n_components, P)) + St_known[0,30:50] = 1 + St_known[1,50:70] = 2 + St_known[2,70:90] = 3 + St_known += 1 + + C_known = C_img.reshape((-1, n_components)) + + D_known = np.dot(C_known, St_known) + + C_guess = 1 * C_known + C_guess[:, 2] = np.abs(np.random.randn(int(M*N))) + + mcrar = McrAR(max_iter=50, tol_increase=100, tol_n_increase=10, + st_constraints=[ConstraintNonneg()], + c_constraints=[ConstraintNonneg(), ConstraintNorm()], + tol_err_change=1e-10, fit_kwargs={'C':C_guess, 'ST':St_known, + 'c_fix':[0,1], 'st_fix':[0]}) + + mcrar.fit(D_known, c_first=True) + assert_equal(mcrar.C_[:, 0], C_known[:, 0]) + assert_equal(mcrar.C_[:, 1], C_known[:, 1]) + assert_equal(mcrar.ST_[0, :], St_known[0, :]) + + # ST-solve first + mcrar.fit(D_known, C=C_guess, ST=St_known, c_fix=[0,1], st_fix=[0], c_first=False) + assert_equal(mcrar.C_[:, 0], C_known[:, 0]) + assert_equal(mcrar.C_[:, 1], C_known[:, 1]) + 
assert_equal(mcrar.ST_[0, :], St_known[0, :]) + +def test_sklearn_mcr_errors(): + + # Providing C in fit_kwargs and S^T to fit without both C_fix and St_fix + with pytest.raises(TypeError): + mcrar = McrAR(fit_kwargs={'C':np.random.randn(10,3), 'c_fix':[0]}) + # Only c_fix + mcrar.fit(np.random.randn(10,5), ST=np.random.randn(3,5)) diff --git a/setup.py b/setup.py index bc66e33..bb32b1f 100644 --- a/setup.py +++ b/setup.py @@ -8,7 +8,7 @@ long_description = f.read() setup(name='pyMCR', - version = '0.3.2rc0', + version = '0.3.3a0', description = 'Multivariate Curve Resolution in Python', long_description = long_description, url = 'https://github.com/usnistgov/pyMCR',