diff --git a/CHANGELOG.rst b/CHANGELOG.rst
index 3ca6c7d..3fa6068 100644
--- a/CHANGELOG.rst
+++ b/CHANGELOG.rst
@@ -8,6 +8,17 @@ This document records all notable changes to
This project adheres to `PEP 440 -- Version Identification
and Dependency Specification `_.
+0.3.3 ()
+---------
+
+- Providing sklearn-like features
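+ - Added fit_transform method that acts like the `sklearn NMF fit_transform
+ method <https://scikit-learn.org/stable/modules/generated/sklearn.decomposition.NMF.html#sklearn.decomposition.NMF.fit_transform>`_.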
+ - Returns C_
+ - Added components_ attribute, which is synonymous with ST_
+ - Added fit_kwargs parameter to McrAR that will pass forward to the fit and fit_transform methods
+ - One can, e.g., set the ST or C guess from instantiation instead of calling fit or fit_transform
+
0.3.2 (19-06-25)
----------------
diff --git a/README.rst b/README.rst
index 087e833..90617b7 100644
--- a/README.rst
+++ b/README.rst
@@ -1,14 +1,14 @@
.. -*- mode: rst -*-
-.. image:: https://travis-ci.com/CCampJr/pyMCR.svg?branch=master
+.. image:: https://travis-ci.com/CCampJr/pyMCR.svg?branch=0.3.X
:alt: Travis CI Status
:target: https://travis-ci.com/CCampJr/pyMCR
-.. image:: https://ci.appveyor.com/api/projects/status/ajld1bj7jo4oweio/branch/master?svg=true
+.. image:: https://ci.appveyor.com/api/projects/status/ajld1bj7jo4oweio/branch/0.3.X?svg=true
:alt: AppVeyor CI Status
:target: https://ci.appveyor.com/project/CCampJr/pyMCR
-.. image:: https://codecov.io/gh/CCampJr/pyMCR/branch/master/graph/badge.svg
+.. image:: https://codecov.io/gh/CCampJr/pyMCR/branch/0.3.X/graph/badge.svg
:alt: Codecov
:target: https://codecov.io/gh/CCampJr/pyMCR
diff --git a/pymcr/mcr.py b/pymcr/mcr.py
index 0c30ff1..8f11206 100644
--- a/pymcr/mcr.py
+++ b/pymcr/mcr.py
@@ -1,5 +1,6 @@
""" MCR Main Class for Computation"""
import sys as _sys
+import copy as _copy
import numpy as _np
import logging as _logging
@@ -30,6 +31,9 @@ class McrAR:
Instantiated regression class (or string, see Notes) for calculating
the S^T matrix
+ fit_kwargs : dict
+ kwargs sent to fit and fit_transform methods
+
c_fit_kwargs : dict
kwargs sent to c_regr.fit method
@@ -78,6 +82,9 @@ class McrAR:
Most recently calculated S^T matrix (that did not cause a tolerance
failure)
+ components_ : ndarray [n_targets, n_features]
+ Synonym for ST_, providing sklearn like compatibility
+
C_opt_ : ndarray [n_samples, n_targets]
[Optimal] C matrix for lowest err attribute
@@ -131,8 +138,8 @@ class McrAR:
"""
- def __init__(self, c_regr=OLS(), st_regr=OLS(), c_fit_kwargs={},
- st_fit_kwargs={}, c_constraints=[ConstraintNonneg()],
+ def __init__(self, c_regr=OLS(), st_regr=OLS(), fit_kwargs={},
+ c_fit_kwargs={}, st_fit_kwargs={}, c_constraints=[ConstraintNonneg()],
st_constraints=[ConstraintNonneg()],
max_iter=50, err_fcn=mse,
tol_increase=0.0, tol_n_increase=10, tol_err_change=None,
@@ -142,6 +149,8 @@ def __init__(self, c_regr=OLS(), st_regr=OLS(), c_fit_kwargs={},
Multivariate Curve Resolution - Alternating Regression
"""
+ self.fit_kwargs = fit_kwargs
+
self.max_iter = max_iter
self.tol_increase = tol_increase
@@ -282,6 +291,13 @@ def fit(self, D, C=None, ST=None, st_fix=None, c_fix=None, c_first=True,
Notes
-----
+ - Parameters passed to fit will SUPERSEDE the corresponding entries in fit_kwargs,
+ if fit_kwargs was provided during McrAR instantiation.
+ - Note that providing C (or ST) via fit_kwargs and providing ST (or C) to fit or
+ fit_transform will raise an error unless both c_fix and st_fix are also provided.
+ - When in doubt, clear fit_kwargs via self.fit_kwargs = {}
+ - fit_kwargs does not affect the verbose or c_first parameters
+
- pyMCR (>= 0.3.1) uses the native Python logging module
rather than print statements; thus, to see the messages, one will
need to log-to-file or stream to stdout. More info is available in
@@ -294,6 +310,31 @@ def fit(self, D, C=None, ST=None, st_fix=None, c_fix=None, c_first=True,
else:
_logger.setLevel(_logging.INFO)
+ if self.fit_kwargs:
+ temp = self.fit_kwargs.get('C')
+ if (temp is not None) & (C is None):
+ C = temp
+
+ temp = self.fit_kwargs.get('ST')
+ if (temp is not None) & (ST is None):
+ ST = temp
+
+ temp = self.fit_kwargs.get('st_fix')
+ if (temp is not None) & (st_fix is None):
+ st_fix = temp
+
+ temp = self.fit_kwargs.get('c_fix')
+ if (temp is not None) & (c_fix is None):
+ c_fix = temp
+
+ temp = self.fit_kwargs.get('post_iter_fcn')
+ if (temp is not None) & (post_iter_fcn is None):
+ post_iter_fcn = temp
+
+ temp = self.fit_kwargs.get('post_half_fcn')
+ if (temp is not None) & (post_iter_fcn is None):
+ post_half_fcn = temp
+
# Ensure only C or ST provided
if (C is None) & (ST is None):
raise TypeError('C or ST estimate must be provided')
@@ -513,6 +554,32 @@ def fit(self, D, C=None, ST=None, st_fix=None, c_fix=None, c_first=True,
self.exit_tol_err_change = True
break
+ def fit_transform(self, D, **kwargs):
+ """
+ This performs the same purpose as the fit method, but returns the C_ matrix.
+ Really, it's just to enable sklearn-expectant APIs compatible with pyMCR.
+
+ It is recommended to use the fit method and retrieve your results from C_ and ST_
+
+ See documentation for the fit method
+
+ Returns
+ --------
+
+ C_ : ndarray
+ C-matrix is returned
+
+ """
+
+ self.fit(D, **kwargs)
+
+ return self.C_
+
+ @property
+ def components_(self):
+ """ This is just provided for sklearn-like functionality """
+
+ return self.ST_
if __name__ == '__main__': # pragma: no cover
# PyMCR uses the Logging facility to capture messaging
diff --git a/pymcr/tests/test_mcr_sklearn_like.py b/pymcr/tests/test_mcr_sklearn_like.py
new file mode 100644
index 0000000..cefdf54
--- /dev/null
+++ b/pymcr/tests/test_mcr_sklearn_like.py
@@ -0,0 +1,290 @@
+import numpy as np
+
+
+from numpy.testing import assert_allclose, assert_equal, assert_array_less
+
+import pytest
+
+import pymcr
+from pymcr.mcr import McrAR
+from pymcr.metrics import mse
+from pymcr.constraints import ConstraintNonneg, ConstraintNorm
+
+@pytest.fixture(scope="function")
+def dataset():
+ """ Setups dataset """
+
+ M = 21
+ N = 21
+ P = 101
+ n_components = 2
+
+ C_img = np.zeros((M,N,n_components))
+ C_img[...,0] = np.dot(np.ones((M,1)),np.linspace(0,1,N)[None,:])
+ C_img[...,1] = 1 - C_img[...,0]
+
+ St_known = np.zeros((n_components, P))
+ St_known[0,40:60] = 1
+ St_known[1,60:80] = 2
+
+ C_known = C_img.reshape((-1, n_components))
+
+ D_known = np.dot(C_known, St_known)
+
+ yield C_known, D_known, St_known
+
+def test_sklearn_mcr_ideal_default(dataset):
+ """ Provides C/St_known so optimal should be 1 iteration """
+
+ C_known, D_known, St_known = dataset
+
+ mcrar = McrAR(fit_kwargs={'ST':St_known})
+ mcrar.fit(D_known)
+ assert_equal(1, mcrar.n_iter_opt)
+ assert ((mcrar.D_ - D_known)**2).mean() < 1e-10
+ assert ((mcrar.D_opt_ - D_known)**2).mean() < 1e-10
+
+ mcrar = McrAR(fit_kwargs={'C':C_known})
+ mcrar.fit(D_known)
+ assert_equal(2, mcrar.n_iter_opt)
+ assert ((mcrar.D_ - D_known)**2).mean() < 1e-10
+ assert ((mcrar.D_opt_ - D_known)**2).mean() < 1e-10
+
+def test_sklearn_mcr_ideal_str_regressors(dataset):
+ """ Test MCR with string-provded regressors"""
+
+ C_known, D_known, St_known = dataset
+
+ mcrar = McrAR(c_regr='OLS', st_regr='OLS', fit_kwargs={'ST':St_known})
+ mcrar.fit(D_known, verbose=True)
+ assert_equal(1, mcrar.n_iter_opt)
+ assert isinstance(mcrar.c_regressor, pymcr.regressors.OLS)
+ assert isinstance(mcrar.st_regressor, pymcr.regressors.OLS)
+
+ mcrar = McrAR(c_regr='NNLS', st_regr='NNLS', fit_kwargs={'ST':St_known})
+ mcrar.fit(D_known)
+ assert_equal(1, mcrar.n_iter_opt)
+ assert isinstance(mcrar.c_regressor, pymcr.regressors.NNLS)
+ assert isinstance(mcrar.st_regressor, pymcr.regressors.NNLS)
+ assert ((mcrar.D_ - D_known)**2).mean() < 1e-10
+ assert ((mcrar.D_opt_ - D_known)**2).mean() < 1e-10
+
+ # Provided C_known this time
+ mcrar = McrAR(c_regr='OLS', st_regr='OLS', fit_kwargs={'C':C_known})
+ mcrar.fit(D_known)
+
+ # Turns out some systems get it in 1 iteration, some in 2
+ # assert_equal(1, mcrar.n_iter_opt)
+ assert_equal(True, mcrar.n_iter_opt<=2)
+
+ assert ((mcrar.D_ - D_known)**2).mean() < 1e-10
+ assert ((mcrar.D_opt_ - D_known)**2).mean() < 1e-10
+
+def test_sklearn_mcr_max_iterations(dataset):
+ """ Test MCR exits at max_iter"""
+
+ C_known, D_known, St_known = dataset
+
+ # Seeding with a constant of 0.1 for C, actually leads to a bad local
+ # minimum; thus, the err_change gets really small with a relatively bad
+ # error. The tol_err_change is set to None, so it makes it to max_iter.
+ mcrar = McrAR(max_iter=50, c_regr='OLS', st_regr='OLS',
+ st_constraints=[ConstraintNonneg()],
+ c_constraints=[ConstraintNonneg(), ConstraintNorm()],
+ tol_increase=None, tol_n_increase=None,
+ tol_err_change=None, tol_n_above_min=None, fit_kwargs={'C':C_known*0 + 0.1})
+ mcrar.fit(D_known)
+ assert mcrar.exit_max_iter_reached
+
+def test_sklearn_mcr_tol_increase(dataset):
+ """ Test MCR exits due error increasing above a tolerance fraction"""
+
+ C_known, D_known, St_known = dataset
+
+ # Seeding with a constant of 0.1 for C, actually leads to a bad local
+ # minimum; thus, the err_change gets really small with a relatively bad
+ # error.
+ mcrar = McrAR(max_iter=50, c_regr='OLS', st_regr='OLS',
+ st_constraints=[ConstraintNonneg()],
+ c_constraints=[ConstraintNonneg(), ConstraintNorm()],
+ tol_increase=0, tol_n_increase=None,
+ tol_err_change=None, tol_n_above_min=None, fit_kwargs={'C':C_known*0 + 0.1})
+ mcrar.fit(D_known)
+ assert mcrar.exit_tol_increase
+
+def test_sklearn_mcr_tol_n_increase(dataset):
+ """
+ Test MCR exits due iterating n times with an increase in error
+
+ Note: On some CI systems, the minimum err bottoms out; thus, tol_n_above_min
+ needed to be set to 0 to trigger a break.
+ """
+
+ C_known, D_known, St_known = dataset
+
+ mcrar = McrAR(max_iter=50, c_regr='OLS', st_regr='OLS',
+ st_constraints=[ConstraintNonneg()],
+ c_constraints=[ConstraintNonneg(), ConstraintNorm()],
+ tol_increase=None, tol_n_increase=0,
+ tol_err_change=None, tol_n_above_min=None, fit_kwargs={'C':C_known*0 + 0.1})
+ mcrar.fit(D_known)
+ assert mcrar.exit_tol_n_increase
+
+def test_sklearn_mcr_tol_err_change(dataset):
+ """ Test MCR exits due error increasing by a value """
+
+ C_known, D_known, St_known = dataset
+
+ mcrar = McrAR(max_iter=50, c_regr='OLS', st_regr='OLS',
+ st_constraints=[ConstraintNonneg()],
+ c_constraints=[ConstraintNonneg(), ConstraintNorm()],
+ tol_increase=None, tol_n_increase=None,
+ tol_err_change=1e-20, tol_n_above_min=None, fit_kwargs={'C':C_known})
+ mcrar.fit(D_known)
+ assert mcrar.exit_tol_err_change
+
+def test_sklearn_mcr_tol_n_above_min(dataset):
+ """
+ Test MCR exits due to half-terating n times with error above the minimum error.
+
+ Note: On some CI systems, the minimum err bottoms out; thus, tol_n_above_min
+ needed to be set to 0 to trigger a break.
+ """
+
+ C_known, D_known, St_known = dataset
+
+ mcrar = McrAR(max_iter=50, c_regr='OLS', st_regr='OLS',
+ st_constraints=[ConstraintNonneg()],
+ c_constraints=[ConstraintNonneg(), ConstraintNorm()],
+ tol_increase=None, tol_n_increase=None,
+ tol_err_change=None, tol_n_above_min=0, fit_kwargs={'C':C_known*0 + 0.1})
+ mcrar.fit(D_known)
+ assert mcrar.exit_tol_n_above_min
+
+
+def test_sklearn_mcr_st_semilearned():
+ """ Test when St items are fixed, i.e., enforced to be the same as the input, always """
+
+ M = 21
+ N = 21
+ P = 101
+ n_components = 3
+
+ C_img = np.zeros((M,N,n_components))
+ C_img[...,0] = np.dot(np.ones((M,1)),np.linspace(0,1,N)[None,:])
+ C_img[...,1] = np.dot(np.linspace(0,1,M)[:, None], np.ones((1,N)))
+ C_img[...,2] = 1 - C_img[...,0] - C_img[...,1]
+ C_img = C_img / C_img.sum(axis=-1)[:,:,None]
+
+ St_known = np.zeros((n_components, P))
+ St_known[0,30:50] = 1
+ St_known[1,50:70] = 2
+ St_known[2,70:90] = 3
+ St_known += 1
+
+ C_known = C_img.reshape((-1, n_components))
+
+ D_known = np.dot(C_known, St_known)
+
+ ST_guess = 1 * St_known
+ ST_guess[2, :] = np.random.randn(P)
+
+ mcrar = McrAR(max_iter=50, tol_increase=100, tol_n_increase=10,
+ st_constraints=[ConstraintNonneg()],
+ c_constraints=[ConstraintNonneg(), ConstraintNorm()],
+ tol_err_change=1e-10, fit_kwargs={'ST':ST_guess, 'st_fix':[0,1]})
+
+ mcrar.fit(D_known)
+ assert_equal(mcrar.ST_[0,:], St_known[0,:])
+ assert_equal(mcrar.ST_[1,:], St_known[1,:])
+
+def test_sklearn_mcr_c_semilearned():
+ """ Test when C items are fixed, i.e., enforced to be the same as the input, always """
+
+ M = 21
+ N = 21
+ P = 101
+ n_components = 3
+
+ C_img = np.zeros((M,N,n_components))
+ C_img[...,0] = np.dot(np.ones((M,1)),np.linspace(0,1,N)[None,:])
+ C_img[...,1] = np.dot(np.linspace(0,1,M)[:, None], np.ones((1,N)))
+ C_img[...,2] = 1 - C_img[...,0] - C_img[...,1]
+ C_img = C_img / C_img.sum(axis=-1)[:,:,None]
+
+ St_known = np.zeros((n_components, P))
+ St_known[0,30:50] = 1
+ St_known[1,50:70] = 2
+ St_known[2,70:90] = 3
+ St_known += 1
+
+ C_known = C_img.reshape((-1, n_components))
+
+ D_known = np.dot(C_known, St_known)
+
+ C_guess = 1 * C_known
+ C_guess[:, 2] = np.abs(np.random.randn(int(M*N))+0.1)
+
+ mcrar = McrAR(max_iter=50, tol_increase=100, tol_n_increase=10,
+ st_constraints=[ConstraintNonneg()],
+ c_constraints=[ConstraintNonneg(), ConstraintNorm()],
+ tol_err_change=1e-10, fit_kwargs={'C':C_guess, 'c_fix':[0,1]})
+
+ mcrar.fit(D_known)
+ assert_equal(mcrar.C_[:, 0], C_known[:, 0])
+ assert_equal(mcrar.C_[:, 1], C_known[:, 1])
+
+def test_sklearn_mcr_semilearned_both_c_st():
+ """
+ Test the special case when C & ST are provided, requiring C-fix ST-fix to
+ be provided
+ """
+
+ M = 21
+ N = 21
+ P = 101
+ n_components = 3
+
+ C_img = np.zeros((M,N,n_components))
+ C_img[...,0] = np.dot(np.ones((M,1)),np.linspace(0.1,1,N)[None,:])
+ C_img[...,1] = np.dot(np.linspace(0.1,1,M)[:, None], np.ones((1,N)))
+ C_img[...,2] = 1 - C_img[...,0] - C_img[...,1]
+ C_img = C_img / C_img.sum(axis=-1)[:,:,None]
+
+ St_known = np.zeros((n_components, P))
+ St_known[0,30:50] = 1
+ St_known[1,50:70] = 2
+ St_known[2,70:90] = 3
+ St_known += 1
+
+ C_known = C_img.reshape((-1, n_components))
+
+ D_known = np.dot(C_known, St_known)
+
+ C_guess = 1 * C_known
+ C_guess[:, 2] = np.abs(np.random.randn(int(M*N)))
+
+ mcrar = McrAR(max_iter=50, tol_increase=100, tol_n_increase=10,
+ st_constraints=[ConstraintNonneg()],
+ c_constraints=[ConstraintNonneg(), ConstraintNorm()],
+ tol_err_change=1e-10, fit_kwargs={'C':C_guess, 'ST':St_known,
+ 'c_fix':[0,1], 'st_fix':[0]})
+
+ mcrar.fit(D_known, c_first=True)
+ assert_equal(mcrar.C_[:, 0], C_known[:, 0])
+ assert_equal(mcrar.C_[:, 1], C_known[:, 1])
+ assert_equal(mcrar.ST_[0, :], St_known[0, :])
+
+ # ST-solve first
+ mcrar.fit(D_known, C=C_guess, ST=St_known, c_fix=[0,1], st_fix=[0], c_first=False)
+ assert_equal(mcrar.C_[:, 0], C_known[:, 0])
+ assert_equal(mcrar.C_[:, 1], C_known[:, 1])
+ assert_equal(mcrar.ST_[0, :], St_known[0, :])
+
+def test_sklearn_mcr_errors():
+
+ # Providing C in fit_kwargs and S^T to fit without both C_fix and St_fix
+ with pytest.raises(TypeError):
+ mcrar = McrAR(fit_kwargs={'C':np.random.randn(10,3), 'c_fix':[0]})
+ # Only c_fix
+ mcrar.fit(np.random.randn(10,5), ST=np.random.randn(3,5))
diff --git a/setup.py b/setup.py
index bc66e33..bb32b1f 100644
--- a/setup.py
+++ b/setup.py
@@ -8,7 +8,7 @@
long_description = f.read()
setup(name='pyMCR',
- version = '0.3.2rc0',
+ version = '0.3.3a0',
description = 'Multivariate Curve Resolution in Python',
long_description = long_description,
url = 'https://github.com/usnistgov/pyMCR',