Skip to content

Commit

Permalink
Merge branch 'dev' of https://github.com/Quantmetry/qolmat into gsa_b…
Browse files Browse the repository at this point in the history
…ench
  • Loading branch information
Gsaes committed Mar 20, 2024
2 parents 3b81da8 + 53116b9 commit fe73f01
Show file tree
Hide file tree
Showing 35 changed files with 1,861 additions and 2,319 deletions.
2 changes: 1 addition & 1 deletion .bumpversion.cfg
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
[bumpversion]
current_version = 0.1.1
current_version = 0.1.2
commit = True
tag = True

Expand Down
20 changes: 20 additions & 0 deletions HISTORY.rst
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,26 @@
History
=======

0.1.3 (2024-03-07)
------------------

* RPCA algorithms now start with a normalizing scaler
* The EM algorithms now include a gradient projection step to be more robust to colinearity
* The EM algorithm based on the Gaussian model is now initialized using a robust estimation of the covariance matrix
* A bug in the EM algorithm has been patched: the normalizing matrix gamma was creating a sampling biais
* Speed up of the EM algorithm likelihood maximization, using the conjugate gradient method
* The ImputeRegressor class now handles the nans by `row` by default
* The metric `frechet` was not correctly called and has been patched
* The EM algorithm with VAR(p) now fills initial holes in order to avoid exponential explosions

0.1.2 (2024-02-28)
------------------

* RPCA Noisy now has separate fit and transform methods, allowing to impute efficiently new data without retraining
* The class ImputerRPCA has been splitted between a class ImputerRpcaNoisy, which can fit then transform, and a class ImputerRpcaPcp which can only fit_transform
* The class SoftImpute has been recoded to better fit the architecture, and is more tested
* The class RPCANoisy now relies on sparse matrices for H, speeding it up for large instances

0.1.1 (2023-11-03)
-------------------

Expand Down
4 changes: 3 additions & 1 deletion docs/api.rst
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,9 @@ Imputers
imputations.imputers.ImputerOracle
imputations.imputers.ImputerRegressor
imputations.imputers.ImputerResiduals
imputations.imputers.ImputerRPCA
imputations.imputers.ImputerRpcaPcp
imputations.imputers.ImputerRpcaNoisy
imputations.imputers.ImputerSoftImpute
imputations.imputers.ImputerShuffle

Comparator
Expand Down
2 changes: 1 addition & 1 deletion docs/conf.py
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,7 @@
author = "Quantmetry"

# The full version, including alpha/beta/rc tags
version = "0.1.1"
version = "0.1.2"
release = version

# -- General configuration ---------------------------------------------------
Expand Down
68 changes: 40 additions & 28 deletions docs/imputers.rst

Large diffs are not rendered by default.

191 changes: 163 additions & 28 deletions examples/RPCA.md
Original file line number Diff line number Diff line change
Expand Up @@ -8,12 +8,12 @@ jupyter:
format_version: '1.3'
jupytext_version: 1.14.4
kernelspec:
display_name: Python 3 (ipykernel)
display_name: env_qolmat_dev
language: python
name: python3
name: env_qolmat_dev
---

```python
```python tags=[]
%reload_ext autoreload
%autoreload 2

Expand All @@ -26,17 +26,18 @@ import sys

from math import pi

from qolmat.utils import plot, data
from qolmat.imputations.rpca.rpca_pcp import RPCAPCP
from qolmat.imputations.rpca.rpca_noisy import RPCANoisy
from qolmat.utils import utils, plot, data
from qolmat.imputations.rpca.rpca_pcp import RpcaPcp
from qolmat.imputations.rpca.rpca_noisy import RpcaNoisy
from qolmat.imputations.softimpute import SoftImpute
from qolmat.imputations.rpca import rpca_utils
from qolmat.utils.data import generate_artificial_ts
```

**Generate synthetic data**

```python
n_samples = 1000
```python tags=[]
n_samples = 10000
periods = [100, 20]
amp_anomalies = 0.5
ratio_anomalies = 0.05
Expand All @@ -47,13 +48,15 @@ X_true, A_true, E_true = generate_artificial_ts(n_samples, periods, amp_anomalie
signal = X_true + A_true + E_true

# Adding missing data
#signal[5:20] = np.nan
mask = np.random.choice(len(signal), round(len(signal) / 20))
signal[mask] = np.nan
signal[120:180] = np.nan
signal[:20] = np.nan
# signal[80:220] = np.nan
# mask = np.random.choice(len(signal), round(len(signal) / 20))
# signal[mask] = np.nan

```

```python
```python tags=[]
fig = plt.figure(figsize=(15, 8))
ax = fig.add_subplot(4, 1, 1)
ax.title.set_text("Low-rank signal")
Expand All @@ -74,40 +77,172 @@ plt.plot(signal)
plt.show()
```

<!-- #region tags=[] -->
# Fit RPCA Noisy
<!-- #endregion -->

```python tags=[]
rpca_noisy = RpcaNoisy(tau=1, lam=.4, rank=1, norm="L2")
```

```python tags=[]
period = 100
D = utils.prepare_data(signal, period)
Omega = ~np.isnan(D)
D = utils.linear_interpolation(D)
```

```python tags=[]
M, A, L, Q = rpca_noisy.decompose_with_basis(D, Omega)
M2, A2 = rpca_noisy.decompose_on_basis(D, Omega, Q)
```

```python tags=[]
M_final = utils.get_shape_original(M, signal.shape)
A_final = utils.get_shape_original(A, signal.shape)
D_final = utils.get_shape_original(D, signal.shape)
signal_imputed = M_final + A_final
```

```python tags=[]
fig = plt.figure(figsize=(12, 4))

plt.plot(signal_imputed, label="Imputed signal with anomalies")
plt.plot(M_final, label="Imputed signal without anomalies")
plt.plot(A_final, label="Anomalies")
# plt.plot(D_final, label="D")
plt.plot(signal, color="black", label="Original signal")
plt.xlim(0, 400)
plt.legend()
plt.show()
```

## PCP RPCA

```python tags=[]
rpca_pcp = RpcaPcp(max_iterations=1000, lam=.1)
```

```python tags=[]
period = 100
D = utils.prepare_data(signal, period)
Omega = ~np.isnan(D)
D = utils.linear_interpolation(D)
```

```python tags=[]
M, A = rpca_pcp.decompose(D, Omega)
```

```python tags=[]
M_final = utils.get_shape_original(M, signal.shape)
A_final = utils.get_shape_original(A, signal.shape)
D_final = utils.get_shape_original(D, signal.shape)
# Y_final = utils.get_shape_original(Y, signal.shape)
signal_imputed = M_final + A_final
```

```python tags=[]
fig = plt.figure(figsize=(12, 4))

plt.plot(signal_imputed, label="Imputed signal with anomalies")
plt.plot(M_final, label="Imputed signal without anomalies")
plt.plot(A_final, label="Anomalies")

plt.plot(signal, color="black", label="Original signal")
plt.xlim(0, 400)
# plt.gca().twinx()
# plt.plot(Y_final, label="Y")
plt.legend()
plt.show()
```

## Soft Impute

```python tags=[]
imputer = SoftImpute(max_iterations=1000, tau=.1)
```

```python tags=[]
period = 100
D = utils.prepare_data(signal, period)
Omega = ~np.isnan(D)
D = utils.linear_interpolation(D)
```

```python tags=[]
M, A = imputer.decompose(D, Omega)
```

```python tags=[]
M_final = utils.get_shape_original(M, signal.shape)
A_final = utils.get_shape_original(A, signal.shape)
D_final = utils.get_shape_original(D, signal.shape)
# Y_final = utils.get_shape_original(Y, signal.shape)
signal_imputed = M_final + A_final
```

```python tags=[]
fig = plt.figure(figsize=(12, 4))

plt.plot(signal_imputed, label="Imputed signal with anomalies")
plt.plot(M_final, label="Imputed signal without anomalies")
plt.plot(A_final, label="Anomalies")

plt.plot(signal, color="black", label="Original signal")
plt.xlim(0, 400)
plt.legend()
plt.show()
```

## Temporal RPCA

```python
%%time
rpca_pcp = RPCAPCP(period=100, max_iterations=100, mu=.5, lam=0.1)
X, A = rpca_pcp.decompose_rpca_signal(signal)
imputed = signal - A
# rpca_noisy = RPCANoisy(period=10, tau=1, lam=0.4, rank=2, list_periods=[10], list_etas=[0.01], norm="L2")
rpca_noisy = RpcaNoisy(tau=1, lam=0.4, rank=2, norm="L2")
M, A = rpca_noisy.decompose(D, Omega)
# imputed = X
```

```python
```python tags=[]
fig = plt.figure(figsize=(12, 4))
plt.plot(X, color="black")
plt.plot(imputed)

plt.plot(signal_imputed, label="Imputed signal with anomalies")
plt.plot(M_final, label="Imputed signal without anomalies")
plt.plot(A_final, label="Anomalies")

plt.plot(signal, color="black", label="Original signal")
plt.xlim(0, 400)
# plt.gca().twinx()
# plt.plot(Y_final, label="Y")
plt.legend()
plt.show()
```

## Temporal RPCA
# EM VAR(p)

```python
signal.shape
from qolmat.imputations import em_sampler
```

```python
%%time
# rpca_noisy = RPCANoisy(period=10, tau=1, lam=0.4, rank=2, list_periods=[10], list_etas=[0.01], norm="L2")
rpca_noisy = RPCANoisy(period=10, tau=1, lam=0.4, rank=2, norm="L2")
X, A = rpca_noisy.decompose_rpca_signal(signal)
imputed =
p = 1
model = em_sampler.VARpEM(method="mle", max_iter_em=10, n_iter_ou=512, dt=1e-1, p=p)
```

```python
D = signal.reshape(-1, 1)
M_final = model.fit_transform(D)
```

```python
fig = plt.figure(figsize=(12, 4))
plt.plot(signal, color="black")
plt.plot(X_true)
plt.plot(X)
plt.plot(signal_imputed, label="Imputed signal with anomalies")
plt.plot(M_final, label="Imputed signal without anomalies")
plt.xlim(0, 400)
plt.legend()
plt.show()
```

```python
Expand Down
Loading

0 comments on commit fe73f01

Please sign in to comment.