From e8e3d13aebc5426985a8c665e6d302300ee06f05 Mon Sep 17 00:00:00 2001
From: Ivo Facoco
Date: Thu, 20 Feb 2025 13:03:45 +0000
Subject: [PATCH 1/7] Reworked TimeSeries feature extraction to follow project structure

fix: implemented BaseTSExtractor feature extractor parent class
fix: derived TSFEL child class from BaseTSExtractor
fix: added TimeSeries feature extractor factory class
---
 src/pymdma/api/hooks.py                   |  4 +-
 src/pymdma/common/definitions.py          |  6 +-
 src/pymdma/time_series/input_layer.py     | 14 +--
 src/pymdma/time_series/models/__init__.py |  4 +
 .../extractor.py}                         | 86 +++----------------
 src/pymdma/time_series/models/features.py | 24 ++++++
 src/pymdma/time_series/models/tsfel.py    | 64 ++++++++++++++
 tests/conftest.py                         |  6 +-
 8 files changed, 124 insertions(+), 84 deletions(-)
 rename src/pymdma/time_series/{utils/extract_features.py => models/extractor.py} (51%)
 create mode 100644 src/pymdma/time_series/models/features.py
 create mode 100644 src/pymdma/time_series/models/tsfel.py

diff --git a/src/pymdma/api/hooks.py b/src/pymdma/api/hooks.py
index c261a47..113ff6b 100644
--- a/src/pymdma/api/hooks.py
+++ b/src/pymdma/api/hooks.py
@@ -1,12 +1,12 @@
 from loguru import logger
 
 from ..image.models.features import ExtractorFactory as ImageFeatureExtractor
-from ..time_series.utils.extract_features import FeatureExtractor as TimeSeriesFeatureExtractor
+from ..time_series.models.features import ExtractorFactory as TimeSeriesFeatureExtractor
 
 
 def load_models_hook(ml_models, device="cpu"):
     logger.info("Loading ml models")
     # feature extractors
     ml_models["dino_vits8"] = ImageFeatureExtractor.model_from_name("dino_vits8").to(device)
-    ml_models["tsfel"] = TimeSeriesFeatureExtractor("tsfel", device)
+    ml_models["tsfel"] = TimeSeriesFeatureExtractor.model_from_name("tsfel")
 
     logger.info("Models loaded successfully")
diff --git a/src/pymdma/common/definitions.py b/src/pymdma/common/definitions.py
index eb31468..20a933c 100644
--- a/src/pymdma/common/definitions.py
+++ b/src/pymdma/common/definitions.py
@@ -79,5 +79,9 @@ def __init__(self, name: str) -> None:
         self.name = name
 
     @abstractmethod
-    def _extract_features_dataloader(self, dataloader):
+    def extract_features_from_files(self, *args, **kwargs):
+        pass
+
+    @abstractmethod
+    def _extract_features_dataloader(self, dataloader, **kwargs):
         pass
diff --git a/src/pymdma/time_series/input_layer.py b/src/pymdma/time_series/input_layer.py
index e63dee5..67c3436 100644
--- a/src/pymdma/time_series/input_layer.py
+++ b/src/pymdma/time_series/input_layer.py
@@ -10,7 +10,7 @@
 from pymdma.constants import ReferenceType, ValidationDomain
 
 from .data.simple_dataset import SimpleDataset
-from .utils.extract_features import FeatureExtractor
+from .models.features import ExtractorFactory
 
 # Get the absolute path of the parent directory
 parent_dir = os.path.abspath(os.path.join(os.path.dirname(__file__), ".."))
@@ -176,16 +176,18 @@ def get_embeddings(
         if model_instances is not None:
             if model_name in model_instances:
                 extractor = model_instances[model_name]
-            elif model_name == "default" and FeatureExtractor.default in model_instances:
-                extractor = model_instances[FeatureExtractor.default]
-        extractor = FeatureExtractor(model_name, device=self.device) if extractor is None else extractor
+            elif model_name == "default" and ExtractorFactory.default in model_instances:
+                extractor = model_instances[ExtractorFactory.default]
+        if extractor is None:
+            model_name = ExtractorFactory.default if model_name == "default" else model_name
+            extractor = ExtractorFactory.model_from_name(model_name) if extractor
is None else extractor - reference_features, _labels, _ = extractor.extract_features_dataloader( + reference_features, _labels, _ = extractor._extract_features_dataloader( self.reference_loader, self.reference_loader.dataset.fs, self.reference_loader.dataset.dims, ) - synth_features, _labels, self.instance_ids = extractor.extract_features_dataloader( + synth_features, _labels, self.instance_ids = extractor._extract_features_dataloader( self.target_loader, self.target_loader.dataset.fs, self.target_loader.dataset.dims, diff --git a/src/pymdma/time_series/models/__init__.py b/src/pymdma/time_series/models/__init__.py index e69de29..900d5f1 100644 --- a/src/pymdma/time_series/models/__init__.py +++ b/src/pymdma/time_series/models/__init__.py @@ -0,0 +1,4 @@ +from .features import ExtractorFactory +from .tsfel import TSFEL + +__all__ = ["ExtractorFactory", "TSFEL"] diff --git a/src/pymdma/time_series/utils/extract_features.py b/src/pymdma/time_series/models/extractor.py similarity index 51% rename from src/pymdma/time_series/utils/extract_features.py rename to src/pymdma/time_series/models/extractor.py index 24583d4..c8c1016 100644 --- a/src/pymdma/time_series/utils/extract_features.py +++ b/src/pymdma/time_series/models/extractor.py @@ -1,48 +1,29 @@ from pathlib import Path -from typing import List +from typing import Callable, List, Union import numpy as np +import torch import tsfel +from torch import nn from torch.utils.data import DataLoader +from pymdma.common.definitions import EmbedderInterface + from ..data.simple_dataset import _read_sig_file -class FeatureExtractor: +class BaseTSExtractor(nn.Module, EmbedderInterface): default: str = "tsfel" + extractor: Union[nn.Module, Callable] = None def __init__( self, - name: str, - device: str = "cpu", **kwargs, ): - """Initializes the feature extractor with the given parameters. - - Parameters - ---------- - name: str - identifier of the extractor to be used. - device: str - model device. Defaults to "cpu". - **kwargs: Additional keyword arguments. - - Raises - ------ - ValueError - if invalid variable "name" is provided for the extractor. - """ - self.name = name if name != "default" else "tsfel" - self.device = device - - if self.name == "tsfel": - self.extractor = TSFEL() - else: - raise ValueError(f"Invalid extractor name: {self.name}") - - if self.name != "tsfel": - self.extractor._model.to(device) + super().__init__() + pass + @torch.no_grad() def extract_features_from_files(self, files: List[Path], fs: int, dims: List, batch_size: int = 4): """Extract features from a list of image files. @@ -72,12 +53,13 @@ def extract_features_from_files(self, files: List[Path], fs: int, dims: List, ba for bsize in batch_sizes: end = start + bsize signals = [_read_sig_file(f) for f in files[start:end]] - batch = self.extractor.extract(signals, fs, dims) + batch = self(signals, fs, dims) act_array.append(batch) start += bsize return np.concatenate(act_array, axis=0) - def extract_features_dataloader(self, dataloader: DataLoader, fs: int, dims: List): + @torch.no_grad() + def _extract_features_dataloader(self, dataloader: DataLoader, fs: int, dims: List): """Use selected approach to extract features from all signals in the dataloader. 
@@ -101,50 +83,10 @@ def extract_features_dataloader(self, dataloader: DataLoader, fs: int, dims: Lis
         ids_array = []
 
         for batch, labels, signal_ids in dataloader:
-            batch_feat = self.extractor.extract(batch, fs, dims)
+            batch_feat = self(batch, fs, dims)
             act_array.append(batch_feat)
             labels_array.extend(labels)
             ids_array.extend(signal_ids)
 
         features = np.concatenate(act_array, axis=0)
-
         return features, labels_array, ids_array
-
-
-class TSFEL:
-    def __init__(self, domains=None):
-        # Generate default domain value
-        if domains is None:
-            domains = ["temporal", "statistical", "spectral"]
-        self.domains = domains
-
-    def extract(self, batch_windows, fs, dims):
-        """Extracts features from a batch of samples.
-
-        Parameters
-        ----------
-        batch_windows: List
-            Batch of signals with len(dims) chans.
-        fs: int
-            Sampling frequency
-        dims: List(str)
-            list with the names of each signal dimension/channel ex: name of each ECG Lead
-
-        Returns
-        -------
-        features: DataFrame
-            DataFrame with the features from each batch.
-        """
-        cfg_file = {}
-        for domain in self.domains:
-            cfg_file.update(tsfel.get_features_by_domain(domain))
-
-        features = tsfel.time_series_features_extractor(
-            cfg_file,
-            batch_windows,
-            fs=fs,
-            window_size=None,
-            header_names=dims,
-        )
-
-        return features
diff --git a/src/pymdma/time_series/models/features.py b/src/pymdma/time_series/models/features.py
new file mode 100644
index 0000000..198006c
--- /dev/null
+++ b/src/pymdma/time_series/models/features.py
@@ -0,0 +1,24 @@
+from typing import List, Optional
+
+from .tsfel import TSFEL
+
+
+class ExtractorFactory:
+    default = "tsfel"
+
+    @staticmethod
+    def model_from_name(
+        name: str,
+        domains: Optional[List[str]] = None,
+        **kwargs,
+    ):
+        """Initializes the feature extractor with the given parameters.
+
+        Args:
+            name (str): identifier of the extractor to be used.
+            domains (Optional[List[str]]): feature domains to extract. Defaults to all TSFEL domains.
+        """
+        if name == "tsfel":
+            return TSFEL(domains, **kwargs)
+        else:
+            raise ValueError(f"Model {name} not available.")
diff --git a/src/pymdma/time_series/models/tsfel.py b/src/pymdma/time_series/models/tsfel.py
new file mode 100644
index 0000000..d12d9e7
--- /dev/null
+++ b/src/pymdma/time_series/models/tsfel.py
@@ -0,0 +1,64 @@
+from typing import List, Optional
+
+import tsfel
+
+from .extractor import BaseTSExtractor
+
+
+class TSFEL(BaseTSExtractor):
+    def __init__(
+        self,
+        domains: Optional[List[str]] = None,
+        verbose: bool = False,
+    ):
+        """Initializes the TSFEL feature extractor with the specified domains
+        and verbosity.
+
+        Parameters
+        ----------
+        domains : Optional[List[str]]
+            A list of domains to extract features from. If None, the default domains
+            ["temporal", "statistical", "spectral"] will be used.
+        verbose : bool
+            If True, enables verbose output during feature extraction.
+        """
+
+        # Generate default domain value
+        if domains is None:
+            domains = ["temporal", "statistical", "spectral"]
+        self.domains = domains
+        self.verbose = verbose
+
+        # update domain configurations
+        self.cfg_file = {}
+        for domain in self.domains:
+            self.cfg_file.update(tsfel.get_features_by_domain(domain))
+
+    def __call__(self, batch_windows, fs, dims):
+        """Extracts features from a batch of samples.
+
+        Parameters
+        ----------
+        batch_windows: List
+            Batch of signals, each with len(dims) channels.
+        fs: int
+            Sampling frequency.
+        dims: List[str]
+            Names of each signal dimension/channel, e.g. the name of each ECG lead.
+
+        Returns
+        -------
+        features: DataFrame
+            DataFrame with the features from each batch.
+        """
+        print(self.verbose, int(self.verbose))
+        features = tsfel.time_series_features_extractor(
+            self.cfg_file,
+            batch_windows,
+            fs=fs,
+            window_size=None,
+            header_names=dims,
+            verbose=int(self.verbose),
+        )
+
+        return features
diff --git a/tests/conftest.py b/tests/conftest.py
index db053bb..f294b9c 100644
--- a/tests/conftest.py
+++ b/tests/conftest.py
@@ -18,7 +18,7 @@
 from pymdma.image.models.features import ExtractorFactory as ImageFeatureExtractor
 from pymdma.time_series.data.simple_dataset import _read_sig_file
 from pymdma.time_series.input_layer import _get_data_files_path
-from pymdma.time_series.utils.extract_features import FeatureExtractor as TimeSeriesFeatureExtractor
+from pymdma.time_series.models.features import ExtractorFactory as TimeSeriesFeatureExtractor
 
 MODALITIES = ["image", "tabular", "time_series"]
 VALIDATION_TYPES = ["input_val", "synthesis_val"]
@@ -73,7 +73,7 @@ def get_transforms(input_size: Tuple[int], interpolation: int = Image.BILINEAR):
         [
             transforms.Resize(input_size, interpolation=interpolation),
             transforms.ToTensor(),
-        ]
+        ],
     )
 
     return get_transforms
@@ -99,7 +99,7 @@ def synth_ts_filenames():
 @pytest.fixture()
 def ts_feature_extractor():
     def get_extractor(name):
-        return TimeSeriesFeatureExtractor(name)
+        return TimeSeriesFeatureExtractor.model_from_name(name)
 
     return get_extractor
 

From b3da40ef60a03b367009d86fdf8195a5432a9527 Mon Sep 17 00:00:00 2001
From: Ivo Facoco
Date: Thu, 20 Feb 2025 13:05:47 +0000
Subject: [PATCH 2/7] fix: removed input layer logic from time_series examples notebook

fix: changed pymdma install directive in time_series notebook
---
 notebooks/time_series_examples.ipynb | 113 ++++++++++++++++++++++-----
 1 file changed, 94 insertions(+), 19 deletions(-)

diff --git a/notebooks/time_series_examples.ipynb b/notebooks/time_series_examples.ipynb
index fef7d95..9149369 100644
--- a/notebooks/time_series_examples.ipynb
+++ b/notebooks/time_series_examples.ipynb
@@ -6,7 +6,7 @@
    "metadata": {},
    "outputs": [],
    "source": [
-    "%pip install \"pymdma[time_series] @ https://github.com/fraunhoferportugal/pymdma.git\" --find-links \"https://download.pytorch.org/whl/cpu/torch_stable.html\""
+    "%pip install \"pymdma[time_series]\" --find-links \"https://download.pytorch.org/whl/cpu/torch_stable.html\""
    ]
   },
   {
@@ -34,8 +34,66 @@
    "\n",
    "import matplotlib.pyplot as plt\n",
    "import numpy as np\n",
+    "import wfdb\n",
    "\n",
-    "from pymdma.time_series.input_layer import TimeSeriesInputLayer"
+    "from pymdma.time_series.models.features import ExtractorFactory"
    ]
   },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "def read_sig_file(file_path: Path):\n",
+    "    \"\"\"Read a signal file from the supported file extensions.\n",
+    "\n",
+    "    Parameters\n",
+    "    ----------\n",
+    "    file_path: Union[str, Path]\n",
+    "        Path to the file.\n",
+    "\n",
+    "    Returns\n",
+    "    -------\n",
+    "    np.ndarray\n",
+    "        Signal array read from the file.\n",
+    "\n",
+    "    Raises\n",
+    "    ------\n",
+    "    ValueError\n",
+    "        If the file extension is not supported.\n",
+    "    \"\"\"\n",
+    "    file_path = Path(file_path)\n",
+    "    # Check if the file has a supported extension\n",
+    "    if file_path.suffix in [\".mat\", \".dat\"]:\n",
+    "        directory_path = file_path.parent\n",
+    "        return wfdb.rdsamp(directory_path / file_path.stem)[0]\n",
+    "    else:\n",
+    "        # Raise a ValueError for files with unsupported extensions\n",
+    "        raise ValueError(f\"Unsupported file extension: {Path(file_path).suffix} (file: {file_path})\")\n",
"\n", + "\n", + "def extract_fs_dims(file_path):\n", + " \"\"\"Extracts the sampling frequency and the dimension names of the signal\n", + " from a header file. Only works for this specific .hea file structure.\n", + "\n", + " Parameters\n", + " ----------\n", + " file_path: str\n", + " The path to the header file.\n", + "\n", + " Returns\n", + " -------\n", + " fs : int\n", + " Sampling frequency.\n", + " dims: List(str)\n", + " Names of the signal dimensions.\n", + " \"\"\"\n", + " with open(file_path) as f:\n", + " lines = f.readlines()\n", + " dims = [lines[i].strip().split(\" \")[-1] for i in range(1, 13)]\n", + " fs = lines[0].strip().split(\" \")[2]\n", + " return int(fs), dims" ] }, { @@ -46,23 +104,15 @@ "source": [ "parent_dir = os.path.dirname(os.getcwd())\n", "\n", - "validation_domain = \"synthesis_val\"\n", - "reference_type = \"dataset\"\n", + "# List signal files from source dirs\n", "target_data_path = Path(parent_dir + \"/data/test/time_series/synthesis_val/dataset/\")\n", "reference_data_path = Path(parent_dir + \"/data/test/time_series/synthesis_val/reference/\")\n", - "batch_size = 5\n", - "\n", - "ts_input_layer = TimeSeriesInputLayer(\n", - " validation_domain == validation_domain,\n", - " reference_type=reference_type,\n", - " target_data=target_data_path,\n", - " reference_data=reference_data_path,\n", - " batch_size=batch_size,\n", - ")\n", + "ref_sig_files = [sig for sig in reference_data_path.glob(\"**/*\") if sig.suffix in {\".mat\", \".dat\", \".csv\"}]\n", + "target_sig_files = [sig for sig in target_data_path.glob(\"**/*\") if sig.suffix in {\".mat\", \".dat\", \".csv\"}]\n", "\n", - "\n", - "# Get raw data for input validation\n", - "ref_data, target_data = ts_input_layer.get_full_samples()" + "# Read signal files\n", + "ref_data = np.array([read_sig_file(sig_file) for sig_file in ref_sig_files])\n", + "target_data = np.array([read_sig_file(sig_file) for sig_file in target_sig_files])" ] }, { @@ -104,7 +154,7 @@ " fig, axs = plt.subplots(n_rows, n_cols, figsize=(n_cols * 4, n_rows * 3))\n", " for ax, signal, score in zip(axs.flat, signals, scores):\n", " ax.plot(signal[:, 0]) # ploting only Lead I of the ECG signal\n", - " ax.set_title(f\"{metric}: {score:.2f}\")\n", + " ax.set_title(f\"{metric}: {score:.3f}\")\n", " ax.axis(\"off\")\n", " ax.set_aspect(\"auto\")\n", " # Add a title to the entire figure\n", @@ -124,6 +174,21 @@ "This section demonstrates how to use the input validation functions with the signal-to-noise ratio (`SNR`) as an example." 
] }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "from pymdma.time_series.measures.input_val import Uniqueness\n", + "\n", + "uniqueness = Uniqueness()\n", + "uniqueness_result = uniqueness.compute(ref_data) # compute the metric\n", + "_dataset_level, instance_level = uniqueness_result.value # fetch the instance level results\n", + "\n", + "plot_instances_score(ref_data, \"Uniqueness\", instance_level, n_cols=5)" + ] + }, { "cell_type": "code", "execution_count": null, @@ -178,8 +243,18 @@ "metadata": {}, "outputs": [], "source": [ + "# Extract the sampling frequency and the dimension names of the signal from a header file\n", + "hea_ref = ref_sig_files[0].parent / f\"{ref_sig_files[0].stem}.hea\"\n", + "hea_target = target_sig_files[0].parent / f\"{target_sig_files[0].stem}.hea\"\n", + "ref_fs, ref_dim = extract_fs_dims(hea_ref)\n", + "target_fs, target_dim = extract_fs_dims(hea_target)\n", + "\n", + "\n", "# Get features for synthetic data quality metrics computation\n", - "ref_features, target_features = ts_input_layer.get_embeddings(\"tsfel\")\n", + "tsfel = ExtractorFactory.model_from_name(\"tsfel\", verbose=False)\n", + "ref_features = tsfel.extract_features_from_files(ref_sig_files, ref_fs, ref_dim)\n", + "target_features = tsfel.extract_features_from_files(target_sig_files, target_fs, target_dim)\n", + "# ref_features, target_features = ts_input_layer.get_embeddings(\"tsfel\")\n", "\n", "print(\"Reference features shape:\", ref_features.shape)\n", "print(\"Synthetic features shape:\", target_features.shape)" @@ -344,7 +419,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.9.20" + "version": "3.11.11" } }, "nbformat": 4, From 49e2bc58284f7bb61f1a5bd4481c67e7c44f6974 Mon Sep 17 00:00:00 2001 From: Ivo Facoco Date: Thu, 20 Feb 2025 13:59:17 +0000 Subject: [PATCH 3/7] fix: moved extractor import to synthesis part of notebook --- Makefile | 2 +- notebooks/image_examples.ipynb | 27 ++++++++++++++++++++++++++- notebooks/time_series_examples.ipynb | 7 +++---- 3 files changed, 30 insertions(+), 6 deletions(-) diff --git a/Makefile b/Makefile index d276c42..dacfbd0 100644 --- a/Makefile +++ b/Makefile @@ -127,7 +127,7 @@ setup-all: @echo -e "$(INFO) Creating development virtual environment...$(TERMINATOR)" && \ python3 -m venv .venv-dev && \ source .venv-dev/bin/activate && \ - pip install -U poetry<2.0.0 && \ + pip install -U "poetry<2.0.0" && \ poetry run pip install --upgrade pip setuptools && \ poetry install --with dev --all-extras && \ echo -e "$(SUCCESS) Virtual environment created successfully!$(TERMINATOR)" && \ diff --git a/notebooks/image_examples.ipynb b/notebooks/image_examples.ipynb index 8a83648..3d36199 100644 --- a/notebooks/image_examples.ipynb +++ b/notebooks/image_examples.ipynb @@ -6,7 +6,7 @@ "metadata": {}, "outputs": [], "source": [ - "%pip install \"pymdma[image] @ https://github.com/fraunhoferportugal/pymdma.git\" --extra-index-url \"https://download.pytorch.org/whl/cpu/torch_stable.html\"" + "%pip install \"pymdma[image]\" --extra-index-url \"https://download.pytorch.org/whl/cpu/torch_stable.html\"" ] }, { @@ -250,6 +250,7 @@ "source": [ "from pymdma.image.measures.input_val import MSSSIM\n", "\n", + "\n", "def generate_full_ref_dataset(dataset):\n", " distorted = []\n", " for idx, img in enumerate(dataset):\n", @@ -260,6 +261,7 @@ " distorted.append((dst).astype(np.uint8))\n", " return [np.array(x) for x in distorted]\n", "\n", + "\n", "distorted = 
generate_full_ref_dataset(dataset)\n", "\n", "mssim = MSSSIM()\n", @@ -341,6 +343,7 @@ "metadata": {}, "outputs": [], "source": [ + "import matplotlib.pyplot as plt\n", "from umap import UMAP\n", "\n", "umap = UMAP(n_components=2, random_state=10, n_jobs=1)\n", @@ -454,6 +457,28 @@ "plt.show()" ] }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "best_idx = np.argsort(giqa_instance)[::-1][:200]\n", + "best_samples = [np.asarray(Image.open(images_synth[i])) for i in best_idx]\n", + "\n", + "best_fig = plot_instances_grid(best_samples, n_cols=25)\n", + "best_fig.suptitle(\"CIFAKE Best samples\", fontsize=16)\n", + "plt.show()\n", + "\n", + "\n", + "worst_idx = np.argsort(giqa_instance)[:200]\n", + "worst_samples = [np.asarray(Image.open(images_synth[i])) for i in worst_idx]\n", + "\n", + "worst_fig = plot_instances_grid(worst_samples, n_cols=25)\n", + "worst_fig.suptitle(\"CIFAKE Worst samples\", fontsize=16)\n", + "plt.show()" + ] + }, { "cell_type": "code", "execution_count": null, diff --git a/notebooks/time_series_examples.ipynb b/notebooks/time_series_examples.ipynb index 9149369..056778b 100644 --- a/notebooks/time_series_examples.ipynb +++ b/notebooks/time_series_examples.ipynb @@ -34,9 +34,7 @@ "\n", "import matplotlib.pyplot as plt\n", "import numpy as np\n", - "import wfdb\n", - "\n", - "from pymdma.time_series.models.features import ExtractorFactory" + "import wfdb" ] }, { @@ -243,6 +241,8 @@ "metadata": {}, "outputs": [], "source": [ + "from pymdma.time_series.models.features import ExtractorFactory\n", + "\n", "# Extract the sampling frequency and the dimension names of the signal from a header file\n", "hea_ref = ref_sig_files[0].parent / f\"{ref_sig_files[0].stem}.hea\"\n", "hea_target = target_sig_files[0].parent / f\"{target_sig_files[0].stem}.hea\"\n", @@ -254,7 +254,6 @@ "tsfel = ExtractorFactory.model_from_name(\"tsfel\", verbose=False)\n", "ref_features = tsfel.extract_features_from_files(ref_sig_files, ref_fs, ref_dim)\n", "target_features = tsfel.extract_features_from_files(target_sig_files, target_fs, target_dim)\n", - "# ref_features, target_features = ts_input_layer.get_embeddings(\"tsfel\")\n", "\n", "print(\"Reference features shape:\", ref_features.shape)\n", "print(\"Synthetic features shape:\", target_features.shape)" From fd04bbcc50bd2ee82b3e67a601da0e57052a27b4 Mon Sep 17 00:00:00 2001 From: Ivo Facoco Date: Fri, 21 Feb 2025 10:55:03 +0000 Subject: [PATCH 4/7] fix: removed debug print in TSFEL class --- src/pymdma/time_series/models/tsfel.py | 1 - 1 file changed, 1 deletion(-) diff --git a/src/pymdma/time_series/models/tsfel.py b/src/pymdma/time_series/models/tsfel.py index d12d9e7..1ab81ef 100644 --- a/src/pymdma/time_series/models/tsfel.py +++ b/src/pymdma/time_series/models/tsfel.py @@ -51,7 +51,6 @@ def __call__(self, batch_windows, fs, dims): features: DataFrame DataFrame with the features from each batch. 
""" - print(self.verbose, int(self.verbose)) features = tsfel.time_series_features_extractor( self.cfg_file, batch_windows, From 404e59ebd175d167f78076b79f5ff4a3f9547396 Mon Sep 17 00:00:00 2001 From: Ivo Facoco Date: Fri, 21 Feb 2025 16:37:37 +0000 Subject: [PATCH 5/7] fix: remove change from source installation to pypi on notebooks --- notebooks/tabular_examples.ipynb | 354 +++++++++++++------------------ 1 file changed, 145 insertions(+), 209 deletions(-) diff --git a/notebooks/tabular_examples.ipynb b/notebooks/tabular_examples.ipynb index c393baf..d1136a6 100644 --- a/notebooks/tabular_examples.ipynb +++ b/notebooks/tabular_examples.ipynb @@ -6,7 +6,7 @@ "metadata": {}, "outputs": [], "source": [ - "%pip install \"pymdma[tabular] @ https://github.com/fraunhoferportugal/pymdma.git\" --find-links \"https://download.pytorch.org/whl/cpu/torch_stable.html\"" + "%pip install \"pymdma[tabular]\" --find-links \"https://download.pytorch.org/whl/cpu/torch_stable.html\"" ] }, { @@ -22,16 +22,15 @@ "metadata": {}, "outputs": [], "source": [ + "from typing import Callable, List, Tuple\n", + "\n", + "import matplotlib.pyplot as plt\n", "import numpy as np\n", "import pandas as pd\n", - "import matplotlib.pyplot as plt\n", - "\n", - "from typing import Tuple, Callable, List\n", - "from scipy.stats import gaussian_kde\n", - "from sklearn.neighbors import NearestNeighbors\n", "from matplotlib.offsetbox import AnchoredText\n", - "\n", - "from sklearn.datasets import make_classification" + "from scipy.stats import gaussian_kde\n", + "from sklearn.datasets import make_classification\n", + "from sklearn.neighbors import NearestNeighbors" ] }, { @@ -48,7 +47,9 @@ "outputs": [], "source": [ "# Nearest Neighbor Model\n", - "def _get_nn_model(train: np.ndarray, distance_type: str = 'euclidean'):\n", + "\n", + "\n", + "def _get_nn_model(train: np.ndarray, distance_type: str = \"euclidean\"):\n", " \"\"\"\n", " Find nearest neighbors of test in train with first categoric_slice-many variables being categorical.\n", "\n", @@ -70,14 +71,10 @@ "\n", "# Distances\n", "def _get_nn_distances(\n", - " tgt_emb: np.ndarray, \n", - " syn_emb: np.ndarray,\n", - " distance_type: dict = 'euclidean',\n", - " size: int = None\n", - ") -> Tuple[np.ndarray]: \n", + " tgt_emb: np.ndarray, syn_emb: np.ndarray, distance_type: dict = \"euclidean\", size: int = None\n", + ") -> Tuple[np.ndarray]:\n", " # checkpoint\n", - " assert tgt_emb.shape[1] == syn_emb.shape[1], \\\n", - " \"Train and Syn have mismatched columns\"\n", + " assert tgt_emb.shape[1] == syn_emb.shape[1], \"Train and Syn have mismatched columns\"\n", "\n", " # split into tgt_train, tgt_query, and syn_query\n", " if size is None:\n", @@ -86,7 +83,7 @@ " tgt_size, syn_size = size, size\n", "\n", " # train and query from target\n", - " tgt_query = tgt_emb[-int(tgt_size):]\n", + " tgt_query = tgt_emb[-int(tgt_size) :]\n", "\n", " # syn_train is not needed\n", " # if sample_size = synthetic_size, syn_query is all syn dataset\n", @@ -99,7 +96,7 @@ " # target\n", " tgt_query_nn, _ = nn_model.kneighbors(tgt_query, n_neighbors=3)\n", " tgt_query_nn = tgt_query_nn[:, 1:] # except the closest (itself)\n", - " \n", + "\n", " # synthetic\n", " syn_query_nn, _ = nn_model.kneighbors(syn_query, n_neighbors=2)\n", "\n", @@ -111,55 +108,51 @@ " for label, query in query_dict.items():\n", " # closest neighbor\n", " aux_dcr = query[:, 0]\n", - " \n", + "\n", " # normalized closest neighbor distances\n", - " aux_nndr = aux_dcr / (query[:, 1] + 1e-10) \n", - " \n", + " aux_nndr = aux_dcr / 
(query[:, 1] + 1e-10)\n", + "\n", " # assign\n", " dcr[label] = aux_dcr\n", " nndr[label] = aux_nndr\n", - " \n", + "\n", " return dcr, nndr\n", "\n", "\n", "# Probability Density Function\n", "def _get_nn_pdf(\n", - " tgt_dist: np.ndarray, \n", + " tgt_dist: np.ndarray,\n", " syn_dist: np.ndarray,\n", ") -> Tuple[np.ndarray]:\n", - " \n", + "\n", " # get distributions bins\n", " t_min, t_max = min(tgt_dist), max(tgt_dist)\n", " s_min, s_max = min(syn_dist), max(syn_dist)\n", - " bins = np.linspace(\n", - " min([t_min, s_min]), \n", - " max([t_max, s_max]), \n", - " 600\n", - " )\n", + " bins = np.linspace(min([t_min, s_min]), max([t_max, s_max]), 600)\n", "\n", " # get distributions\n", " # tgt pdf dists\n", - " pdf_tgt = gaussian_kde(\n", - " tgt_dist\n", - " ).pdf(bins)\n", + " pdf_tgt = gaussian_kde(tgt_dist).pdf(bins)\n", " pdf_tgt /= sum(pdf_tgt)\n", - " \n", + "\n", " # syn pdf dists\n", - " pdf_syn = gaussian_kde(\n", - " syn_dist\n", - " ).pdf(bins)\n", + " pdf_syn = gaussian_kde(syn_dist).pdf(bins)\n", " pdf_syn /= sum(pdf_syn)\n", - " \n", + "\n", " return pdf_tgt, pdf_syn, bins\n", "\n", "\n", "def subplot_dim_optm(dim: int):\n", " import math\n", + "\n", " matrix_n, matrix_m = int(np.sqrt(dim)), int(np.sqrt(dim))\n", - " matrix_n += math.ceil((dim - matrix_m ** 2) / matrix_n)\n", + " matrix_n += math.ceil((dim - matrix_m**2) / matrix_n)\n", " return matrix_n, matrix_m\n", "\n", + "\n", "# Plot Generative Quality\n", + "\n", + "\n", "def plot_generative_quality(\n", " real_data_list: List[np.ndarray],\n", " fake_data_list: List[np.ndarray],\n", @@ -171,7 +164,7 @@ "):\n", " # plot matrix dim\n", " n_dim, m_dim = subplot_dim_optm(dim=len(real_data_list))\n", - " \n", + "\n", " # figures\n", " fig1, axes_emb = plt.figure(n_dim, m_dim, figsize=(12, 8))\n", " fig2, axes_dist = plt.figure(n_dim, m_dim, figsize=(12, 8))\n", @@ -179,59 +172,37 @@ " # flatten axes array\n", " axes_emb = axes_emb.flatten()\n", " axes_dist = axes_dist.flatten()\n", - " \n", + "\n", " # loop\n", - " for real_data, fake_data, real_pdf, fake_pdf, bins, name, ax_emb, ax_dist in zip(real_data_list, fake_data_list, real_pdf_list, fake_pdf_list, bins_list, names, axes_emb, axes_dist):\n", + " for real_data, fake_data, real_pdf, fake_pdf, bins, name, ax_emb, ax_dist in zip(\n", + " real_data_list, fake_data_list, real_pdf_list, fake_pdf_list, bins_list, names, axes_emb, axes_dist\n", + " ):\n", " # embeddings\n", " tgt_emb2d = emb_obj.transform(real_data)\n", - " syn_emb2d = emb_obj.transform(fake_data) \n", - " \n", - " ax_emb.scatter(\n", - " tgt_emb2d[:, 0],\n", - " tgt_emb2d[:, 1],\n", - " color='forestgreen',\n", - " marker='o',\n", - " label='Real',\n", - " alpha=0.7\n", - " )\n", - " ax_emb.scatter(\n", - " syn_emb2d[:, 0],\n", - " syn_emb2d[:, 1],\n", - " color='darkred',\n", - " marker='*',\n", - " label='Fake',\n", - " alpha=0.7\n", - " )\n", - " \n", + " syn_emb2d = emb_obj.transform(fake_data)\n", + "\n", + " ax_emb.scatter(tgt_emb2d[:, 0], tgt_emb2d[:, 1], color=\"forestgreen\", marker=\"o\", label=\"Real\", alpha=0.7)\n", + " ax_emb.scatter(syn_emb2d[:, 0], syn_emb2d[:, 1], color=\"darkred\", marker=\"*\", label=\"Fake\", alpha=0.7)\n", + "\n", " # set settings\n", " ax_emb.legend()\n", - " ax_emb.set_xlabel('Embedding nr. 0')\n", - " ax_emb.set_ylabel('Embedding nr. 1')\n", + " ax_emb.set_xlabel(\"Embedding nr. 0\")\n", + " ax_emb.set_ylabel(\"Embedding nr. 
1\")\n", "\n", " # set title\n", " ax_emb.set_title(name)\n", - " \n", - " # distances plot \n", - " ax_dist.plot(\n", - " bins,\n", - " real_pdf,\n", - " color='forestgreen',\n", - " label='Real'\n", - " )\n", - " ax_dist.fill_between(bins, real_pdf, 0, color='forestgreen', alpha=.1)\n", - " \n", - " ax_dist.plot(\n", - " bins,\n", - " fake_pdf,\n", - " color='darkred',\n", - " label='Fake'\n", - " )\n", - " ax_dist.fill_between(bins, fake_pdf, 0, color='darkred', alpha=.1)\n", - " \n", + "\n", + " # distances plot\n", + " ax_dist.plot(bins, real_pdf, color=\"forestgreen\", label=\"Real\")\n", + " ax_dist.fill_between(bins, real_pdf, 0, color=\"forestgreen\", alpha=0.1)\n", + "\n", + " ax_dist.plot(bins, fake_pdf, color=\"darkred\", label=\"Fake\")\n", + " ax_dist.fill_between(bins, fake_pdf, 0, color=\"darkred\", alpha=0.1)\n", + "\n", " # set settings\n", " ax_dist.legend()\n", - " ax_dist.set_xlabel('Distances')\n", - " ax_dist.set_ylabel('Relative Frequency')\n", + " ax_dist.set_xlabel(\"Distances\")\n", + " ax_dist.set_ylabel(\"Relative Frequency\")\n", "\n", " # set title\n", " ax_dist.set_title(name)\n", @@ -252,7 +223,7 @@ "\n", " # dataframe conversion\n", " X_df = pd.DataFrame(X, columns=cols)\n", - " X_df['tgt'] = y\n", + " X_df[\"tgt\"] = y\n", "\n", " return X_df\n", "\n", @@ -268,37 +239,30 @@ " # plot matrix dim\n", " dim = len(dataset_list)\n", " n_dim, m_dim = subplot_dim_optm(dim=dim)\n", - " \n", + "\n", " # figures\n", " if share_ax:\n", " fig, axes = plt.subplots(n_dim, m_dim, figsize=(12, 8), sharex=True, sharey=True)\n", " else:\n", " fig, axes = plt.subplots(n_dim, m_dim, figsize=(12, 8))\n", - " \n", + "\n", " # flatten axes array\n", " axes = axes.flatten()\n", "\n", " # choose a color map\n", - " colors = plt.cm.get_cmap('tab10', dim).colors\n", - " \n", + " colors = plt.cm.get_cmap(\"tab10\", dim).colors\n", + "\n", " # loop\n", " for dataset, name, color, ax in zip(dataset_list, names, colors, axes):\n", " # embeddings\n", " data_emb = emb_obj.fit_transform(dataset) if with_fit else emb_obj.transform(dataset)\n", - " \n", + "\n", " # scatter plot\n", - " ax.scatter(\n", - " data_emb[:, 0],\n", - " data_emb[:, 1],\n", - " facecolors=color,\n", - " edgecolors=color,\n", - " marker='o',\n", - " alpha=0.7\n", - " )\n", - " \n", + " ax.scatter(data_emb[:, 0], data_emb[:, 1], facecolors=color, edgecolors=color, marker=\"o\", alpha=0.7)\n", + "\n", " # set settings\n", - " ax.set_xlabel('Embedding nr. 0')\n", - " ax.set_ylabel('Embedding nr. 1')\n", + " ax.set_xlabel(\"Embedding nr. 0\")\n", + " ax.set_ylabel(\"Embedding nr. 
1\")\n", "\n", " # set title\n", " ax.set_title(name)\n", @@ -311,81 +275,69 @@ "\n", " return fig\n", "\n", + "\n", "def _get_1d_pdf(data: np.ndarray, n_bins: int = 300):\n", " # get distributions bins\n", " d_min, d_max = min(data), max(data)\n", - " bins = np.linspace(\n", - " d_min, \n", - " d_max, \n", - " n_bins\n", - " )\n", + " bins = np.linspace(d_min, d_max, n_bins)\n", "\n", " # get distributions\n", " # tgt pdf dists\n", - " pdf = gaussian_kde(\n", - " data.astype(float)\n", - " ).pdf(bins)\n", + " pdf = gaussian_kde(data.astype(float)).pdf(bins)\n", " pdf /= sum(pdf)\n", - " \n", + "\n", " return pdf, bins\n", "\n", - "def plot_kde(reference: np.ndarray, target_list: List[np.ndarray], column_names: List[str] = None, tag_names: List[str] = None, annots: np.ndarray = None):\n", + "\n", + "def plot_kde(\n", + " reference: np.ndarray,\n", + " target_list: List[np.ndarray],\n", + " column_names: List[str] = None,\n", + " tag_names: List[str] = None,\n", + " annots: np.ndarray = None,\n", + "):\n", " num_columns = reference.shape[1]\n", " num_datasets = len(target_list)\n", - " \n", + "\n", " # default feature names if not provided\n", " if column_names is None:\n", " column_names = [f\"Col {i+1}\" for i in range(reference.shape[-1])]\n", "\n", " if tag_names is None:\n", " tag_names = [f\"Dataset {i+1}\" for i in range(len(target_list))]\n", - " \n", + "\n", " # set up the plot grid\n", " fig, axes = plt.subplots(num_columns, num_datasets, figsize=(16, 10))\n", - " \n", + "\n", " for i in range(num_columns):\n", " # iterate through each feature (row in subplot grid)\n", " ref_pdf, ref_bins = _get_1d_pdf(reference[:, i], n_bins=400)\n", "\n", - " # set ylabel \n", + " # set ylabel\n", " axes[i, 0].set_ylabel(column_names[i])\n", "\n", " # iterate\n", " for j, target in enumerate(target_list):\n", " # plot KDEs for each target dataset (columns in subplot grid)\n", - " at = AnchoredText(str(annots[i, j]), prop=dict(size=7), frameon=False, loc='upper right')\n", - " \n", + " at = AnchoredText(str(annots[i, j]), prop=dict(size=7), frameon=False, loc=\"upper right\")\n", + "\n", " # target\n", " tgt_pdf, tgt_bins = _get_1d_pdf(target[:, i], n_bins=400)\n", "\n", " # plot the reference KDE on each row\n", - " axes[i, j].plot(\n", - " ref_bins,\n", - " ref_pdf,\n", - " color='forestgreen',\n", - " label='Real',\n", - " ls='--',\n", - " alpha=.3\n", - " )\n", - " axes[i, j].fill_between(ref_bins, ref_pdf, 0, color='forestgreen', alpha=.1)\n", + " axes[i, j].plot(ref_bins, ref_pdf, color=\"forestgreen\", label=\"Real\", ls=\"--\", alpha=0.3)\n", + " axes[i, j].fill_between(ref_bins, ref_pdf, 0, color=\"forestgreen\", alpha=0.1)\n", "\n", " # plot the target KDE on each row\n", - " axes[i, j].plot(\n", - " tgt_bins,\n", - " tgt_pdf,\n", - " color='indianred',\n", - " label='Target',\n", - " ls='--',\n", - " alpha=.3\n", - " )\n", - " axes[i, j].fill_between(tgt_bins, tgt_pdf, 0, color='indianred', alpha=.1)\n", + " axes[i, j].plot(tgt_bins, tgt_pdf, color=\"indianred\", label=\"Target\", ls=\"--\", alpha=0.3)\n", + " axes[i, j].fill_between(tgt_bins, tgt_pdf, 0, color=\"indianred\", alpha=0.1)\n", "\n", " # add annotation\n", " axes[i, j].add_artist(at)\n", - " \n", + "\n", " # window params\n", " if not i:\n", - " axes[i, j].set_title(f'{tag_names[j]}')\n", + " axes[i, j].set_title(f\"{tag_names[j]}\")\n", "\n", " axes[i, j].set_yticks([])\n", " axes[i, j].set_xticks([])\n", @@ -419,9 +371,9 @@ "outputs": [], "source": [ "## Input validation\n", - "# A --> High Volume of Samples + 
Informative Features \n", - "tag_in1 = 'A'\n", - "name_in1 = 'A - High Vol. + Inform.'\n", + "# A --> High Volume of Samples + Informative Features\n", + "tag_in1 = \"A\"\n", + "name_in1 = \"A - High Vol. + Inform.\"\n", "dataset_in1 = make_dataset(\n", " n_samples=2000,\n", " n_features=10,\n", @@ -435,12 +387,12 @@ " shift=0.0,\n", " scale=3.0,\n", " shuffle=True,\n", - " random_state=42\n", + " random_state=42,\n", ").to_numpy()\n", "\n", "# B --> High Volume of Samples + Non-Informative Features\n", - "tag_in2 = 'B'\n", - "name_in2 = 'B - High Vol. + Non-Inform.'\n", + "tag_in2 = \"B\"\n", + "name_in2 = \"B - High Vol. + Non-Inform.\"\n", "dataset_in2 = make_dataset(\n", " n_samples=2000,\n", " n_features=10,\n", @@ -454,12 +406,12 @@ " shift=0.0,\n", " scale=3.0,\n", " shuffle=True,\n", - " random_state=42\n", + " random_state=42,\n", ").to_numpy()\n", "\n", "# C --> Small Volume of Samples + High Dimensionality + Informative Features\n", - "tag_in3 = 'C'\n", - "name_in3 = 'C - Small Vol. + Inform.'\n", + "tag_in3 = \"C\"\n", + "name_in3 = \"C - Small Vol. + Inform.\"\n", "dataset_in3 = make_dataset(\n", " n_samples=200,\n", " n_features=100,\n", @@ -473,12 +425,12 @@ " shift=0.0,\n", " scale=3.0,\n", " shuffle=True,\n", - " random_state=42\n", + " random_state=42,\n", ").to_numpy()\n", "\n", "# D --> Small Volume of Samples + High Dimensionality + Non-informative Features\n", - "tag_in4 = 'D'\n", - "name_in4 = 'D - Small Vol. + Non-Inform.'\n", + "tag_in4 = \"D\"\n", + "name_in4 = \"D - Small Vol. + Non-Inform.\"\n", "dataset_in4 = make_dataset(\n", " n_samples=200,\n", " n_features=100,\n", @@ -492,13 +444,13 @@ " shift=0.0,\n", " scale=3.0,\n", " shuffle=True,\n", - " random_state=42\n", + " random_state=42,\n", ").to_numpy()\n", "\n", "## Synthesis Evaluation\n", "# reference dataset\n", - "tag_ref = 'R'\n", - "name_ref = 'R - Reference Dataset'\n", + "tag_ref = \"R\"\n", + "name_ref = \"R - Reference Dataset\"\n", "dataset_ref = make_dataset(\n", " n_samples=2000,\n", " n_features=10,\n", @@ -512,35 +464,35 @@ " shift=0.0,\n", " scale=3.0,\n", " shuffle=True,\n", - " random_state=42\n", + " random_state=42,\n", ").to_numpy()\n", "\n", "# A --> random dataset\n", - "tag_syn1 = 'A'\n", - "name_syn1 = 'A - Random'\n", + "tag_syn1 = \"A\"\n", + "name_syn1 = \"A - Random\"\n", "dataset_s1 = np.random.random(dataset_ref.shape) * 3.0\n", "\n", "# B --> cumulative small distortion\n", - "tag_syn2 = 'B'\n", - "name_syn2 = 'B - Small Add Distortion'\n", + "tag_syn2 = \"B\"\n", + "name_syn2 = \"B - Small Add Distortion\"\n", "rnd = np.random.random(dataset_ref.shape) * 0.01\n", "dataset_s2 = np.copy(dataset_ref) + rnd\n", "\n", "# C --> cumulative large distortion\n", - "tag_syn3 = 'C'\n", - "name_syn3 = 'C - Large Add Distortion'\n", + "tag_syn3 = \"C\"\n", + "name_syn3 = \"C - Large Add Distortion\"\n", "rnd = np.random.random(dataset_ref.shape) * 100\n", "dataset_s3 = np.copy(dataset_ref) + rnd\n", "\n", "# D --> small multiplicative distortion\n", - "tag_syn4 = 'D'\n", - "name_syn4 = 'D - Small Mult. Distortion'\n", + "tag_syn4 = \"D\"\n", + "name_syn4 = \"D - Small Mult. Distortion\"\n", "rnd = 0.7\n", "dataset_s4 = np.copy(dataset_ref) * rnd\n", "\n", "# E --> large multiplicative distortion\n", - "tag_syn5 = 'E'\n", - "name_syn5 = 'E - Large Mult. Distortion'\n", + "tag_syn5 = \"E\"\n", + "name_syn5 = \"E - Large Mult. 
Distortion\"\n", "rnd = 100\n", "dataset_s5 = np.copy(dataset_ref) * rnd" ] @@ -572,7 +524,7 @@ "dataset_norm = list(map(lambda x: scale_obj.fit_transform(x), dataset_list))\n", "names = [name_in1, name_in2, name_in3, name_in4]\n", "\n", - "# plot \n", + "# plot\n", "_ = plot_datasets(dataset_norm, names, emb_obj, with_fit=True, share_ax=False)" ] }, @@ -607,7 +559,7 @@ "dataset_norm = list(map(lambda x: scale_obj.transform(x), dataset_list))\n", "names = [name_ref, name_syn1, name_syn2, name_syn3, name_syn4, name_syn5]\n", "\n", - "# plot \n", + "# plot\n", "_ = plot_datasets(dataset_norm, names, emb_obj, with_fit=False, share_ax=True)" ] }, @@ -663,14 +615,14 @@ "metadata": {}, "outputs": [], "source": [ - "from pymdma.tabular.measures.input_val import KAnonymityScore\n", "from pymdma.tabular.embeddings.embed import UMAPEmbedder\n", + "from pymdma.tabular.measures.input_val import KAnonymityScore\n", "\n", "# umap embedder\n", "emb_obj = UMAPEmbedder(n_components=2)\n", "\n", "# privacy\n", - "score_name = 'KAnonimity'\n", + "score_name = \"KAnonimity\"\n", "k_anom = KAnonymityScore(column_names=None, qi_names=None) # K-anonimity\n", "\n", "# score list\n", @@ -683,9 +635,9 @@ "\n", " # append\n", " scores.append(f\"{tag} - {score_name} = {aux_score}\")\n", - " \n", "\n", - "# plot \n", + "\n", + "# plot\n", "_ = plot_datasets(dataset_list, scores, emb_obj, with_fit=True, share_ax=False)" ] }, @@ -702,14 +654,14 @@ "metadata": {}, "outputs": [], "source": [ - "from pymdma.tabular.measures.input_val import VIFactorScore\n", "from pymdma.tabular.embeddings.embed import UMAPEmbedder\n", + "from pymdma.tabular.measures.input_val import VIFactorScore\n", "\n", "# umap embedder\n", "emb_obj = UMAPEmbedder(n_components=2)\n", "\n", "# quality\n", - "score_name = 'VIF Score'\n", + "score_name = \"VIF Score\"\n", "vif = VIFactorScore(column_names=None) # VIF\n", "\n", "# score list\n", @@ -722,9 +674,9 @@ "\n", " # append\n", " scores.append(f\"{tag} - {score_name} = {aux_score}\")\n", - " \n", "\n", - "# plot \n", + "\n", + "# plot\n", "_ = plot_datasets(dataset_list, scores, emb_obj, with_fit=True, share_ax=False)" ] }, @@ -734,14 +686,14 @@ "metadata": {}, "outputs": [], "source": [ - "from pymdma.tabular.measures.input_val import DimCurseScore\n", "from pymdma.tabular.embeddings.embed import UMAPEmbedder\n", + "from pymdma.tabular.measures.input_val import DimCurseScore\n", "\n", "# umap embedder\n", "emb_obj = UMAPEmbedder(n_components=2)\n", "\n", "# quality\n", - "score_name = 'Dim. Curse'\n", + "score_name = \"Dim. 
Curse\"\n", "dimc = DimCurseScore() # Dimensionality Curse\n", "\n", "# score list\n", @@ -754,9 +706,9 @@ "\n", " # append\n", " scores.append(f\"{tag} - {score_name} = {aux_score}\")\n", - " \n", "\n", - "# plot \n", + "\n", + "# plot\n", "_ = plot_datasets(dataset_list, scores, emb_obj, with_fit=True, share_ax=False)" ] }, @@ -766,14 +718,14 @@ "metadata": {}, "outputs": [], "source": [ - "from pymdma.tabular.measures.input_val import UniformityScore\n", "from pymdma.tabular.embeddings.embed import UMAPEmbedder\n", + "from pymdma.tabular.measures.input_val import UniformityScore\n", "\n", "# umap embedder\n", "emb_obj = UMAPEmbedder(n_components=2)\n", "\n", "# quality\n", - "score_name = 'Uniformity'\n", + "score_name = \"Uniformity\"\n", "unif = UniformityScore(column_names=None) # Uniformity\n", "\n", "# score list\n", @@ -785,14 +737,9 @@ " aux_score = list(unif.compute(dataset).stats[0].values())\n", "\n", " # append\n", - " scores.append(\n", - " f\"{tag} - {score_name} = \" +\n", - " f\"{round(aux_score[0], 1)}\" +\n", - " u'\\u00B1' + \n", - " f\"{round(aux_score[1], 1)} %\"\n", - " )\n", + " scores.append(f\"{tag} - {score_name} = \" + f\"{round(aux_score[0], 1)}\" + \"\\u00b1\" + f\"{round(aux_score[1], 1)} %\")\n", "\n", - "# plot \n", + "# plot\n", "_ = plot_datasets(dataset_list, scores, emb_obj, with_fit=True, share_ax=False)" ] }, @@ -802,14 +749,14 @@ "metadata": {}, "outputs": [], "source": [ - "from pymdma.tabular.measures.input_val import OutlierScore\n", "from pymdma.tabular.embeddings.embed import UMAPEmbedder\n", + "from pymdma.tabular.measures.input_val import OutlierScore\n", "\n", "# umap embedder\n", "emb_obj = UMAPEmbedder(n_components=2)\n", "\n", "# quality\n", - "score_name = 'Outlier Score'\n", + "score_name = \"Outlier Score\"\n", "outl = OutlierScore() # Outliers\n", "\n", "# score list\n", @@ -821,14 +768,9 @@ " aux_score = list(outl.compute(dataset).stats[0].values())\n", "\n", " # append\n", - " scores.append(\n", - " f\"{tag} - {score_name} = \" +\n", - " f\"{round(aux_score[0], 1)}\" +\n", - " u'\\u00B1' + \n", - " f\"{round(aux_score[1], 1)} %\"\n", - " )\n", - " \n", - "# plot \n", + " scores.append(f\"{tag} - {score_name} = \" + f\"{round(aux_score[0], 1)}\" + \"\\u00b1\" + f\"{round(aux_score[1], 1)} %\")\n", + "\n", + "# plot\n", "_ = plot_datasets(dataset_list, scores, emb_obj, with_fit=True, share_ax=False)" ] }, @@ -923,7 +865,7 @@ "metadata": {}, "outputs": [], "source": [ - "from pymdma.tabular.measures.synthesis_val import ImprovedPrecision, ImprovedRecall, Authenticity, Coverage\n", + "from pymdma.tabular.measures.synthesis_val import Authenticity, Coverage, ImprovedPrecision, ImprovedRecall\n", "\n", "ip, ip_name = ImprovedPrecision(k=5), \"P\"\n", "ir, ir_name = ImprovedRecall(k=5), \"R\"\n", @@ -955,7 +897,7 @@ " else:\n", " # append\n", " scores.append(f\"Reference Dataset - {score_s}\")\n", - " \n", + "\n", "\n", "# plot\n", "_ = plot_datasets(dataset_norm_list, scores, emb_obj, with_fit=False, share_ax=True)" @@ -974,12 +916,11 @@ "metadata": {}, "outputs": [], "source": [ - "from pymdma.tabular.measures.synthesis_val import StatisticalSimScore, CoherenceScore\n", - "from pymdma.tabular.measures.synthesis_val import DCRPrivacy\n", + "from pymdma.tabular.measures.synthesis_val import CoherenceScore, DCRPrivacy, StatisticalSimScore\n", "\n", "ssim, ssim_name = StatisticalSimScore(), \"AttSim\"\n", - "coher, coher_name = CoherenceScore(weights=None, corr_type='pearson'), \"CorrCoH\"\n", - "dcr, dcr_name = 
DCRPrivacy(distance_type='euclidean'), \"DCR\"\n", + "coher, coher_name = CoherenceScore(weights=None, corr_type=\"pearson\"), \"CorrCoH\"\n", + "dcr, dcr_name = DCRPrivacy(distance_type=\"euclidean\"), \"DCR\"\n", "\n", "# score list\n", "scores = []\n", @@ -987,9 +928,9 @@ "# compute scores\n", "for dataset, tag in zip(dataset_norm_list, tag_list):\n", " # compute\n", - " ssim_score = ssim.compute(ref, dataset).stats[0].get('mean')\n", + " ssim_score = ssim.compute(ref, dataset).stats[0].get(\"mean\")\n", " coher_score = coher.compute(ref, dataset).value[0]\n", - " dcr_score = dcr.compute(ref, dataset).value[0].get('privacy')\n", + " dcr_score = dcr.compute(ref, dataset).value[0].get(\"privacy\")\n", "\n", " # aggregate all scores\n", " aux_scores = [ssim_score, coher_score, dcr_score]\n", @@ -1005,7 +946,7 @@ " else:\n", " # append\n", " scores.append(f\"Reference Dataset - {score_s}\")\n", - " \n", + "\n", "\n", "# plot\n", "_ = plot_datasets(dataset_norm_list, scores, emb_obj, with_fit=False, share_ax=True)" @@ -1050,11 +991,6 @@ } ], "metadata": { - "kernelspec": { - "display_name": ".venv-dev", - "language": "python", - "name": "python3" - }, "language_info": { "codemirror_mode": { "name": "ipython", @@ -1065,7 +1001,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.12.7" + "version": "3.11.11" } }, "nbformat": 4, From d1fa557c3b1d37bd327a1f4ec8bfefe0f43c2dc5 Mon Sep 17 00:00:00 2001 From: Ivo Facoco Date: Fri, 21 Feb 2025 16:46:50 +0000 Subject: [PATCH 6/7] Minor version bump 0.1.8 --- CHANGELOG.md | 29 +++++++++++++++++++++++++++++ README.md | 2 +- VERSION | 2 +- pyproject.toml | 2 +- 4 files changed, 32 insertions(+), 3 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 3928997..ecd2ad7 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -3,6 +3,35 @@ All notable changes to this project will be documented in this file. This format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/), and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html). + +## [0.1.8] - 2025-02-21 +Minor patch release with Time Series standardization and documentation updates. + +### Changed + - Introduced `BaseTSExtractor` torch module for time_series + - Loading TSFEL config on class instanciation + +### Fixed + - Removed input layer logic from time_series example notebook (should not use internal methods or classes) + + +## [0.1.7] - 2025-01-31 +Minor patch release with reduced dependency tree and documentaion updates. + +### Added + - `convert_grayscale` option in `PSNR`metric + - Explicitely listing `SSIM` and `MSSIM` params + - Non deterministic warning for MSID documentation + +### Changed + - Renamed `MSSIM` to `MSSSIM` + - GIQA documentation to indicate QS and DS as in paper + - Removed unnecessary torchmetrics dependency for image + +### Fixed + - Added `np.isclose` verification in hypersphere estimation functions for synthetic evaluation + + ## [0.1.6] - 2024-12-10 Minor patch release with new image features extraction method and documentation updates. 
diff --git a/README.md b/README.md index 8412b02..72ceefa 100644 --- a/README.md +++ b/README.md @@ -73,7 +73,7 @@ Depending on the data modality you want to use, you may need to install addition ```bash pip install "pymdma[image] @ git+https://github.com/fraunhoferportugal/pymdma.git" # image dependencies pip install "pymdma[tabular] @ git+https://github.com/fraunhoferportugal/pymdma.git" # tabular dependencies -pip install "pymdma[tabular] @ git+https://github.com/fraunhoferportugal/pymdma.git" # time series dependencies +pip install "pymdma[time_series] @ git+https://github.com/fraunhoferportugal/pymdma.git" # time series dependencies ``` For a minimal installation, you can install the package without CUDA support by forcing pip to install torch from the CPU index with the `--find-url` command. diff --git a/VERSION b/VERSION index 1180819..699c6c6 100644 --- a/VERSION +++ b/VERSION @@ -1 +1 @@ -0.1.7 +0.1.8 diff --git a/pyproject.toml b/pyproject.toml index 5a31d32..5705ab3 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -2,7 +2,7 @@ # https://github.com/microsoft/vscode-python/blob/master/CHANGELOG.md#enhancements-1 [tool.poetry] name = "pymdma" -version = "0.1.7" +version = "0.1.8" description = "Multimodal Data Metrics for Auditing real and synthetic data" authors = ["Fraunhofer AICOS "] maintainers = [ From f4964656ce47ecf274630d91147087469075994b Mon Sep 17 00:00:00 2001 From: Ivo Facoco Date: Fri, 21 Feb 2025 17:29:58 +0000 Subject: [PATCH 7/7] fix: remove repeated cell in image notebook --- notebooks/image_examples.ipynb | 34 +++------------------------------- 1 file changed, 3 insertions(+), 31 deletions(-) diff --git a/notebooks/image_examples.ipynb b/notebooks/image_examples.ipynb index 3d36199..f2dcd68 100644 --- a/notebooks/image_examples.ipynb +++ b/notebooks/image_examples.ipynb @@ -250,7 +250,6 @@ "source": [ "from pymdma.image.measures.input_val import MSSSIM\n", "\n", - "\n", "def generate_full_ref_dataset(dataset):\n", " distorted = []\n", " for idx, img in enumerate(dataset):\n", @@ -317,17 +316,15 @@ "\n", "from pymdma.image.models.features import ExtractorFactory\n", "\n", + "N_SAMPLES = 2000\n", "random.seed(10)\n", "\n", "cifake_test_path = cifake_path / \"test\"\n", "test_images_ref = Path(cifake_test_path / \"REAL\") # real images\n", "test_images_synth = Path(cifake_test_path / \"FAKE\") # synthetic images\n", "\n", - "images_ref = [img for img in test_images_ref.iterdir() if img.is_file()]\n", - "images_synth = [img for img in test_images_synth.iterdir() if img.is_file()]\n", - "\n", - "# images_ref = random.sample([img for img in test_images_ref.iterdir() if img.is_file()], 5000)\n", - "# images_synth = random.sample([img for img in test_images_synth.iterdir() if img.is_file()], 5000)\n", + "images_ref = [img for img in test_images_ref.iterdir() if img.is_file()][:N_SAMPLES]\n", + "images_synth = [img for img in test_images_synth.iterdir() if img.is_file()][:N_SAMPLES]\n", "\n", "extractor = ExtractorFactory.model_from_name(name=\"dino_vits8\")\n", "ref_features = extractor.extract_features_from_files(images_ref)\n", @@ -343,7 +340,6 @@ "metadata": {}, "outputs": [], "source": [ - "import matplotlib.pyplot as plt\n", "from umap import UMAP\n", "\n", "umap = UMAP(n_components=2, random_state=10, n_jobs=1)\n", @@ -449,36 +445,12 @@ "metadata": {}, "outputs": [], "source": [ - "import matplotlib.pyplot as plt\n", - "\n", "giqa_result.plot(\"GIQA\", bins=50)\n", "plt.xlabel(\"Score\")\n", "plt.ylabel(\"Frequency\")\n", "plt.show()" ] }, - { - "cell_type": 
"code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "best_idx = np.argsort(giqa_instance)[::-1][:200]\n", - "best_samples = [np.asarray(Image.open(images_synth[i])) for i in best_idx]\n", - "\n", - "best_fig = plot_instances_grid(best_samples, n_cols=25)\n", - "best_fig.suptitle(\"CIFAKE Best samples\", fontsize=16)\n", - "plt.show()\n", - "\n", - "\n", - "worst_idx = np.argsort(giqa_instance)[:200]\n", - "worst_samples = [np.asarray(Image.open(images_synth[i])) for i in worst_idx]\n", - "\n", - "worst_fig = plot_instances_grid(worst_samples, n_cols=25)\n", - "worst_fig.suptitle(\"CIFAKE Worst samples\", fontsize=16)\n", - "plt.show()" - ] - }, { "cell_type": "code", "execution_count": null,