Dev aifpaif (#152)
* updating 10_entropy notebook

* updating 10_entropy notebook

* [pre-commit.ci] auto fixes from pre-commit.com hooks

for more information, see https://pre-commit.ci

* [pre-commit.ci] auto fixes from pre-commit.com hooks

for more information, see https://pre-commit.ci

* Update tutorial

export ipynb to .py
Add references

* Update 10_entropy.py

* Update 10_entropy.py

* Update 10_entropy.py

* Update 10_entropy.py

* Update 10_entropy.py

* Update 10_entropy.py

* Update 10_entropy.py

* Update 10_entropy.py

* Update 10_entropy.py

* updating 10_entropy notebook

* Add authors

* Update 10_entropy.py

* Update 10_entropy.py

* Update 10_entropy.py

---------

Co-authored-by: “Frederic-vW” <“[email protected]”>
Co-authored-by: Victor Férat <[email protected]>
Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com>
4 people authored Feb 6, 2024
1 parent 2b34e1c commit ab7ac25
Showing 3 changed files with 211 additions and 81 deletions.
2 changes: 2 additions & 0 deletions =61.0.0
@@ -0,0 +1,2 @@
Defaulting to user installation because normal site-packages is not writeable
Requirement already satisfied: setuptools in /home/frederic/.local/lib/python3.6/site-packages (41.0.1)
8 changes: 8 additions & 0 deletions docs/references.bib
@@ -167,3 +167,11 @@ @article{von2018partial
volume = {9},
year = {2018}
}

@article{vonWegner2023,
author = {von Wegner, Frederic and Wiemers, Milena and Hermann, Gesine and T\"{o}dt, Inken and Tagliazucchi, Enzo and Laufs, Helmut},
doi = {10.1007/s10548-023-01006-2},
journal = {Brain Topography},
title = {Complexity Measures for EEG Microstate Sequences: Concepts and Algorithms},
year = {2023}
}
282 changes: 201 additions & 81 deletions tutorials/segmentation/10_entropy.py
@@ -1,135 +1,255 @@
"""
Segmentation metrics
====================
Entropy and related measures
============================
This tutorial introduces measures of entropy and excess entropy for microstate.
We start by fitting a modified K-means
(:class:`~pycrostates.cluster.ModKMeans`) with a sample dataset. For more
details about fitting a clustering algorithm, please refer to
:ref:`this tutorial <sphx_glr_generated_auto_tutorials_cluster_00_modK.py>`.
.. note::
The lemon datasets used in this tutorial is composed of EEGLAB files. To
use the MNE reader :func:`mne.io.read_raw_eeglab`, the ``pymatreader``
optional dependency is required. Use the following installation method
appropriate for your environment:
- ``pip install pymatreader``
- ``conda install -c conda-forge pymatreader``
Note that an environment created via the `MNE installers`_ includes
``pymatreader`` by default.
.. include:: ../../../../links.inc
"""
#%%
# .. include:: ../../../../links.inc

#%%
# .. note::
#
# The lemon datasets used in this tutorial is composed of EEGLAB files. To
# use the MNE reader :func:`mne.io.read_raw_eeglab`, the ``pymatreader``
# optional dependency is required. Use the following installation method
# appropriate for your environment:
#
# - ``pip install pymatreader``
# - ``conda install -c conda-forge pymatreader``
#
# Note that an environment created via the `MNE installers`_ includes
# ``pymatreader`` by default.

# Authors: Frederic von Wegner <[email protected]>
#          Victor Férat <[email protected]>

# %%

# sphinx_gallery_thumbnail_number = 2

import numpy as np
from matplotlib import pyplot as plt
from mne.io import read_raw_eeglab

from pycrostates.cluster import ModKMeans
from pycrostates.datasets import lemon

#%% [markdown]
# Load example data
# -----------------
# We load 60 seconds of eyes-closed (EC) resting-state data from the
# :footcite:t:`babayan_mind-brain-body_2019` database.

#%%
raw_fname = lemon.data_path(subject_id="010021", condition="EC")
raw = read_raw_eeglab(raw_fname, preload=False)
raw.crop(0, 60) # crop the dataset to speed up computation
raw.load_data()
raw.set_eeg_reference("average") # Apply a common average reference

#%%
# This subject has a posterior dominant rhythm in the alpha frequency band.
_ = raw.plot(duration=10.0, start=10.0, n_channels=25)

# %% [markdown]
# Microstate clustering
# ---------------------
# Perform a clustering into :math:`K=5` microstate classes.

# %%
ModK = ModKMeans(n_clusters=5, random_state=42)
ModK.fit(raw, n_jobs=2)
ModK.reorder_clusters(order=[4, 2, 0, 3, 1])  # ordering for subject 010021
ModK.rename_clusters(new_names=["A", "B", "C", "D", "F"])
ModK.plot()

#%% [markdown]
# Segmentation
# ------------
# Perform two segmentations, i.e. back-fitting the maps obtained in the
# previous step, followed by:
#
# 1. Minimal post-processing (``half_window_size=1``, ``min_segment_length=1``).
# 2. Temporal smoothing (``half_window_size=5``, ``min_segment_length=5``).

#%%
# Once a set of cluster centers has been fitted, it can be used to predict the
# microstate segmentation with the method
# :meth:`pycrostates.cluster.ModKMeans.predict`. It returns either a
# `~pycrostates.segmentation.RawSegmentation` or an
# `~pycrostates.segmentation.EpochsSegmentation` depending on the object to
# segment. Below, the provided instance is a continuous `~mne.io.Raw` object:

segm_pure = ModK.predict(
    raw,
    reject_by_annotation=True,
    factor=10,
    half_window_size=1,
    min_segment_length=1,
    reject_edges=True,
)

segm_smooth = ModK.predict(
    raw,
    reject_by_annotation=True,
    factor=10,
    half_window_size=5,
    min_segment_length=5,
    reject_edges=True,
)

#%% [markdown]
# The differences between the non-smoothed (pure) and smoothed microstate
# sequences can be visualized with the plot method of the segmentation
# objects.

#%%
t0, t1 = 6, 7
_ = segm_pure.plot(tmin=t0, tmax=t1)
plt.show()
_ = segm_smooth.plot(tmin=t0, tmax=t1)
plt.show()

#%% [markdown]
# Shannon entropy
# ---------------
# The Shannon entropy :footcite:t:`shannon1948mathematical` of the microstate
# sequence describes how flat the microstate class distribution is. The two
# extremes are:
#
# - A flat distribution. In this example, the maximum entropy would be
#   observed if each microstate class (A, B, C, D, F) occurred with
#   probability :math:`p=1/5`. The resulting Shannon entropy would be
#   :math:`h = \log_2(5) = 2.32` bits.
# - A peaked distribution. If any microstate class occurred with probability
#   :math:`p=1`, and all other classes with probability :math:`p=0`, the
#   Shannon entropy would achieve its minimum value of :math:`h=0` bits.
#
# In the example below, we observe that smoothing leads to a slight entropy
# reduction.

#%%
# Entropy
h_pure = segm_pure.entropy(ignore_repetitions=False)
h_smooth = segm_smooth.entropy(ignore_repetitions=False)
print(f"Microstate sequence without smoothing, Shannon entropy h = {h_pure:.2f} bits")
print(f"Microstate sequence with smoothing, Shannon entropy h = {h_smooth:.2f} bits")

#%% [markdown]
# Entropy rate and excess entropy
# -------------------------------
# The entropy rate of the microstate sequence is a measure of its Kolmogorov
# complexity, while excess entropy measures statistical complexity. A high
# entropy rate (high Kolmogorov complexity) means that the next microstate
# label is less predictable from the sequence history.
# The history length is provided as a free parameter ``history_length`` when
# calling `~pycrostates.segmentation.excess_entropy_rate`, and is given in
# units of samples.
# The `~pycrostates.segmentation.excess_entropy_rate` function performs a
# linear fit to the joint entropy values across different history lengths and
# returns two parameters: the slope corresponds to the entropy rate, the
# y-axis intercept to the excess entropy.
# This procedure is illustrated below. Further details are given in
# :footcite:t:`vonWegner2023`.

#%%
# Excess entropy
from pycrostates.segmentation import excess_entropy_rate

h_length = 9  # history length in samples
a, b, residuals, lags, joint_entropies = excess_entropy_rate(
    segm_pure, history_length=h_length, ignore_repetitions=False
)
print(f"Entropy rate: {a:.3f} bits/sample.")
print(f"Excess entropy: {b:.3f} bits.")

# joint entropy plot from which excess entropy and entropy rate are calculated
plt.figure()
plt.plot(lags, joint_entropies, "-sk")
plt.plot(lags, a * lags + b, "-b")
plt.xlabel("lag (samples)")
plt.ylabel("joint entropy (bit)")
plt.title("Entropy rate & excess entropy")
plt.show()
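
#%% [markdown]
# As described above, the decomposition is a straight-line fit to the joint
# entropy curve, :math:`H(n) \approx a \cdot n + b`, with slope :math:`a` the
# entropy rate and intercept :math:`b` the excess entropy. As an illustrative
# cross-check (not the pycrostates implementation), the same two numbers can
# be recovered from the returned ``lags`` and ``joint_entropies`` arrays with
# :func:`numpy.polyfit`:

#%%
a_fit, b_fit = np.polyfit(lags, joint_entropies, deg=1)  # slope, intercept
print(f"Entropy rate from np.polyfit: {a_fit:.3f} bits/sample.")
print(f"Excess entropy from np.polyfit: {b_fit:.3f} bits.")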

#%% [markdown]
# We can now test how microstate sequence (Kolmogorov) complexity changes with
# pre-processing:
#
# - no smoothing, full microstate sequence (duplicates not removed)
# - smoothing, full microstate sequence (duplicates not removed)
# - no smoothing, microstate jump sequence (duplicates removed)
# - smoothing, microstate jump sequence (duplicates removed)
#
# Smoothing makes microstate sequences more predictable (less complex), while
# removing duplicates makes sequences less predictable (more complex).

#%%
# We can ignore state repetitions (i.e. self-transitions) by setting
# ``ignore_repetitions`` to ``True``. This is useful when you don't want to
# take state durations into account.
er_pure, _, _, _, _ = excess_entropy_rate(
    segm_pure, history_length=h_length, ignore_repetitions=False
)
er_smooth, _, _, _, _ = excess_entropy_rate(
    segm_smooth, history_length=h_length, ignore_repetitions=False
)
er_pure_jump, _, _, _, _ = excess_entropy_rate(
    segm_pure, history_length=h_length, ignore_repetitions=True
)
er_smooth_jump, _, _, _, _ = excess_entropy_rate(
    segm_smooth, history_length=h_length, ignore_repetitions=True
)
print(f"1. Microstate sequence without smoothing, entropy rate: {er_pure:.2f} bits/sample")
print(f"2. Microstate sequence with smoothing, entropy rate: {er_smooth:.2f} bits/sample")
print(f"3. Microstate jump sequence without smoothing, entropy rate: {er_pure_jump:.2f} bits/sample")
print(f"4. Microstate jump sequence with smoothing, entropy rate: {er_smooth_jump:.2f} bits/sample")

#%% [markdown]
# Autoinformation function
# ------------------------
# The autoinformation function (AIF) is the information-theoretic analog of
# the autocorrelation function (ACF) for numerical time series.
# The autoinformation coefficient at time lag :math:`k` is the information
# shared between microstate labels :math:`k` time samples apart. It is
# computed as the mutual information between the microstate label :math:`X_t`
# at time :math:`t` and the label :math:`X_{t+k}` at time :math:`t+k`,
# averaged across the whole sequence:
# :math:`H(X_{t+k}) - H(X_{t+k} \vert X_{t})`.
#
# Below, we compare the AIF of microstate sequences with and without
# smoothing. Smoothing increases overall temporal dependencies and removes
# the microstate oscillations (AIF peaks at 50, 100, 150 ms) that are visible
# in the minimally pre-processed sequence.

#%%
# We compute the AIF for the full microstate sequence with and without
# smoothing.
from pycrostates.segmentation import auto_information_function

lags1 = np.arange(1, 100)
lags, ai_pure = auto_information_function(
    segm_pure, lags=lags1, ignore_repetitions=False, n_jobs=2
)
lags, ai_smooth = auto_information_function(
    segm_smooth, lags=lags1, ignore_repetitions=False, n_jobs=2
)

#%%
# AIF plot
lags_ms = lags * 1000 / raw.info["sfreq"]  # convert samples to milliseconds
fsize = 14

plt.figure(figsize=(16, 4))
plt.semilogy(lags_ms, ai_pure, "-k", label="AIF pure")
plt.semilogy(lags_ms, ai_smooth, "-b", label="AIF smooth")
plt.legend(loc="upper right", fontsize=fsize)
plt.xlabel("lag (ms)", fontsize=fsize)
plt.ylabel("autoinf. (bit)", fontsize=fsize)
plt.title("Autoinformation function", fontsize=fsize)
plt.show()
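
#%% [markdown]
# For intuition, the autoinformation coefficient at a single lag can be
# computed directly from a label sequence as the mutual information
# :math:`I(X_t; X_{t+k}) = H(X_t) + H(X_{t+k}) - H(X_t, X_{t+k})`, which is
# equivalent to the form :math:`H(X_{t+k}) - H(X_{t+k} \vert X_t)` given
# above. The function below is an illustrative sketch (not the pycrostates
# implementation) that estimates the probabilities with plain histograms.

#%%
def autoinformation(labels, k):
    """Mutual information (bits) between labels k samples apart (sketch)."""
    x, y = np.asarray(labels[:-k]), np.asarray(labels[k:])
    n = x.size
    # marginal entropies H(X_t) and H(X_{t+k})
    px = np.unique(x, return_counts=True)[1] / n
    py = np.unique(y, return_counts=True)[1] / n
    h_x = -np.sum(px * np.log2(px))
    h_y = -np.sum(py * np.log2(py))
    # joint entropy H(X_t, X_{t+k}) from the unique label pairs
    pxy = np.unique(np.stack([x, y]), axis=1, return_counts=True)[1] / n
    h_xy = -np.sum(pxy * np.log2(pxy))
    return h_x + h_y - h_xy


# an i.i.d. sequence carries (almost) no information across lags
iid = np.random.default_rng(0).integers(0, 5, size=5_000)
print(f"AI at lag 1 for an i.i.d. sequence: {autoinformation(iid, 1):.4f} bits")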

#%% [markdown]
# Partial autoinformation
# -----------------------
# Partial autoinformation (PAI) describes the dependence between microstate
# sequence labels :math:`k` samples apart, removing the influence of all
# intermediate labels; the autoinformation function does not account for the
# effect of intermediate time steps.
# PAI is computationally more expensive, so it is recommended to start with a
# low number of lags (e.g. ``5``).
# PAI coefficients can identify (first-order) Markov processes, whose
# coefficients are zero for lags :math:`k \ge 2`.
#
# Below, we compare the PAI coefficients of microstate sequences with and
# without smoothing. We observe that smoothing shifts temporal dependencies
# towards the first time lag, i.e. smoothing makes sequences more Markovian.

#%%
from pycrostates.segmentation import partial_auto_information_function

lags_paif = np.arange(1, 6)
lags2, pai_pure = partial_auto_information_function(
    segm_pure, lags=lags_paif, ignore_repetitions=False, n_jobs=1
)
lags2, pai_smooth = partial_auto_information_function(
    segm_smooth, lags=lags_paif, ignore_repetitions=False, n_jobs=1
)

#%%
# PAIF plot
w = 0.4
wh = w / 2

fig = plt.figure(figsize=(7.5, 5))
ax = plt.gca()
ax.bar(lags_paif - wh, pai_pure, width=w, color="k", label="pure")
ax.bar(lags_paif + wh, pai_smooth, width=w, color="b", label="smoothed")
ax.legend(loc="upper right", fontsize=14)
ax.set_xlabel("lag (samples)")
ax.set_ylabel("partial autoinf. (bit)")

# inset axes zoom in on the small coefficients at lags k >= 2
offset = 1
left, bottom, width, height = 0.5, 0.35, 0.35, 0.3
axin = fig.add_axes([left, bottom, width, height])
axin.bar(lags_paif[offset:] - wh, pai_pure[offset:], color="k", width=w)
axin.bar(lags_paif[offset:] + wh, pai_smooth[offset:], color="b", width=w)
axin.set_xlabel("lag (samples)")
axin.set_ylabel("partial autoinf. (bit)")

ax.set_title("Partial autoinformation function")
plt.show()
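
#%% [markdown]
# For intuition, the PAI coefficient at lag :math:`k=2` can be written with
# conditional entropies as
# :math:`H(X_{t+2} \vert X_{t+1}) - H(X_{t+2} \vert X_{t+1}, X_t)`, i.e. the
# information :math:`X_t` adds about :math:`X_{t+2}` beyond what
# :math:`X_{t+1}` already provides. The sketch below is illustrative only
# (not the pycrostates implementation); it estimates all entropies from joint
# histograms, and for a first-order Markov chain the result is (near) zero.

#%%
def _joint_entropy(*seqs):
    """Joint Shannon entropy (bits) of aligned label sequences (sketch)."""
    _, counts = np.unique(np.stack(seqs), axis=1, return_counts=True)
    p = counts / counts.sum()
    return -np.sum(p * np.log2(p))


def pai_lag2(labels):
    """Partial autoinformation at lag 2 (sketch)."""
    labels = np.asarray(labels)
    x0, x1, x2 = labels[:-2], labels[1:-1], labels[2:]
    # H(X_{t+2} | X_{t+1}) = H(X_{t+1}, X_{t+2}) - H(X_{t+1})
    h_cond_1 = _joint_entropy(x1, x2) - _joint_entropy(x1)
    # H(X_{t+2} | X_{t+1}, X_t) = H(X_t, X_{t+1}, X_{t+2}) - H(X_t, X_{t+1})
    h_cond_2 = _joint_entropy(x0, x1, x2) - _joint_entropy(x0, x1)
    return h_cond_1 - h_cond_2


# a first-order Markov chain: stay with p=0.8, else jump uniformly at random
rng = np.random.default_rng(1)
chain = [0]
for _ in range(20_000):
    chain.append(chain[-1] if rng.random() < 0.8 else int(rng.integers(0, 5)))
print(f"PAI at lag 2 for a first-order Markov chain: {pai_lag2(chain):.4f} bits")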


#%%
# References
# ----------
# .. footbibliography::
