Merge branch 'main' of github.com:mariolpantunes/knee

mariolpantunes · Jan 23, 2025 · 1014d9e · 1014d9e
2 parents 0d368aa + 9fef003
commit 1014d9e
Show file tree

Hide file tree

Showing 14 changed files with 343,231 additions and 27 deletions.
diff --git a/.github/workflows/main.yml b/.github/workflows/main.yml
@@ -13,7 +13,7 @@ jobs:
       - name: Install Python 3
         uses: actions/setup-python@v4
         with:
-          python-version: 3.8
+          python-version: 3.12
       - name: Install package
         run: |
           python -m pip install --upgrade pip

diff --git a/CITATION.cff b/CITATION.cff
@@ -56,9 +56,9 @@ authors:
     orcid: 'https://orcid.org/0000-0001-5248-9184'
 identifiers:
   - type: doi
-    value: 10.5281/zenodo.10341887
+    value: 10.5281/zenodo.11089667
 repository-code: 'https://github.com/mariolpantunes/knee'
-url: 'https://pypi.org/project/knee/'
+url: 'https://pypi.org/project/kneeliverse/'
 license: MIT
 version: '1.0'
-date-released: '2024-04-26'
+date-released: '2024-05-01'
diff --git a/examples/plot_journal_kmeans.py b/examples/plot_journal_kmeans.py
@@ -66,8 +66,8 @@ def main():
         plt.scatter(X_cluster_i[:,2], X_cluster_i[:,3], c=colormap[i], s=40, marker=markers[i])
 
     ax = plt.gca()
-    ax.set_xlabel('Petal length')
-    ax.set_ylabel('Petal width')
+    ax.set_xlabel('Petal length (cm)')
+    ax.set_ylabel('Petal width (cm)')
     ax.set_xlim([0, 7.1])
     ax.set_ylim([0, 2.6])
     plt.savefig('out/kmeans_clusters.png', bbox_inches='tight', transparent=True)

diff --git a/examples/plot_journal_knees.py b/examples/plot_journal_knees.py
@@ -39,6 +39,8 @@
 logging.getLogger('PIL').setLevel(logging.WARNING)
 logger = logging.getLogger(__name__)
 
+numba_logger = logging.getLogger('numba')
+numba_logger.setLevel(logging.WARNING)
 
 def main(args):
     # Color Blind adjusted colors and markers
@@ -49,7 +51,7 @@ def main(args):
 
     points = np.genfromtxt(args.i, delimiter=',')
     logger.info(f'Loading {args.i} file ({len(points)})...')
-    reduced, removed = rdp.mp_grdp(points, t=0.0001, min_points=20)
+    reduced, removed = rdp.mp_grdp(points, t=0.00075, min_points=20)
     space_saving = round((1.0-(len(reduced)/len(points)))*100.0, 2)
     logger.info('Number of data points after RDP: %s(%s %%)', len(reduced), space_saving)
 
@@ -58,6 +60,10 @@ def main(args):
     y = points[:,1]
     plt.plot(x, y, color= colormap[0])
     points_reduced = points[reduced]
+
+    # save points to CSV
+    np.savetxt(f'traces/web0_reduced.csv', points_reduced, delimiter=",")
+
     plt.plot(points_reduced[:, 0], points_reduced[:, 1], linestyle=lines[2], marker='o', markersize=3, color=colormap[1])
     plt.savefig('out/knees_trace_reduced.png', bbox_inches='tight', transparent=True)
     plt.savefig('out/knees_trace_reduced.pdf', bbox_inches='tight', transparent=True)
@@ -73,9 +79,47 @@ def main(args):
     plt.show()
 
     # Plot filtered and final knees
+    previous_knees_len = len(knees)
+    previous_knees = set(knees)
+
     knees = pp.filter_worst_knees(points_reduced, knees)
+    if len(knees) < previous_knees_len:
+        diff = previous_knees.difference(set(knees))
+        logger.debug(f'Filter worst knees removed {previous_knees_len-len(knees)} ({diff})')
+
+        previous_knees = set(knees)
+        previous_knees_len = len(knees)
+
+        knees_worst = rdp.mapping(knees, reduced, removed)
+        plt.plot(points_reduced[:, 0], points_reduced[:, 1], linestyle=lines[2], marker='o', markersize=3, color=colormap[1])
+        plt.plot(x[knees_original], y[knees_original], 's', markersize=5, color=colormap[2])
+        plt.plot(x[knees_worst], y[knees_worst], 'o', markersize=7, color=colormap[7])
+        plt.show()
+
     knees = pp.filter_corner_knees(points_reduced, knees, t=0.33)
+    if len(knees) < previous_knees_len:
+        diff = previous_knees.difference(set(knees))
+        logger.debug(f'Filter corner knees removed {previous_knees_len-len(knees)} ({diff})')
+        previous_knees = set(knees)
+        previous_knees_len = len(knees)
+        knees_corner = rdp.mapping(knees, reduced, removed)
+        plt.plot(points_reduced[:, 0], points_reduced[:, 1], linestyle=lines[2], marker='o', markersize=3, color=colormap[1])
+        plt.plot(x[knees_worst], y[knees_worst], 's', markersize=5, color=colormap[2])
+        plt.plot(x[knees_corner], y[knees_corner], 'o', markersize=7, color=colormap[7])
+        plt.show()
+
     knees = pp.filter_clusters(points_reduced, knees, clustering.average_linkage, 0.05, knee_ranking.ClusterRanking.left)
+    if len(knees) < previous_knees_len:
+        diff = previous_knees.difference(set(knees))
+        logger.debug(f'Filter cluster removed {previous_knees_len-len(knees)} ({diff})')
+        previous_knees = set(knees)
+        previous_knees_len = len(knees)
+        knees_cluster = rdp.mapping(knees, reduced, removed)
+        plt.plot(points_reduced[:, 0], points_reduced[:, 1], linestyle=lines[2], marker='o', markersize=3, color=colormap[1])
+        plt.plot(x[knees_corner], y[knees_corner], 's', markersize=5, color=colormap[2])
+        plt.plot(x[knees_cluster], y[knees_cluster], 'o', markersize=7, color=colormap[7])
+        plt.show()
+
     knees = rdp.mapping(knees, reduced, removed)
     plt.plot(points_reduced[:, 0], points_reduced[:, 1], linestyle=lines[2], marker='o', markersize=3, color=colormap[1])
     plt.plot(x[knees_original], y[knees_original], 's', markersize=5, color=colormap[2])

diff --git a/reduce.sh b/reduce.sh
diff --git a/requirements.txt b/requirements.txt
@@ -1,10 +1,10 @@
-tqdm>=4.66.1
-pdoc>=14.3.0
-numpy>=1.26.2
-imageio>=2.33.1
-jax[cpu]>=0.4.25
-pyBlindOpt>=0.1.1
-matplotlib>=3.8.2
-scikit-learn>=1.3.2
+tqdm>=4.66.5
+pdoc>=14.6.0
+numpy>=2.1.0
+imageio>=2.35.1
+jax[cpu]>=0.4.31
+pyBlindOpt>=0.1.3
+matplotlib>=3.9.2
+scikit-learn>=1.5.1
 exectimeit>=0.1.1
-pyUTSAlgorithms>=0.1.2
+pyUTSAlgorithms>=0.1.3
diff --git a/setup.cfg b/setup.cfg
@@ -18,8 +18,8 @@ package_dir =
 packages = find:
 python_requires = >=3.8
 install_requires =
-    numpy >= 1.23
-    pyUTSAlgorithms>=0.1.2
+    numpy>=2.0.0
+    pyUTSAlgorithms>=0.1.3
 
 [options.packages.find]
 where = src
diff --git a/src/kneeliverse/linear_fit.py b/src/kneeliverse/linear_fit.py
@@ -24,6 +24,7 @@
 
 from typing import Union
 
+from numba import jit
 
 logger = logging.getLogger(__name__)
 
@@ -46,6 +47,7 @@ def linear_fit_points(points: np.ndarray) -> tuple:
     return linear_fit(x, y)
 
 
+@jit(nopython=True)
 def linear_fit(x: np.ndarray, y: np.ndarray) -> tuple:
     """
     Computes the linear fit for the points.
@@ -85,6 +87,7 @@ def linear_transform_points(points: np.ndarray, coef: tuple) -> np.ndarray:
     return linear_transform(x, coef)
 
 
+@jit(nopython=True)
 def linear_transform(x: np.ndarray, coef: tuple) -> np.ndarray:
     """
     Computes the y values for an x array and the given coefficients.
@@ -120,6 +123,7 @@ def linear_hv_residuals_points(points: np.ndarray) -> float:
     return linear_hv_residuals(x,y)
 
 
+@jit(nopython=True)
 def linear_hv_residuals(x: np.ndarray, y: np.ndarray) -> float:
     """
     Computes the residual error from a straight line fitting.
@@ -169,6 +173,7 @@ def linear_fit_transform_points(points: np.ndarray, vertical:bool=False) -> Unio
     return linear_fit_transform(x, y, vertical)
 
 
+@jit(nopython=True)
 def linear_fit_transform(x: np.ndarray, y: np.ndarray, vertical=False) -> Union[np.ndarray, tuple]:
     # try a typical y = mx + b line
     coef1 = linear_fit(x, y)
@@ -206,6 +211,7 @@ def linear_r2_points(points: np.ndarray, coef: tuple, r2: metrics.R2 = metrics.R
     return linear_r2(x, y, coef, r2)
 
 
+@jit(nopython=True)
 def linear_r2(x: np.ndarray, y: np.ndarray, coef: tuple, r2: metrics.R2 = metrics.R2.classic) -> float:
     """
     Computes the coefficient of determination (R2).
@@ -253,6 +259,7 @@ def rmspe_points(points: np.ndarray, coef: tuple, eps: float = 1e-16) -> float:
     return rmspe(x, y, coef, eps)
 
 
+@jit(nopython=True)
 def rmspe(x: np.ndarray, y: np.ndarray, coef: tuple, eps: float = 1e-16) -> float:
     """
     Computes the Root Mean Squared Percentage Error (RMSPE).
@@ -289,6 +296,7 @@ def rmsle_points(points: np.ndarray, coef: tuple) -> float:
     return rmsle(x, y, coef)
 
 
+@jit(nopython=True)
 def rmsle(x: np.ndarray, y: np.ndarray, coef: tuple) -> float:
     """
     Computes the Root Mean Squared Log Error (RMSLE):
@@ -423,6 +431,7 @@ def linear_fit_residuals_points(points: np.ndarray) -> float:
     return linear_fit_residuals(x, y)
 
 
+@jit(nopython=True)
 def linear_fit_residuals(x: np.ndarray, y: np.ndarray) -> float:
     coef = linear_fit(x, y)
     y_hat = linear_transform(x, coef)
@@ -451,6 +460,7 @@ def r2_points(points: np.ndarray, t: metrics.R2 = metrics.R2.classic) -> float:
         return r2(x, y, t)
 
 
+@jit(nopython=True)
 def r2(x: np.ndarray, y: np.ndarray, t: metrics.R2 = metrics.R2.classic) -> float:
     """Computes the coefficient of determination (R2).
 
@@ -529,6 +539,7 @@ def shortest_distance_points(p: np.ndarray, a: np.ndarray, b: np.ndarray):
     return np.hypot(h, c)
 
 
+@jit(nopython=True)
 def perpendicular_distance(points: np.ndarray) -> np.ndarray:
     """
     Computes the perpendicular distance from the points to the 
@@ -544,6 +555,7 @@ def perpendicular_distance(points: np.ndarray) -> np.ndarray:
     return perpendicular_distance_index(points, 0, len(points) - 1)
 
 
+@jit(nopython=True)
 def perpendicular_distance_index(points: np.ndarray, left: int, right: int) -> np.ndarray:
     """
     Computes the perpendicular distance from the points to the 
@@ -560,6 +572,7 @@ def perpendicular_distance_index(points: np.ndarray, left: int, right: int) -> n
     return left + perpendicular_distance_points(points[left:right+1], points[left], points[right])
 
 
+@jit(nopython=True)
 def perpendicular_distance_points(pt: np.ndarray, start: np.ndarray, end: np.ndarray) -> np.ndarray:
     """
     Computes the perpendicular distance from the points to the 

diff --git a/src/kneeliverse/metrics.py b/src/kneeliverse/metrics.py
@@ -18,6 +18,8 @@
 import enum
 import numpy as np
 
+from numba import jit
+
 
 class Metrics(enum.Enum):
     """
@@ -45,6 +47,7 @@ def __str__(self):
         return self.value
 
 
+@jit(nopython=True)
 def r2(y: np.ndarray, y_hat: np.ndarray, r2: R2 = R2.classic) -> float:
     """
     Computes the coefficient of determination (R2).
@@ -73,6 +76,7 @@ def r2(y: np.ndarray, y_hat: np.ndarray, r2: R2 = R2.classic) -> float:
     return rv
 
 
+@jit(nopython=True)
 def rmse(y: np.ndarray, y_hat: np.ndarray) -> float:
     """
     Computes the Root Mean Squared Error (RMSE).
@@ -87,6 +91,7 @@ def rmse(y: np.ndarray, y_hat: np.ndarray) -> float:
     return np.sqrt(np.mean(np.square(y - y_hat)))
 
 
+@jit(nopython=True)
 def rmsle(y: np.ndarray, y_hat: np.ndarray) -> float:
     """
     Computes the Root Mean Squared Log Error (RMSLE):
@@ -104,6 +109,7 @@ def rmsle(y: np.ndarray, y_hat: np.ndarray) -> float:
     return np.sqrt(np.mean(np.square((np.log(y+1) - np.log(y_hat+1)))))
 
 
+@jit(nopython=True)
 def rmspe(y: np.ndarray, y_hat: np.ndarray, eps: float = 1e-16) -> float:
     """
     Computes the Root Mean Squared Percentage Error (RMSPE).
@@ -119,6 +125,7 @@ def rmspe(y: np.ndarray, y_hat: np.ndarray, eps: float = 1e-16) -> float:
     return np.sqrt(np.mean(np.square((y - y_hat) / (y+eps))))
 
 
+@jit(nopython=True)
 def rpd(y: np.ndarray, y_hat: np.ndarray, eps: float = 1e-16) -> float:
     """
     Computes the Relative Percentage Difference (RPD).
@@ -134,6 +141,7 @@ def rpd(y: np.ndarray, y_hat: np.ndarray, eps: float = 1e-16) -> float:
     return np.mean(np.abs((y - y_hat) / (np.maximum(y, y_hat)+eps)))
 
 
+@jit(nopython=True)
 def residuals(y: np.ndarray, y_hat: np.ndarray) -> float:
     """
     Computes the residual error of the fit.
@@ -148,6 +156,7 @@ def residuals(y: np.ndarray, y_hat: np.ndarray) -> float:
     return np.sum(np.square((y-y_hat)))
 
 
+@jit(nopython=True)
 def smape(y: np.ndarray, y_hat: np.ndarray, eps: float = 1e-16) -> float:
     """
     Computes Symmetric Mean Absolute Percentage Error (SMAPE).

diff --git a/src/kneeliverse/rdp.py b/src/kneeliverse/rdp.py
@@ -25,6 +25,9 @@
 import kneeliverse.evaluation as evaluation
 
 
+from numba import jit
+
+
 #import matplotlib.pyplot as plt
 
 logger = logging.getLogger(__name__)