Skip to content

Commit

Permalink
Merge branch 'main' of github.com:mariolpantunes/knee
Browse files Browse the repository at this point in the history
  • Loading branch information
mariolpantunes committed Jan 23, 2025
2 parents 0d368aa + 9fef003 commit 1014d9e
Show file tree
Hide file tree
Showing 14 changed files with 343,231 additions and 27 deletions.
2 changes: 1 addition & 1 deletion .github/workflows/main.yml
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@ jobs:
- name: Install Python 3
uses: actions/setup-python@v4
with:
python-version: 3.8
python-version: 3.12
- name: Install package
run: |
python -m pip install --upgrade pip
Expand Down
6 changes: 3 additions & 3 deletions CITATION.cff
Original file line number Diff line number Diff line change
Expand Up @@ -56,9 +56,9 @@ authors:
orcid: 'https://orcid.org/0000-0001-5248-9184'
identifiers:
- type: doi
value: 10.5281/zenodo.10341887
value: 10.5281/zenodo.11089667
repository-code: 'https://github.com/mariolpantunes/knee'
url: 'https://pypi.org/project/knee/'
url: 'https://pypi.org/project/kneeliverse/'
license: MIT
version: '1.0'
date-released: '2024-04-26'
date-released: '2024-05-01'
4 changes: 2 additions & 2 deletions examples/plot_journal_kmeans.py
Original file line number Diff line number Diff line change
Expand Up @@ -66,8 +66,8 @@ def main():
plt.scatter(X_cluster_i[:,2], X_cluster_i[:,3], c=colormap[i], s=40, marker=markers[i])

ax = plt.gca()
ax.set_xlabel('Petal length')
ax.set_ylabel('Petal width')
ax.set_xlabel('Petal length (cm)')
ax.set_ylabel('Petal width (cm)')
ax.set_xlim([0, 7.1])
ax.set_ylim([0, 2.6])
plt.savefig('out/kmeans_clusters.png', bbox_inches='tight', transparent=True)
Expand Down
46 changes: 45 additions & 1 deletion examples/plot_journal_knees.py
Original file line number Diff line number Diff line change
Expand Up @@ -39,6 +39,8 @@
logging.getLogger('PIL').setLevel(logging.WARNING)
logger = logging.getLogger(__name__)

numba_logger = logging.getLogger('numba')
numba_logger.setLevel(logging.WARNING)

def main(args):
# Color Blind adjusted colors and markers
Expand All @@ -49,7 +51,7 @@ def main(args):

points = np.genfromtxt(args.i, delimiter=',')
logger.info(f'Loading {args.i} file ({len(points)})...')
reduced, removed = rdp.mp_grdp(points, t=0.0001, min_points=20)
reduced, removed = rdp.mp_grdp(points, t=0.00075, min_points=20)
space_saving = round((1.0-(len(reduced)/len(points)))*100.0, 2)
logger.info('Number of data points after RDP: %s(%s %%)', len(reduced), space_saving)

Expand All @@ -58,6 +60,10 @@ def main(args):
y = points[:,1]
plt.plot(x, y, color= colormap[0])
points_reduced = points[reduced]

# save points to CSV
np.savetxt(f'traces/web0_reduced.csv', points_reduced, delimiter=",")

plt.plot(points_reduced[:, 0], points_reduced[:, 1], linestyle=lines[2], marker='o', markersize=3, color=colormap[1])
plt.savefig('out/knees_trace_reduced.png', bbox_inches='tight', transparent=True)
plt.savefig('out/knees_trace_reduced.pdf', bbox_inches='tight', transparent=True)
Expand All @@ -73,9 +79,47 @@ def main(args):
plt.show()

# Plot filtered and final knees
previous_knees_len = len(knees)
previous_knees = set(knees)

knees = pp.filter_worst_knees(points_reduced, knees)
if len(knees) < previous_knees_len:
diff = previous_knees.difference(set(knees))
logger.debug(f'Filter worst knees removed {previous_knees_len-len(knees)} ({diff})')

previous_knees = set(knees)
previous_knees_len = len(knees)

knees_worst = rdp.mapping(knees, reduced, removed)
plt.plot(points_reduced[:, 0], points_reduced[:, 1], linestyle=lines[2], marker='o', markersize=3, color=colormap[1])
plt.plot(x[knees_original], y[knees_original], 's', markersize=5, color=colormap[2])
plt.plot(x[knees_worst], y[knees_worst], 'o', markersize=7, color=colormap[7])
plt.show()

knees = pp.filter_corner_knees(points_reduced, knees, t=0.33)
if len(knees) < previous_knees_len:
diff = previous_knees.difference(set(knees))
logger.debug(f'Filter corner knees removed {previous_knees_len-len(knees)} ({diff})')
previous_knees = set(knees)
previous_knees_len = len(knees)
knees_corner = rdp.mapping(knees, reduced, removed)
plt.plot(points_reduced[:, 0], points_reduced[:, 1], linestyle=lines[2], marker='o', markersize=3, color=colormap[1])
plt.plot(x[knees_worst], y[knees_worst], 's', markersize=5, color=colormap[2])
plt.plot(x[knees_corner], y[knees_corner], 'o', markersize=7, color=colormap[7])
plt.show()

knees = pp.filter_clusters(points_reduced, knees, clustering.average_linkage, 0.05, knee_ranking.ClusterRanking.left)
if len(knees) < previous_knees_len:
diff = previous_knees.difference(set(knees))
logger.debug(f'Filter cluster removed {previous_knees_len-len(knees)} ({diff})')
previous_knees = set(knees)
previous_knees_len = len(knees)
knees_cluster = rdp.mapping(knees, reduced, removed)
plt.plot(points_reduced[:, 0], points_reduced[:, 1], linestyle=lines[2], marker='o', markersize=3, color=colormap[1])
plt.plot(x[knees_corner], y[knees_corner], 's', markersize=5, color=colormap[2])
plt.plot(x[knees_cluster], y[knees_cluster], 'o', markersize=7, color=colormap[7])
plt.show()

knees = rdp.mapping(knees, reduced, removed)
plt.plot(points_reduced[:, 0], points_reduced[:, 1], linestyle=lines[2], marker='o', markersize=3, color=colormap[1])
plt.plot(x[knees_original], y[knees_original], 's', markersize=5, color=colormap[2])
Expand Down
9 changes: 0 additions & 9 deletions reduce.sh

This file was deleted.

18 changes: 9 additions & 9 deletions requirements.txt
Original file line number Diff line number Diff line change
@@ -1,10 +1,10 @@
tqdm>=4.66.1
pdoc>=14.3.0
numpy>=1.26.2
imageio>=2.33.1
jax[cpu]>=0.4.25
pyBlindOpt>=0.1.1
matplotlib>=3.8.2
scikit-learn>=1.3.2
tqdm>=4.66.5
pdoc>=14.6.0
numpy>=2.1.0
imageio>=2.35.1
jax[cpu]>=0.4.31
pyBlindOpt>=0.1.3
matplotlib>=3.9.2
scikit-learn>=1.5.1
exectimeit>=0.1.1
pyUTSAlgorithms>=0.1.2
pyUTSAlgorithms>=0.1.3
4 changes: 2 additions & 2 deletions setup.cfg
Original file line number Diff line number Diff line change
Expand Up @@ -18,8 +18,8 @@ package_dir =
packages = find:
python_requires = >=3.8
install_requires =
numpy >= 1.23
pyUTSAlgorithms>=0.1.2
numpy>=2.0.0
pyUTSAlgorithms>=0.1.3

[options.packages.find]
where = src
13 changes: 13 additions & 0 deletions src/kneeliverse/linear_fit.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,7 @@

from typing import Union

from numba import jit

logger = logging.getLogger(__name__)

Expand All @@ -46,6 +47,7 @@ def linear_fit_points(points: np.ndarray) -> tuple:
return linear_fit(x, y)


@jit(nopython=True)
def linear_fit(x: np.ndarray, y: np.ndarray) -> tuple:
"""
Computes the linear fit for the points.
Expand Down Expand Up @@ -85,6 +87,7 @@ def linear_transform_points(points: np.ndarray, coef: tuple) -> np.ndarray:
return linear_transform(x, coef)


@jit(nopython=True)
def linear_transform(x: np.ndarray, coef: tuple) -> np.ndarray:
"""
Computes the y values for an x array and the given coefficients.
Expand Down Expand Up @@ -120,6 +123,7 @@ def linear_hv_residuals_points(points: np.ndarray) -> float:
return linear_hv_residuals(x,y)


@jit(nopython=True)
def linear_hv_residuals(x: np.ndarray, y: np.ndarray) -> float:
"""
Computes the residual error from a straight line fitting.
Expand Down Expand Up @@ -169,6 +173,7 @@ def linear_fit_transform_points(points: np.ndarray, vertical:bool=False) -> Unio
return linear_fit_transform(x, y, vertical)


@jit(nopython=True)
def linear_fit_transform(x: np.ndarray, y: np.ndarray, vertical=False) -> Union[np.ndarray, tuple]:
# try a typical y = mx + b line
coef1 = linear_fit(x, y)
Expand Down Expand Up @@ -206,6 +211,7 @@ def linear_r2_points(points: np.ndarray, coef: tuple, r2: metrics.R2 = metrics.R
return linear_r2(x, y, coef, r2)


@jit(nopython=True)
def linear_r2(x: np.ndarray, y: np.ndarray, coef: tuple, r2: metrics.R2 = metrics.R2.classic) -> float:
"""
Computes the coefficient of determination (R2).
Expand Down Expand Up @@ -253,6 +259,7 @@ def rmspe_points(points: np.ndarray, coef: tuple, eps: float = 1e-16) -> float:
return rmspe(x, y, coef, eps)


@jit(nopython=True)
def rmspe(x: np.ndarray, y: np.ndarray, coef: tuple, eps: float = 1e-16) -> float:
"""
Computes the Root Mean Squared Percentage Error (RMSPE).
Expand Down Expand Up @@ -289,6 +296,7 @@ def rmsle_points(points: np.ndarray, coef: tuple) -> float:
return rmsle(x, y, coef)


@jit(nopython=True)
def rmsle(x: np.ndarray, y: np.ndarray, coef: tuple) -> float:
"""
Computes the Root Mean Squared Log Error (RMSLE):
Expand Down Expand Up @@ -423,6 +431,7 @@ def linear_fit_residuals_points(points: np.ndarray) -> float:
return linear_fit_residuals(x, y)


@jit(nopython=True)
def linear_fit_residuals(x: np.ndarray, y: np.ndarray) -> float:
coef = linear_fit(x, y)
y_hat = linear_transform(x, coef)
Expand Down Expand Up @@ -451,6 +460,7 @@ def r2_points(points: np.ndarray, t: metrics.R2 = metrics.R2.classic) -> float:
return r2(x, y, t)


@jit(nopython=True)
def r2(x: np.ndarray, y: np.ndarray, t: metrics.R2 = metrics.R2.classic) -> float:
"""Computes the coefficient of determination (R2).
Expand Down Expand Up @@ -529,6 +539,7 @@ def shortest_distance_points(p: np.ndarray, a: np.ndarray, b: np.ndarray):
return np.hypot(h, c)


@jit(nopython=True)
def perpendicular_distance(points: np.ndarray) -> np.ndarray:
"""
Computes the perpendicular distance from the points to the
Expand All @@ -544,6 +555,7 @@ def perpendicular_distance(points: np.ndarray) -> np.ndarray:
return perpendicular_distance_index(points, 0, len(points) - 1)


@jit(nopython=True)
def perpendicular_distance_index(points: np.ndarray, left: int, right: int) -> np.ndarray:
"""
Computes the perpendicular distance from the points to the
Expand All @@ -560,6 +572,7 @@ def perpendicular_distance_index(points: np.ndarray, left: int, right: int) -> n
return left + perpendicular_distance_points(points[left:right+1], points[left], points[right])


@jit(nopython=True)
def perpendicular_distance_points(pt: np.ndarray, start: np.ndarray, end: np.ndarray) -> np.ndarray:
"""
Computes the perpendicular distance from the points to the
Expand Down
9 changes: 9 additions & 0 deletions src/kneeliverse/metrics.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,8 @@
import enum
import numpy as np

from numba import jit


class Metrics(enum.Enum):
"""
Expand Down Expand Up @@ -45,6 +47,7 @@ def __str__(self):
return self.value


@jit(nopython=True)
def r2(y: np.ndarray, y_hat: np.ndarray, r2: R2 = R2.classic) -> float:
"""
Computes the coefficient of determination (R2).
Expand Down Expand Up @@ -73,6 +76,7 @@ def r2(y: np.ndarray, y_hat: np.ndarray, r2: R2 = R2.classic) -> float:
return rv


@jit(nopython=True)
def rmse(y: np.ndarray, y_hat: np.ndarray) -> float:
"""
Computes the Root Mean Squared Error (RMSE).
Expand All @@ -87,6 +91,7 @@ def rmse(y: np.ndarray, y_hat: np.ndarray) -> float:
return np.sqrt(np.mean(np.square(y - y_hat)))


@jit(nopython=True)
def rmsle(y: np.ndarray, y_hat: np.ndarray) -> float:
"""
Computes the Root Mean Squared Log Error (RMSLE):
Expand All @@ -104,6 +109,7 @@ def rmsle(y: np.ndarray, y_hat: np.ndarray) -> float:
return np.sqrt(np.mean(np.square((np.log(y+1) - np.log(y_hat+1)))))


@jit(nopython=True)
def rmspe(y: np.ndarray, y_hat: np.ndarray, eps: float = 1e-16) -> float:
"""
Computes the Root Mean Squared Percentage Error (RMSPE).
Expand All @@ -119,6 +125,7 @@ def rmspe(y: np.ndarray, y_hat: np.ndarray, eps: float = 1e-16) -> float:
return np.sqrt(np.mean(np.square((y - y_hat) / (y+eps))))


@jit(nopython=True)
def rpd(y: np.ndarray, y_hat: np.ndarray, eps: float = 1e-16) -> float:
"""
Computes the Relative Percentage Difference (RPD).
Expand All @@ -134,6 +141,7 @@ def rpd(y: np.ndarray, y_hat: np.ndarray, eps: float = 1e-16) -> float:
return np.mean(np.abs((y - y_hat) / (np.maximum(y, y_hat)+eps)))


@jit(nopython=True)
def residuals(y: np.ndarray, y_hat: np.ndarray) -> float:
"""
Computes the residual error of the fit.
Expand All @@ -148,6 +156,7 @@ def residuals(y: np.ndarray, y_hat: np.ndarray) -> float:
return np.sum(np.square((y-y_hat)))


@jit(nopython=True)
def smape(y: np.ndarray, y_hat: np.ndarray, eps: float = 1e-16) -> float:
"""
Computes Symmetric Mean Absolute Percentage Error (SMAPE).
Expand Down
3 changes: 3 additions & 0 deletions src/kneeliverse/rdp.py
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,9 @@
import kneeliverse.evaluation as evaluation


from numba import jit


#import matplotlib.pyplot as plt

logger = logging.getLogger(__name__)
Expand Down
Loading

0 comments on commit 1014d9e

Please sign in to comment.