-
Notifications
You must be signed in to change notification settings - Fork 0
/
AnomalyDetector.py
84 lines (70 loc) · 2.77 KB
/
AnomalyDetector.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
from sklearn.ensemble import IsolationForest
import matplotlib.pyplot as plt
from sklearn.manifold import TSNE
class AnomalyDetector:
    """
    Isolation Forest anomaly detection on a dataframe indexed by 'Dat/Zeit'.

    Every column except the result columns added by this class is used as
    a model feature. Detection results are stored in an 'anomaly' column,
    where the value -1 marks the rows that are plotted as anomalies.
    """

    # Columns written by this class (or reserved for it); they must never be
    # passed to the model as features, otherwise a second fit/predict would
    # see the model's own previous output.
    _RESULT_COLUMNS = ('anomaly', 'score')

    def __init__(self, data, target = 'Leistung'):
        """
        Initialize AnomalyDetector instance

        Creates the model instance and stores the data re-indexed by its
        'Dat/Zeit' column (presumably timestamps - confirm with caller).

        Args:
            data: dataframe containing a 'Dat/Zeit' column; it is not
                modified, set_index returns a new frame
            target: name of the column drawn on the y axis by
                visualize_anomalies
        """
        self.model = IsolationForest(random_state=0)
        self.data = data.set_index('Dat/Zeit')
        self.target = target

    def _feature_frame(self):
        """
        Return the model input: all columns except the result columns.

        The original column order is kept, so fitting on fresh data is
        identical to fitting on the full dataframe.
        """
        is_result = self.data.columns.isin(list(self._RESULT_COLUMNS))
        return self.data.loc[:, ~is_result]

    def fit_model(self):
        """
        Fit the Isolation Forest model to the feature columns of the data

        Returns:
            Fit model
        """
        self.model.fit(self._feature_frame())
        return self.model

    def detect_anomalies(self):
        """
        Detect anomalies in the data using the Isolation Forest model

        Safe to call repeatedly: an 'anomaly' column left by an earlier
        call is overwritten and is not fed back into the model.

        Returns:
            Updated dataframe with anomaly column
        """
        self.data['anomaly'] = self.model.predict(self._feature_frame())
        return self.data

    def visualize_anomalies(self, until_date = '' ):
        """
        Visualize anomalies in the data

        Requires detect_anomalies to have been called first (reads the
        'anomaly' column).

        Args:
            until_date: only rows whose index is strictly below this value
                are drawn; when empty, all rows are drawn

        Returns:
            None
        """
        if not until_date:
            # No cut-off requested: plot everything, including the rows of
            # the last day, without requiring a datetime index.
            shown = self.data
        else:
            shown = self.data.loc[self.data.index < until_date]
        flagged = shown.loc[shown['anomaly'] == -1]

        plt.figure(figsize=(12, 6))
        plt.plot(shown.index, shown[self.target],
                 label='Normal', color='blue', linewidth=0.2)
        # Mark outliers with 'x'
        plt.scatter(flagged.index, flagged[self.target],
                    c='red', marker='x', label='Anomaly', s=10)
        plt.xlabel('Sample Index')
        plt.ylabel('Output')
        plt.title('Isolation Forest Anomaly Detection')
        plt.legend()
        plt.show()

    def visualize_tsne(self):
        """
        Visualize data after dimensionality reduction

        Requires detect_anomalies to have been called first (points are
        colored by the 'anomaly' column).

        Returns:
            None
        """
        # Apply t-SNE for dimensionality reduction on the feature columns.
        # Index.difference returns the labels sorted; that ordering is kept
        # so the embedding input is unchanged.
        feature_names = self.data.columns.difference(list(self._RESULT_COLUMNS))
        tsne = TSNE(n_components=2, random_state=0)
        tsne_result = tsne.fit_transform(self.data[feature_names])
        # Plot t-SNE with colored anomalies
        plt.figure(figsize=(10, 8))
        plt.scatter(tsne_result[:, 0], tsne_result[:, 1], c=self.data['anomaly'], cmap='coolwarm', marker='o', s=5)
        plt.title('t-SNE Plot with Anomaly Coloring')
        plt.xlabel('t-SNE Component 1')
        plt.ylabel('t-SNE Component 2')
        plt.colorbar(label='Anomaly')
        plt.show()