-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathLogRegvsPAC.py
110 lines (75 loc) · 2.81 KB
/
LogRegvsPAC.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
# -*- coding: utf-8 -*-
"""
Created on Tue Dec 24 14:56:28 2019
@author: Hans
"""
%reset -f
#%% create a random dataset
import numpy as np
from sklearn.datasets import make_classification
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import train_test_split
# Create the dataset, all features are composed out of data drawn from gaussian distributions
X, Y = make_classification(n_samples=5000,
n_features=4,
n_informative=2,
n_redundant=0,
n_repeated=0,
n_classes=2,
n_clusters_per_class=2)
# Split the dataset
X_train, X_test, Y_train, Y_test = train_test_split(X, Y, test_size=0.2, random_state=1000)
#%% We perform logistic regression
lr = LogisticRegression()
lr.fit(X_train, Y_train)
print('Logistic Regression score: {}'.format(lr.score(X_test, Y_test)))
#%% Passive Aggressive Classification
import matplotlib.pyplot as plt
Y_train[Y_train==0] = -1
Y_test[Y_test==0] = -1
C = 0.01 # softens the classification
w = np.zeros((X_train.shape[1], 1))
# Implement a Passive Aggressive Classification
for i in range(X_train.shape[0]):
xi = X_train[i].reshape((X_train.shape[1], 1))
loss = max(0, 1 - (Y_train[i] * np.dot(w.T, xi))) # implement the Hinge Loss
tau = loss / (np.power(np.linalg.norm(xi, ord=2), 2) + (1 / (2*C))) #2-norm (largest sing. value)
coeff = tau * Y_train[i]
w += coeff * xi
# Compute accuracy
Y_pred = np.sign(np.dot(w.T, X_test.T))
c = np.count_nonzero(Y_pred - Y_test)
print('PA accuracy: {}'.format(1 - float(c) / X_test.shape[0]))
# after one run we obtain the same result
#%% Regression
"""
Instead of a False and True outcome, we have a continious outcome; such as a percentage
"""
import matplotlib.pyplot as plt
import numpy as np
from sklearn.datasets import make_regression
# Create the dataset
X, Y = make_regression(n_samples=5000,
n_features=4)
# Implement a Passive Aggressive Regression
C = 0.01 #agressiveness of the correction
eps = 0.1 # introduce a tollerance for small errors
w = np.zeros((X.shape[1], 1))
errors = []
for i in range(X.shape[0]):
xi = X[i].reshape((X.shape[1], 1))
yi = np.dot(w.T, xi)
loss = max(0, np.abs(yi - Y[i]) - eps)
tau = loss / (np.power(np.linalg.norm(xi, ord=2), 2) + (1 / (2*C)))
coeff = tau * np.sign(Y[i] - yi) # since continious outcome, and no longer -1 or 1
errors.append(np.abs(Y[i] - yi)[0, 0])
w += coeff * xi
# Show the error plot
fig, ax = plt.subplots(figsize=(16, 8))
ax.plot(errors)
ax.set_xlabel('Time')
ax.set_ylabel('Error')
ax.set_title('Passive Aggressive Regression Absolute Error')
ax.grid()
plt.show()
# play with C and eps to optimise results