import numpy as np
# Linear regression fit by gradient descent (mini-batch or stochastic).
class LinearRegression():
    # theta_0 is an optional initial parameter vector; when use_theta_0 is
    # True, fitting starts from it instead of the zero vector.
    def __init__(self, theta_0=None, use_theta_0=False):
        self.theta = theta_0
        self.use_theta_0 = use_theta_0
    # Fit by mini-batch gradient descent on the mean-squared-error loss.
    def fit(self, train_x, train_y, eta=10**-3, eps=10**-11, max_iters=10**8, batch_size=100):
        m, n = train_x.shape
        if batch_size > m:
            batch_size = m
        train_y = np.ravel(train_y)  # accept (m,) or (m, 1) targets
        if not self.use_theta_0:
            self.theta = np.zeros(n)
        i = 0
        loc = 0
        while True:
            # Take the next mini-batch, wrapping around the end of the data.
            if loc + batch_size > m:
                cur_xs = np.append(train_x[loc:, :], train_x[:(loc + batch_size) % m, :], axis=0)
                cur_ys = np.append(train_y[loc:], train_y[:(loc + batch_size) % m])
            else:
                cur_xs = train_x[loc:loc + batch_size]
                cur_ys = train_y[loc:loc + batch_size]
            loc = (loc + batch_size) % m
            # Residuals on the batch give the direction to update towards.
            preds = np.dot(cur_xs, self.theta)
            misses = cur_ys - preds
            # Gradient step: theta <- theta + eta * (1/b) * X^T (y - X theta)
            new_theta = self.theta + eta * (1 / batch_size) * np.dot(cur_xs.T, misses)
            # Stop once the update is negligible or the iteration cap is hit.
            if np.linalg.norm(self.theta - new_theta) < eps or i > max_iters:
                self.theta = new_theta.copy()
                break
            i += 1
            self.theta = new_theta.copy()
    # Fit by stochastic gradient descent, cycling through examples one at a time.
    def fit_stochastic(self, train_x, train_y, eta=10**-3, eps=10**-11, max_iters=10**7):
        m, n = train_x.shape
        train_y = np.ravel(train_y)  # accept (m,) or (m, 1) targets
        if not self.use_theta_0:
            self.theta = np.zeros(n)
        i = 0
        while True:
            cur_x = train_x[i % m, :]
            cur_y = train_y[i % m]
            pred = self.theta.dot(cur_x)
            miss = cur_y - pred
            # Single-example gradient step; stop once the update is negligible
            # or the iteration cap is hit.
            new_theta = self.theta + eta * miss * cur_x
            conv = np.linalg.norm(self.theta - new_theta)
            if conv < eps or i > max_iters:
                self.theta = new_theta.copy()
                break
            i += 1
            self.theta = new_theta.copy()
    # Predict on test inputs: one dot product with theta per row.
    def predict(self, test_x):
        return test_x.dot(self.theta)
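
# A minimal usage sketch (not part of the original module): fits the model on
# synthetic data with a known parameter vector and checks that both descent
# variants recover it. The data shapes, seed, and hyperparameters below are
# assumptions chosen for illustration, not values taken from the source.
if __name__ == '__main__':
    rng = np.random.default_rng(0)
    true_theta = np.array([2.0, -1.0, 0.5])
    X = rng.normal(size=(500, 3))
    y = X.dot(true_theta) + 0.01 * rng.normal(size=500)

    model = LinearRegression()
    model.fit(X, y, eta=1e-2, max_iters=10**5, batch_size=50)
    print('mini-batch theta:', model.theta)  # should be close to true_theta

    model_sgd = LinearRegression()
    model_sgd.fit_stochastic(X, y, eta=1e-2, max_iters=10**5)
    print('SGD theta:', model_sgd.theta)
    print('predictions:', model_sgd.predict(X[:3]))

    # Closed-form least-squares solution for comparison.
    lstsq_theta, *_ = np.linalg.lstsq(X, y, rcond=None)
    print('lstsq theta:', lstsq_theta)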