# coding_neural_network.py
import numpy as np
import sklearn
import sklearn.datasets
import matplotlib.pyplot as plt
# Initialize weights and biases
def init_weights_biases(input_dim, hdim1, hdim2, output_dim):
    # Initialize the parameters to random values scaled by each layer's fan-in. We need to learn these.
    np.random.seed(0)
    W1 = np.random.randn(input_dim, hdim1) / np.sqrt(input_dim)
    b1 = np.zeros((1, hdim1))
    W2 = np.random.randn(hdim1, hdim2) / np.sqrt(hdim1)
    b2 = np.zeros((1, hdim2))
    W3 = np.random.randn(hdim2, output_dim) / np.sqrt(hdim2)
    b3 = np.zeros((1, output_dim))
    return W1, b1, W2, b2, W3, b3
# Softmax Activation
def softmax(z):
    # Shift by the row-wise max for numerical stability before exponentiating
    exp_scores = np.exp(z - np.max(z, axis=1, keepdims=True))
    return exp_scores / np.sum(exp_scores, axis=1, keepdims=True)
# Cross Entropy Loss (uses the module-level X, num_examples and reg_strength)
def log_likelihood(model, y):
    W1, b1, W2, b2, W3, b3 = model['W1'], model['b1'], model['W2'], model['b2'], model['W3'], model['b3']
    z1 = np.dot(X, W1) + b1
    # RELU alternative: a1 = np.maximum(0, z1)
    a1 = np.tanh(z1)
    z2 = np.dot(a1, W2) + b2
    # RELU alternative: a2 = np.maximum(0, z2)
    a2 = np.tanh(z2)
    z3 = np.dot(a2, W3) + b3
    probs = softmax(z3)
    # error = -1/len(probs)*np.sum(y*np.log(probs)+(1-y)*np.log(1-probs))
    error = -np.log(probs[range(num_examples), y])
    data_loss = np.sum(error) / num_examples
    reg_loss = reg_strength / 2 * (np.sum(np.square(W1)) + np.sum(np.square(W2)) + np.sum(np.square(W3)))
    return reg_loss + data_loss
def plot_decision_boundary(pred_func):
    # Set min and max values from the data matrix and give them some padding
    x_min, x_max = X[:, 0].min() - .5, X[:, 0].max() + .5
    y_min, y_max = X[:, 1].min() - .5, X[:, 1].max() + .5
    h = 0.01
    # Generate a grid of points with distance h between them; these are the points the contour is drawn over
    xx, yy = np.meshgrid(np.arange(x_min, x_max, h), np.arange(y_min, y_max, h))
    # Predict the function value for the whole grid, which gives the contour its color
    Z = pred_func(np.c_[xx.ravel(), yy.ravel()])
    Z = Z.reshape(xx.shape)
    # Plot the contour of Z over the xx/yy grid, then the training examples on top
    plt.contourf(xx, yy, Z, cmap=plt.cm.Spectral)
    plt.scatter(X[:, 0], X[:, 1], c=y, cmap=plt.cm.Spectral)
    # for contourf reference see http://matplotlib.org/api/pyplot_api.html#matplotlib.pyplot.contourf
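# The commented-out plot_decision_boundary call at the bottom of this file expects a
# predict(model, x) helper that is not defined anywhere. A minimal sketch of such a helper
# is given below, assuming the same three-layer tanh forward pass used in log_likelihood.
def predict(model, x):
    W1, b1, W2, b2, W3, b3 = model['W1'], model['b1'], model['W2'], model['b2'], model['W3'], model['b3']
    a1 = np.tanh(np.dot(x, W1) + b1)
    a2 = np.tanh(np.dot(a1, W2) + b2)
    probs = softmax(np.dot(a2, W3) + b3)
    # Return the index of the most probable class for each input row
    return np.argmax(probs, axis=1)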
X, y = sklearn.datasets.make_moons(200, noise=0.20)
num_examples = len(X) # training set size
input_dim = 2 # input layer dimensionality
hdim1 = 3 # hidden layer1
hdim2 = 3 # hidden layer2
output_dim = 2 # output layer dimensionality
num_passes = 15000
# Gradient descent parameters
lr_rate = 0.00001 # learning rate for gradient descent
reg_strength = 0.01 # regularization strength
# init weights and biases
W1, b1, W2, b2, W3, b3 = init_weights_biases(input_dim, hdim1, hdim2, output_dim)
# Gradient descent (full batch)
for i in range(0, num_passes):
    # Forward Propagation
    z1 = np.dot(X, W1) + b1
    # relu alternative: a1 = np.maximum(0, z1)
    a1 = np.tanh(z1)
    z2 = np.dot(a1, W2) + b2
    # relu alternative: a2 = np.maximum(0, z2)
    a2 = np.tanh(z2)
    z3 = np.dot(a2, W3) + b3
    probs = softmax(z3)
    # Backward Propagation
    delta3 = probs.copy()  # copy so probs stays intact for the accuracy check below
    delta3[range(num_examples), y] -= 1
    delta3 /= num_examples
    dW3 = np.dot(a2.T, delta3)
    db3 = np.sum(delta3, axis=0, keepdims=True)
    # for RELU: delta2 = np.dot(delta3, W3.T); delta2[a2 <= 0] = 0
    # tanh derivative: 1 - tanh(z)^2
    delta2 = np.dot(delta3, W3.T) * (1 - np.power(a2, 2))
    dW2 = np.dot(a1.T, delta2)
    db2 = np.sum(delta2, axis=0, keepdims=True)
    # for RELU: delta1 = np.dot(delta2, W2.T); delta1[a1 <= 0] = 0
    delta1 = np.dot(delta2, W2.T) * (1 - np.power(a1, 2))
    dW1 = np.dot(X.T, delta1)
    db1 = np.sum(delta1, axis=0, keepdims=True)
    # Add regularization gradients (bias terms are not regularized)
    dW1 += reg_strength * W1
    dW2 += reg_strength * W2
    dW3 += reg_strength * W3
    # Gradient descent parameter update: step against the gradient with learning rate lr_rate
    W1 -= lr_rate * dW1
    b1 -= lr_rate * db1
    W2 -= lr_rate * dW2
    b2 -= lr_rate * db2
    W3 -= lr_rate * dW3
    b3 -= lr_rate * db3
    model = {'W1': W1, 'b1': b1, 'W2': W2, 'b2': b2, 'W3': W3, 'b3': b3}
    if i % 1000 == 0:
        # Training accuracy: the predicted class is the argmax of the softmax output
        predictions = np.argmax(probs, axis=1)
        accuracy = np.mean(predictions == y)
        print("Loss after iteration {0}: {1} \t accuracy: {2}".format(i, log_likelihood(model, y), accuracy * 100))
# plot_decision_boundary(lambda x: predict(model, x))
# plt.title("Decision Boundary for hidden layer size 3")
# plt.show()