Merge pull request #9 from devitocodes/zero_in_op
Move zeroing layers to inside an operator
georgebisbas authored Sep 2, 2020
2 parents 6fcd1ac + a37531e commit bde8eb5
Showing 2 changed files with 28 additions and 24 deletions.
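For context, a minimal sketch (not part of this commit, with placeholder names and shapes) of what "zeroing inside an operator" means in Devito: the reset becomes an Eq compiled into the Operator, rather than a Python-side write to the Function's .data array.

    from devito import Grid, Function, Eq, Operator

    # Placeholder buffer; Joey's real result/gradient buffers are created by its layers.
    grid = Grid(shape=(4, 4))
    f = Function(name='f', grid=grid)
    f.data[:] = 7.0                # the old approach reset buffers like this, from Python

    op = Operator([Eq(f, 0)])      # the new approach: the reset is an equation in the operator
    op.apply()
    # f.data is now all zeros; the zeroing ran inside the compiled kernel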
joey/layers.py (3 changes: 1 addition & 2 deletions)

@@ -483,8 +483,7 @@ def backprop_equations(self, prev_layer, next_layer):
         a = self._backward_tmp_constants[0]
         b = self._backward_tmp_constants[1]
 
-        return ([Eq(next_layer.result_gradients, 0),
-                 Eq(a, index // 2),
+        return ([Eq(a, index // 2),
                  Eq(b, index % 2),
                  Inc(next_layer.result_gradients[dims[0],
                                                  dims[1],
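The per-layer Eq(next_layer.result_gradients, 0) can be dropped because Net._gen_backprop_eqs (see the joey/net.py hunks below) now emits the zeroing equations once, ahead of the Inc accumulations, within the same backward operator. A rough standalone sketch of that ordering, with made-up names and shapes:

    from devito import Grid, Function, Eq, Inc, Operator

    # Hypothetical stand-in for a layer's result_gradients buffer.
    grid = Grid(shape=(3, 3))
    g = Function(name='g', grid=grid)
    g.data[:] = 5.0                         # stale gradients from a previous batch

    # The zeroing Eq precedes the Inc, so accumulation starts from zero.
    op = Operator([Eq(g, 0), Inc(g, 2.0)])
    op.apply()
    # g.data is now 2.0 everywhere; the stale 5.0 never leaks into the sum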
joey/net.py (49 changes: 27 additions & 22 deletions)

@@ -68,6 +68,9 @@ def _gen_eqs(self):
 
         input_function = None
 
+        for layer in self._layers:
+            eqs.append(Eq(layer.result, 0))
+
         for layer in self._layers:
             if input_function is not None:
                 dims = input_function.dimensions
@@ -85,6 +88,19 @@ def _gen_backprop_eqs(self):
         eqs = []
         args = []
 
+        for i in range(len(self._layers)):
+            layer = self._layers[i]
+
+            if layer.kernel_gradients is not None:
+                eqs.append(Eq(layer.kernel_gradients, 0))
+
+            if layer.bias_gradients is not None:
+                eqs.append(Eq(layer.bias_gradients, 0))
+
+            if layer.result_gradients is not None \
+                    and i < len(self._layers) - 1:
+                eqs.append(Eq(layer.result_gradients, 0))
+
         for i in range(len(self._layers) - 1, -1, -1):
             if i < len(self._layers) - 1:
                 prev_layer = self._layers[i + 1]
@@ -102,6 +118,17 @@
             args += layer_args
             eqs += layer_eqs
 
+        batch_size = self._layers[-1].result.shape[1]
+
+        for layer in self._layers:
+            if layer.kernel_gradients is not None:
+                eqs.append(Eq(layer.kernel_gradients,
+                              layer.kernel_gradients / batch_size))
+
+            if layer.bias_gradients is not None:
+                eqs.append(Eq(layer.bias_gradients,
+                              layer.bias_gradients / batch_size))
+
         return (eqs, args)
 
     @property
@@ -118,9 +145,6 @@ def forward(self, input_data):
         input_data : np.ndarray
             Input data for the network.
         """
-        for layer in self._layers:
-            layer.result.data[:] = 0
-
         self._layers[0].input.data[:] = input_data
         self._forward_operator.apply(**self._forward_arg_dict)
         return self._layers[-1].result.data
@@ -154,29 +178,10 @@ def backward(self, expected, loss_gradient_func, pytorch_optimizer=None):
            The default value is None.
        """
-        for layer in self._layers:
-            if layer.kernel_gradients is not None:
-                layer.kernel_gradients.data[:] = 0
-
-            if layer.bias_gradients is not None:
-                layer.bias_gradients.data[:] = 0
-
-            if layer.result_gradients is not None:
-                layer.result_gradients.data[:] = 0
-
-        batch_size = self._layers[-1].result.shape[1]
-
         self._layers[-1].result_gradients.data[:] = \
             np.transpose(np.array(loss_gradient_func(self._layers[-1],
                                                      expected)))
         self._backward_operator.apply(**self._backward_arg_dict)
 
-        for layer in self._layers:
-            if layer.kernel_gradients is not None:
-                layer.kernel_gradients.data[:] /= batch_size
-
-            if layer.bias_gradients is not None:
-                layer.bias_gradients.data[:] /= batch_size
-
         if pytorch_optimizer is not None:
             pytorch_optimizer.step()
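Similarly, the NumPy division by batch_size that used to run in backward() is now expressed as equations appended in _gen_backprop_eqs (Eq(layer.kernel_gradients, layer.kernel_gradients / batch_size)), so the averaging happens inside the compiled backward operator. A minimal sketch of that kind of in-place scaling, again with placeholder names:

    from devito import Grid, Function, Eq, Operator

    # Hypothetical gradient buffer; batch_size is a plain Python int at codegen time.
    grid = Grid(shape=(2, 2))
    k = Function(name='k', grid=grid)
    k.data[:] = 8.0
    batch_size = 4

    # Previously: k.data[:] /= batch_size in Python, after the operator ran.
    # Now the division itself is an equation compiled into the operator.
    op = Operator([Eq(k, k / batch_size)])
    op.apply()
    # k.data is now 2.0 everywhere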
