From a37531e08f152659251068e165ad6db60b0a922a Mon Sep 17 00:00:00 2001
From: Maksymilian Graczyk
Date: Tue, 25 Aug 2020 20:22:06 +0200
Subject: [PATCH] Move zeroing layers to inside an operator

---
 joey/layers.py |  3 +--
 joey/net.py    | 49 +++++++++++++++++++++++++++----------------------
 2 files changed, 28 insertions(+), 24 deletions(-)

diff --git a/joey/layers.py b/joey/layers.py
index cbfe8a1..1ce9df8 100644
--- a/joey/layers.py
+++ b/joey/layers.py
@@ -483,8 +483,7 @@ def backprop_equations(self, prev_layer, next_layer):
 
         a = self._backward_tmp_constants[0]
         b = self._backward_tmp_constants[1]
 
-        return ([Eq(next_layer.result_gradients, 0),
-                 Eq(a, index // 2),
+        return ([Eq(a, index // 2),
                  Eq(b, index % 2),
                  Inc(next_layer.result_gradients[dims[0], dims[1],
diff --git a/joey/net.py b/joey/net.py
index b205fe6..609af66 100644
--- a/joey/net.py
+++ b/joey/net.py
@@ -68,6 +68,9 @@ def _gen_eqs(self):
 
         input_function = None
 
+        for layer in self._layers:
+            eqs.append(Eq(layer.result, 0))
+
         for layer in self._layers:
             if input_function is not None:
                 dims = input_function.dimensions
@@ -85,6 +88,19 @@ def _gen_backprop_eqs(self):
         eqs = []
         args = []
 
+        for i in range(len(self._layers)):
+            layer = self._layers[i]
+
+            if layer.kernel_gradients is not None:
+                eqs.append(Eq(layer.kernel_gradients, 0))
+
+            if layer.bias_gradients is not None:
+                eqs.append(Eq(layer.bias_gradients, 0))
+
+            if layer.result_gradients is not None \
+                    and i < len(self._layers) - 1:
+                eqs.append(Eq(layer.result_gradients, 0))
+
         for i in range(len(self._layers) - 1, -1, -1):
             if i < len(self._layers) - 1:
                 prev_layer = self._layers[i + 1]
@@ -102,6 +118,17 @@ def _gen_backprop_eqs(self):
             args += layer_args
             eqs += layer_eqs
 
+        batch_size = self._layers[-1].result.shape[1]
+
+        for layer in self._layers:
+            if layer.kernel_gradients is not None:
+                eqs.append(Eq(layer.kernel_gradients,
+                              layer.kernel_gradients / batch_size))
+
+            if layer.bias_gradients is not None:
+                eqs.append(Eq(layer.bias_gradients,
+                              layer.bias_gradients / batch_size))
+
         return (eqs, args)
 
     @property
@@ -118,9 +145,6 @@ def forward(self, input_data):
         input_data : np.ndarray
             Input data for the network.
         """
-        for layer in self._layers:
-            layer.result.data[:] = 0
-
         self._layers[0].input.data[:] = input_data
         self._forward_operator.apply(**self._forward_arg_dict)
         return self._layers[-1].result.data
@@ -154,29 +178,10 @@ def backward(self, expected, loss_gradient_func, pytorch_optimizer=None):
             The default value is None.
         """
-        for layer in self._layers:
-            if layer.kernel_gradients is not None:
-                layer.kernel_gradients.data[:] = 0
-
-            if layer.bias_gradients is not None:
-                layer.bias_gradients.data[:] = 0
-
-            if layer.result_gradients is not None:
-                layer.result_gradients.data[:] = 0
-
-        batch_size = self._layers[-1].result.shape[1]
-
         self._layers[-1].result_gradients.data[:] = \
             np.transpose(np.array(loss_gradient_func(self._layers[-1],
                                                      expected)))
 
         self._backward_operator.apply(**self._backward_arg_dict)
 
-        for layer in self._layers:
-            if layer.kernel_gradients is not None:
-                layer.kernel_gradients.data[:] /= batch_size
-
-            if layer.bias_gradients is not None:
-                layer.bias_gradients.data[:] /= batch_size
-
         if pytorch_optimizer is not None:
             pytorch_optimizer.step()
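
For context: the patch replaces Python-side resets of the underlying numpy
buffers (layer.result.data[:] = 0, zeroing the gradient arrays, and the final
division by batch_size) with Devito Eq objects appended to the equation lists,
so the zeroing and batch averaging are compiled into the forward and backward
operators themselves. Below is a minimal sketch of that pattern, assuming
plain Devito only (Grid, Function, Eq and Operator are standard Devito API;
the names grid, grad and op are illustrative and not taken from Joey):

    from devito import Grid, Function, Eq, Operator

    grid = Grid(shape=(4, 4))
    grad = Function(name='grad', grid=grid)

    # Pretend these are stale values left over from a previous batch.
    grad.data[:] = 7.0

    # The reset is expressed as an equation and compiled into the same
    # operator as the update, so no Python-side zeroing is needed between
    # calls.
    op = Operator([Eq(grad, 0),
                   Eq(grad, grad + 1)])
    op.apply()

    print(grad.data)  # all ones: the stale values were cleared in the operator

Keeping the reset inside the operator means forward() and backward() no longer
have to touch .data between runs, and the zeroing is handled within the same
generated operator code as the rest of the computation.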