From ca1c7cfd4b47c8bd268c77c1f24a788eaca38d3d Mon Sep 17 00:00:00 2001 From: AdeB Date: Thu, 23 Jul 2015 16:18:17 -0400 Subject: [PATCH 01/16] Very first draft of the tutorial to create a brick. --- docs/create_your_own_brick.rst | 218 +++++++++++++++++++++++++++++++++ docs/index.rst | 1 + 2 files changed, 219 insertions(+) create mode 100644 docs/create_your_own_brick.rst diff --git a/docs/create_your_own_brick.rst b/docs/create_your_own_brick.rst new file mode 100644 index 00000000..a0b67d5d --- /dev/null +++ b/docs/create_your_own_brick.rst @@ -0,0 +1,218 @@ +Create your own brick +===================== + +This tutorial explains how to create a custom brick, which is useful if you +want to factorize a specific sequence of operations (which can be made of +bricks themselves) into a single operation so that you can easily reuse it. + +The first part of this tutorial lists the requirements and optional components +that a brick should/can implement while the second part describes the +construction step by step of a simple toy brick. + +This tutorial assumes that you are already familiar with +:doc:`bricks `. + + +Bricks ingredients and recipe +----------------------------- + +All the bricks in blocks inherit directly or indirectly from the +:class:`.Brick`. However there is already a rich inheritance hierarchy of +bricks implemented in blocks and thus, you should consider which brick level +you wish to inherit from. Bear it mind that multiple inheritance is often +possible and advocated whenever it makes sense. + +Here are examples of possible bricks to inherit from: + +* :class:`.Sequence`: a sequence of bricks. +* :class:`.Initializable`: a brick that defines the same initialization scheme + (weights and biases) for all its children. +* :class:`.Feedforward`: declares an interface for bricks with one input and + one output. +* :class:`.Linear`: a linear transformation with optional bias. Inherits from + :class:`.Initializable` and :class:`.Feedforward`. 
+* many mores! + +Let's say that you want to create a brick from scracth, simply inheriting +from :class:`.Brick`, then you should consider overwriting the following +methods (strictly speaking, all these methods are optional): + +* :meth:`.Brick.__init__`: you should pass by argument the attributes of your + bricks. It is also in this method that you should create the potential + "children bricks" that belongs to your brick. The initialiazation of the + attributes can be lazy as described in a further paragraph. +* :meth:`you_decide_which_name`: you need to implement a method that actually + implements the operation of the brick, taking as arguments the inputs + of the brick and returning its outputs. It can have any name and for simple + bricks is often named ``apply``. You can decorate it with the + :func:`.application` decorator, as explained in the next section. +* :meth:`.Brick._allocate`: you should implement this method if your brick + needs to allocate its parameters. +* :meth:`.Brick._initialize`: you should implement this method if you need to + initialize parameters of your brick. +* :meth:`.Brick._push_allocation_config`: you should consider overwriting + this method if you want to allocate the children bricks in a specific way. +* :meth:`.Brick._push_initialization_config`: you should consider method if + you want to initialize the children bricks in a specific way. +* :meth:`.Brick.get_dim`: this method is useful to get the dimensions of the + inputs and outputs of the brick. + +If you want to inherit from a specific brick, check its docstring to +identify the particular methods to overwrite. + +you_decide_which_name method +~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +The :func:`.application` decorator can be used to name variables and +register auxiliary variables to the operation you implement. It is used as +followed: + + >>> class Foo(Brick): + ... @application(inputs=['input1', 'input2'], outputs=['output']) + ... def apply(self, input1, input2) + ... ... 
+ ... return something + +In the case above, it will automatically label the theano tensor variable +input1 to ``Foo_apply_input1``, idem for input2 and the output of the method. + +Under the hood, the ``@application`` decorator creates an object of class +:class:`.Application`, named ``apply``, which becomes an attribute of the brick. + +In the previous examples, variables were named with strings. If you need to +name certain variables with other variables (such as ``self .fancy_name``), +you should define them with the apply.property decorator: + + >>> class Foo(Brick): + ... fancy_name = "salut_toi" + ... @application + ... def apply(self, input) + ... ... + ... @apply.property('inputs') + ... def apply_inputs(self): + ... return self.fancy_name + +You can also annotate specific variables, as shown in this example: + + >>> class Foo(Brick): + ... @application + ... def apply(self, x, application_call): + ... application_call.add_auxiliary_variable(x.mean()) + ... return x + 1 + +You can retrieve these annotated variables as usual with the computational +graph. + + +Lazy initialization +~~~~~~~~~~~~~~~~~~~ + +Instead of forcing the user to provide all the brick attributes as arguments +to the :meth:`.Brick.__init__` method, you could let him/her specify them +later, after the creation of the brick. To enable this mecanism, called lazy +initialization, you need to decorate the method :meth:`.Brick.__init__` with +the :func:`.lazy` decorator: + + >>> @lazy(allocation=['attr1', 'attr2']) + ... def __init__(self, attr1, attr1) + ... ... + +This allows the user to specify attr1 and attr2 after the creation of the brick. + + +Example +------- + +For the sake of the tutorial, let's consider a toy operation that takes two +batch inputs and multiply them respectively by two matrices, resulting in two +outputs. + +The first step is to identify which brick to inherit from. Clearly we are +implementing a variant of the :class:`.Linear` brick. 
Contrary to +:class:`.Linear`, ours has two inputs and two outputs, which means that we can +not inherit from :class:`.Feedforward`, which requires a single input and a +single output. Our brick will have to manage two shared variables +representing the matrices to multiply the inputs with and thus, inheriting from +:class:`.Initializable` makes perfectly sense as we will let the user decide +which initialization scheme to use. TODO + + >>> class ParallelLinear(Initializable): + ... r"""Two linear transformations without biases. + ... + ... Brick which applies two linear (affine) transformations by + ... multiplying its + ... two inputs with two weight matrices, resulting in two outputs. + ... The two inputs, weights and outputs can have different dimensions. + ... + ... Parameters + ... ---------- + ... input_dim{1,2} : int + ... The dimensions of the two inputs. + ... output_dim{1,2} : int + ... The dimension of the two outputs. + ... """ + ... @lazy(allocation=['input_dim1', 'input_dim2', + ... 'output_dim1', 'output_dim2']) + ... def __init__(self, input_dim1, input_dim2, output_dim1, output_dim2, + ... **kwargs): + ... super(ParallelLinear, self).__init__(**kwargs) + ... self.input_dim1 = input_dim1 + ... self.input_dim2 = input_dim2 + ... self.output_dim1 = output_dim1 + ... self.output_dim2 = output_dim2 + ... + ... @property + ... def W1(self): + ... return self.parameters[0] + ... + ... @property + ... def W2(self): + ... return self.parameters[0] + ... + ... def __allocate(self, input_dim, output_dim, number): + ... W = shared_floatx_nans((input_dim, output_dim), + ... name='W'+number) + ... add_role(W, WEIGHT) + ... self.parameters.append(W) + ... self.add_auxiliary_variable(W.norm(2), name='W'+number+'_norm') + ... + ... def _allocate(self): + ... self.__allocate(self.input_dim1, self.output_dim1, '1') + ... self.__allocate(self.input_dim2, self.output_dim2, '2') + ... + ... def _initialize(self): + ... W1, W2 = self.parameters + ... 
self.weights_init.initialize(W1, self.rng) + ... self.weights_init.initialize(W2, self.rng) + ... + ... @application(inputs=['input1_', 'input2_'], outputs=['output1', + ... 'output2']) + ... def apply(self, input1_, input2_): + ... """Apply the two linear transformations. + ... + ... Parameters + ... ---------- + ... input{1,2}_ : :class:`~tensor.TensorVariable` + ... The two inputs on which to apply the transformations + ... + ... Returns + ... ------- + ... output{1,2} : :class:`~tensor.TensorVariable` + ... The two inputs multiplied by their respective matrices + ... + ... """ + ... W1, W2 = self.parameters + ... output1 = tensor.dot(input1_, W1) + ... output2 = tensor.dot(input2_, W2) + ... return output1, output2 + ... + ... def get_dim(self, name): + ... if name == 'input1_': + ... return self.input_dim1 + ... if name == 'input2_': + ... return self.input_dim2 + ... if name == 'output1': + ... return self.output_dim1 + ... if name == 'output2': + ... return self.output_dim2 + ... super(ParallelLinear, self).get_dim(name) diff --git a/docs/index.rst b/docs/index.rst index c3f756ca..1227db72 100644 --- a/docs/index.rst +++ b/docs/index.rst @@ -44,6 +44,7 @@ In-depth rnn configuration + create_your_own_brick serialization api/index.rst development/index.rst From 627da45affa8b58da086dce7da897b19cc012cc5 Mon Sep 17 00:00:00 2001 From: AdeB Date: Mon, 27 Jul 2015 09:43:30 -0400 Subject: [PATCH 02/16] Refactoring. Brick example with children. --- docs/create_your_own_brick.rst | 91 +++++++++++++++++++++++----------- 1 file changed, 61 insertions(+), 30 deletions(-) diff --git a/docs/create_your_own_brick.rst b/docs/create_your_own_brick.rst index a0b67d5d..9f3730aa 100644 --- a/docs/create_your_own_brick.rst +++ b/docs/create_your_own_brick.rst @@ -7,7 +7,7 @@ bricks themselves) into a single operation so that you can easily reuse it. 
The first part of this tutorial lists the requirements and optional components that a brick should/can implement while the second part describes the -construction step by step of a simple toy brick. +construction of a simple toy brick. This tutorial assumes that you are already familiar with :doc:`bricks `. @@ -38,13 +38,14 @@ from :class:`.Brick`, then you should consider overwriting the following methods (strictly speaking, all these methods are optional): * :meth:`.Brick.__init__`: you should pass by argument the attributes of your - bricks. It is also in this method that you should create the potential - "children bricks" that belongs to your brick. The initialiazation of the - attributes can be lazy as described in a further paragraph. + brick. It is also in this method that you should create the potential + "children bricks" that belongs to your brick (in that case, you have to put + the children bricks into ``self.children``. The initialiazation of the + attributes can be lazy as described later in the tutorial. * :meth:`you_decide_which_name`: you need to implement a method that actually implements the operation of the brick, taking as arguments the inputs of the brick and returning its outputs. It can have any name and for simple - bricks is often named ``apply``. You can decorate it with the + bricks is often named ``apply``. You should decorate it with the :func:`.application` decorator, as explained in the next section. * :meth:`.Brick._allocate`: you should implement this method if your brick needs to allocate its parameters. @@ -52,22 +53,28 @@ methods (strictly speaking, all these methods are optional): initialize parameters of your brick. * :meth:`.Brick._push_allocation_config`: you should consider overwriting this method if you want to allocate the children bricks in a specific way. -* :meth:`.Brick._push_initialization_config`: you should consider method if - you want to initialize the children bricks in a specific way. 
-* :meth:`.Brick.get_dim`: this method is useful to get the dimensions of the - inputs and outputs of the brick. +* :meth:`.Brick._push_initialization_config`: you should consider + overwriting this method if you want to initialize the children bricks in a + specific way. You should inherit from :class:`.Initializable` to initialize + the potential children bricks recursively. +* :meth:`.Brick.get_dim`: implementing this function is useful if you want + to provide a simple way to get the dimensions of the inputs and outputs of + the brick. If you want to inherit from a specific brick, check its docstring to -identify the particular methods to overwrite. +identify the particular methods to overwrite and the attributes to define. you_decide_which_name method ~~~~~~~~~~~~~~~~~~~~~~~~~~~~ -The :func:`.application` decorator can be used to name variables and -register auxiliary variables to the operation you implement. It is used as -followed: +The :meth:`you_decide_which_name` method described above is probably the most +important method of your brick because it is the one that actually takes +theano tensors as inputs, process them and return tensor outputs. You should +decorate it with the :func:`.application` decorator, which names variables +and register auxiliary variables of the operation you implement. +It is used as followed (:meth:`you_decide_which_name` is named :meth:`apply`): - >>> class Foo(Brick): + >>> class Foo(Brick): # doctest: +SKIP ... @application(inputs=['input1', 'input2'], outputs=['output']) ... def apply(self, input1, input2) ... ... @@ -77,13 +84,14 @@ In the case above, it will automatically label the theano tensor variable input1 to ``Foo_apply_input1``, idem for input2 and the output of the method. Under the hood, the ``@application`` decorator creates an object of class -:class:`.Application`, named ``apply``, which becomes an attribute of the brick. +:class:`.Application`, named ``apply``, which becomes an attribute of the +brick. 
In the previous examples, variables were named with strings. If you need to name certain variables with other variables (such as ``self .fancy_name``), you should define them with the apply.property decorator: - >>> class Foo(Brick): + >>> class Foo(Brick): # doctest: +SKIP ... fancy_name = "salut_toi" ... @application ... def apply(self, input) @@ -92,9 +100,10 @@ you should define them with the apply.property decorator: ... def apply_inputs(self): ... return self.fancy_name -You can also annotate specific variables, as shown in this example: +You can also annotate specific variables by passing ``application_call`` as +agurment of your ``apply`` function, as shown in this example: - >>> class Foo(Brick): + >>> class Foo(Brick): # doctest: +SKIP ... @application ... def apply(self, x, application_call): ... application_call.add_auxiliary_variable(x.mean()) @@ -113,11 +122,12 @@ later, after the creation of the brick. To enable this mecanism, called lazy initialization, you need to decorate the method :meth:`.Brick.__init__` with the :func:`.lazy` decorator: - >>> @lazy(allocation=['attr1', 'attr2']) + >>> @lazy(allocation=['attr1', 'attr2']) # doctest: +SKIP ... def __init__(self, attr1, attr1) ... ... -This allows the user to specify attr1 and attr2 after the creation of the brick. +This allows the user to specify attr1 and attr2 after the creation of the +brick. Example @@ -134,9 +144,9 @@ not inherit from :class:`.Feedforward`, which requires a single input and a single output. Our brick will have to manage two shared variables representing the matrices to multiply the inputs with and thus, inheriting from :class:`.Initializable` makes perfectly sense as we will let the user decide -which initialization scheme to use. TODO +which initialization scheme to use. - >>> class ParallelLinear(Initializable): + >>> class ParallelLinear(Initializable): # doctest: +SKIP ... r"""Two linear transformations without biases. ... ... 
Brick which applies two linear (affine) transformations by @@ -161,14 +171,6 @@ which initialization scheme to use. TODO ... self.output_dim1 = output_dim1 ... self.output_dim2 = output_dim2 ... - ... @property - ... def W1(self): - ... return self.parameters[0] - ... - ... @property - ... def W2(self): - ... return self.parameters[0] - ... ... def __allocate(self, input_dim, output_dim, number): ... W = shared_floatx_nans((input_dim, output_dim), ... name='W'+number) @@ -216,3 +218,32 @@ which initialization scheme to use. TODO ... if name == 'output2': ... return self.output_dim2 ... super(ParallelLinear, self).get_dim(name) + + +One can also create the brick using :class:`Linear` children bricks, which +gives a more compact version: + + >>> from blocks.bricks import Linear # doctest: +SKIP + >>> class ParallelLinear2(Initializable): # doctest: +SKIP + ... def __init__(self, input_dim1, input_dim2, output_dim1, output_dim2, + ... **kwargs): ... + ... super(ParallelLinear2, self).__init__(**kwargs) + ... self.linear1 = Linear(input_dim1, output_dim1, + ... use_bias=False, **kwargs) + ... self.linear2 = Linear(input_dim2, output_dim2, + ... use_bias=False, **kwargs) + ... self.children = [self.linear1, self.linear2] + ... + ... @application(inputs=['input1_', 'input2_'], outputs=['output1', + ... 'output2']) + ... def apply(self, input1_, input2_): + ... output1 = self.linear1.apply(input1_) + ... output2 = self.linear2.apply(input2_) + ... return output1, output2 + ... + ... def get_dim(self, name): + ... if name in ['input1_', 'output1']: + ... return self.linear1.get_dim(name) + ... if name in ['input2_', 'output2']: + ... return self.linear2.get_dim(name) + ... 
super(ParallelLinear2, self).get_dim(name) \ No newline at end of file From 5628491edab975315f7ab90ff0f13576da75fcdc Mon Sep 17 00:00:00 2001 From: AdeB Date: Sat, 1 Aug 2015 17:14:22 -0400 Subject: [PATCH 03/16] More explanations in the tutorial to create a custom brick --- docs/create_your_own_brick.rst | 122 +++++++++++++++++++++++++++------ 1 file changed, 100 insertions(+), 22 deletions(-) diff --git a/docs/create_your_own_brick.rst b/docs/create_your_own_brick.rst index 9f3730aa..054c4e6a 100644 --- a/docs/create_your_own_brick.rst +++ b/docs/create_your_own_brick.rst @@ -2,25 +2,26 @@ Create your own brick ===================== This tutorial explains how to create a custom brick, which is useful if you -want to factorize a specific sequence of operations (which can be made of -bricks themselves) into a single operation so that you can easily reuse it. +want to factorize several combined operations (which can be made of +bricks themselves) into a single higher-level operation so that you can easily +reuse it. The first part of this tutorial lists the requirements and optional components that a brick should/can implement while the second part describes the construction of a simple toy brick. This tutorial assumes that you are already familiar with -:doc:`bricks `. +:doc:`bricks ` and how to use them from a user point of view. Bricks ingredients and recipe ----------------------------- All the bricks in blocks inherit directly or indirectly from the -:class:`.Brick`. However there is already a rich inheritance hierarchy of +:class:`.Brick`. There is already a rich inheritance hierarchy of bricks implemented in blocks and thus, you should consider which brick level -you wish to inherit from. Bear it mind that multiple inheritance is often -possible and advocated whenever it makes sense. +to inherit from. Bear it mind that multiple inheritance is often possible and +advocated whenever it makes sense. 
Here are examples of possible bricks to inherit from: @@ -31,6 +32,8 @@ Here are examples of possible bricks to inherit from: one output. * :class:`.Linear`: a linear transformation with optional bias. Inherits from :class:`.Initializable` and :class:`.Feedforward`. +* :class:`.BaseRecurrent`: the base class for recurrent bricks. Check the + :doc:`tutorial about rnns` for more information. * many mores! Let's say that you want to create a brick from scracth, simply inheriting @@ -40,17 +43,23 @@ methods (strictly speaking, all these methods are optional): * :meth:`.Brick.__init__`: you should pass by argument the attributes of your brick. It is also in this method that you should create the potential "children bricks" that belongs to your brick (in that case, you have to put - the children bricks into ``self.children``. The initialiazation of the + the children bricks into ``self.children``). The initialiazation of the attributes can be lazy as described later in the tutorial. * :meth:`you_decide_which_name`: you need to implement a method that actually implements the operation of the brick, taking as arguments the inputs of the brick and returning its outputs. It can have any name and for simple bricks is often named ``apply``. You should decorate it with the - :func:`.application` decorator, as explained in the next section. -* :meth:`.Brick._allocate`: you should implement this method if your brick - needs to allocate its parameters. -* :meth:`.Brick._initialize`: you should implement this method if you need to - initialize parameters of your brick. + :func:`.application` decorator, as explained in the next section. If you + design a recurrent brick, you should instead decorate it with the + :func:`.recurrent` decorator as explained in the + :doc:`tutorial about rnns`. +* :meth:`.Brick._allocate`: you should implement this method to allocate the + shared variables (often representing parameters) of the brick. 
In blocks, + by convention, the built-in bricks allocate their shared variables with nan + values and we recommend that you do the same. +* :meth:`.Brick._initialize`: you should implement this method to initialize + the shared variables of your brick. This method is called after the + allocation. * :meth:`.Brick._push_allocation_config`: you should consider overwriting this method if you want to allocate the children bricks in a specific way. * :meth:`.Brick._push_initialization_config`: you should consider @@ -67,9 +76,9 @@ identify the particular methods to overwrite and the attributes to define. you_decide_which_name method ~~~~~~~~~~~~~~~~~~~~~~~~~~~~ -The :meth:`you_decide_which_name` method described above is probably the most +The :meth:`you_decide_which_name` method listed above is probably the most important method of your brick because it is the one that actually takes -theano tensors as inputs, process them and return tensor outputs. You should +theano tensors as inputs, process them and return output tensors. You should decorate it with the :func:`.application` decorator, which names variables and register auxiliary variables of the operation you implement. It is used as followed (:meth:`you_decide_which_name` is named :meth:`apply`): @@ -81,7 +90,8 @@ It is used as followed (:meth:`you_decide_which_name` is named :meth:`apply`): ... return something In the case above, it will automatically label the theano tensor variable -input1 to ``Foo_apply_input1``, idem for input2 and the output of the method. +``input1`` to ``Foo_apply_input1``, idem for ``input2`` and the output of the +method. Under the hood, the ``@application`` decorator creates an object of class :class:`.Application`, named ``apply``, which becomes an attribute of the @@ -101,7 +111,7 @@ you should define them with the apply.property decorator: ... 
return self.fancy_name You can also annotate specific variables by passing ``application_call`` as -agurment of your ``apply`` function, as shown in this example: +argument of your ``apply`` function, as shown in this example: >>> class Foo(Brick): # doctest: +SKIP ... @application @@ -133,6 +143,21 @@ brick. Example ------- +.. doctest:: + :hide: + + >>> import numpy + >>> + >>> import theano + >>> from theano import tensor + >>> + >>> from blocks.bricks import Initializable, Linear + >>> from blocks.bricks.base import lazy, application + >>> from blocks.bricks.parallel import Parallel + >>> from blocks.initialization import Constant + >>> from blocks.roles import add_role, WEIGHT + >>> from blocks.utils import shared_floatx_nans + For the sake of the tutorial, let's consider a toy operation that takes two batch inputs and multiply them respectively by two matrices, resulting in two outputs. @@ -146,7 +171,8 @@ representing the matrices to multiply the inputs with and thus, inheriting from :class:`.Initializable` makes perfectly sense as we will let the user decide which initialization scheme to use. - >>> class ParallelLinear(Initializable): # doctest: +SKIP + + >>> class ParallelLinear(Initializable): ... r"""Two linear transformations without biases. ... ... Brick which applies two linear (affine) transformations by @@ -219,14 +245,33 @@ which initialization scheme to use. ... return self.output_dim2 ... super(ParallelLinear, self).get_dim(name) +You can test the brick as follows: + + >>> input_dim1, input_dim2, output_dim1, output_dim2 = 10, 5, 2, 1 + >>> batch_size1, batch_size2 = 1, 2 + >>> + >>> x1_mat = 3 * numpy.ones((batch_size1, input_dim1), + ... dtype=theano.config.floatX) + >>> x2_mat = 4 * numpy.ones((batch_size2, input_dim2), + ... dtype=theano.config.floatX) + >>> + >>> x1 = theano.tensor.matrix('x1') + >>> x2 = theano.tensor.matrix('x2') + >>> parallel1 = ParallelLinear(input_dim1, input_dim2, output_dim1, + ... 
output_dim2, weights_init=Constant(2)) + >>> parallel1.initialize() + >>> output1, output2 = parallel1.apply(x1, x2) + >>> + >>> f1 = theano.function([x1, x2], [output1, output2]) + >>> f1(x1_mat, x2_mat) # doctest: +ELLIPSIS + [array([[ 60., 60.]]...), array([[ 40.], + [ 40.]]...)] One can also create the brick using :class:`Linear` children bricks, which -gives a more compact version: - >>> from blocks.bricks import Linear # doctest: +SKIP - >>> class ParallelLinear2(Initializable): # doctest: +SKIP + >>> class ParallelLinear2(Initializable): ... def __init__(self, input_dim1, input_dim2, output_dim1, output_dim2, - ... **kwargs): ... + ... **kwargs): ... super(ParallelLinear2, self).__init__(**kwargs) ... self.linear1 = Linear(input_dim1, output_dim1, ... use_bias=False, **kwargs) @@ -246,4 +291,37 @@ gives a more compact version: ... return self.linear1.get_dim(name) ... if name in ['input2_', 'output2']: ... return self.linear2.get_dim(name) - ... super(ParallelLinear2, self).get_dim(name) \ No newline at end of file + ... super(ParallelLinear2, self).get_dim(name) + +You can test this new version as follows: + + >>> parallel2 = ParallelLinear2(input_dim1, input_dim2, output_dim1, + ... output_dim2, weights_init=Constant(2)) + >>> parallel2.initialize() + >>> output1, output2 = parallel2.apply(x1, x2) + >>> + >>> f2 = theano.function([x1, x2], [output1, output2]) + >>> f2(x1_mat, x2_mat) # doctest: +ELLIPSIS + [array([[ 60., 60.]]...), array([[ 40.], + [ 40.]]...)] + +Actually it was not even necessary to create a custom brick for this particular +operation as blocks already has a brick, called :class:`.Parallel`, that +applies the same brick to several inputs. In our case the brick we want to +apply to our two inputs is a :class:`.Linear` brick with no bias: + + >>> parallel3 = Parallel( + ... prototype=Linear(use_bias=False), + ... input_names=['input1_', 'input2_'], + ... input_dims=[input_dim1, input_dim2], + ... 
output_dims=[output_dim1, output_dim2], weights_init=Constant(2)) + >>> parallel3.initialize() + >>> + >>> output1, output2 = parallel3.apply(x1, x2) + >>> + >>> f3 = theano.function([x1, x2], [output1, output2]) + >>> f3(x1_mat, x2_mat) # doctest: +ELLIPSIS + [array([[ 60., 60.]]...), array([[ 40.], + [ 40.]]...)] + + From 53979deb9047f0be395762e91edfcfdba64e82b4 Mon Sep 17 00:00:00 2001 From: AdeB Date: Thu, 17 Sep 2015 16:12:31 -0400 Subject: [PATCH 04/16] Example of lazy brick with get_dim --- docs/create_your_own_brick.rst | 60 ++++++++++++++++++++++++++++------ 1 file changed, 50 insertions(+), 10 deletions(-) diff --git a/docs/create_your_own_brick.rst b/docs/create_your_own_brick.rst index 054c4e6a..fdb4c970 100644 --- a/docs/create_your_own_brick.rst +++ b/docs/create_your_own_brick.rst @@ -20,7 +20,7 @@ Bricks ingredients and recipe All the bricks in blocks inherit directly or indirectly from the :class:`.Brick`. There is already a rich inheritance hierarchy of bricks implemented in blocks and thus, you should consider which brick level -to inherit from. Bear it mind that multiple inheritance is often possible and +to inherit from. Bear in mind that multiple inheritance is often possible and advocated whenever it makes sense. Here are examples of possible bricks to inherit from: @@ -34,7 +34,7 @@ Here are examples of possible bricks to inherit from: :class:`.Initializable` and :class:`.Feedforward`. * :class:`.BaseRecurrent`: the base class for recurrent bricks. Check the :doc:`tutorial about rnns` for more information. -* many mores! +* many more! Let's say that you want to create a brick from scracth, simply inheriting from :class:`.Brick`, then you should consider overwriting the following @@ -45,7 +45,7 @@ methods (strictly speaking, all these methods are optional): "children bricks" that belongs to your brick (in that case, you have to put the children bricks into ``self.children``). 
The initialiazation of the attributes can be lazy as described later in the tutorial. -* :meth:`you_decide_which_name`: you need to implement a method that actually +* :meth:`apply`: you need to implement a method that actually implements the operation of the brick, taking as arguments the inputs of the brick and returning its outputs. It can have any name and for simple bricks is often named ``apply``. You should decorate it with the @@ -73,15 +73,15 @@ methods (strictly speaking, all these methods are optional): If you want to inherit from a specific brick, check its docstring to identify the particular methods to overwrite and the attributes to define. -you_decide_which_name method +apply method ~~~~~~~~~~~~~~~~~~~~~~~~~~~~ -The :meth:`you_decide_which_name` method listed above is probably the most +The :meth:`apply` method listed above is probably the most important method of your brick because it is the one that actually takes theano tensors as inputs, process them and return output tensors. You should decorate it with the :func:`.application` decorator, which names variables and register auxiliary variables of the operation you implement. -It is used as followed (:meth:`you_decide_which_name` is named :meth:`apply`): +It is used as follows: >>> class Foo(Brick): # doctest: +SKIP ... @application(inputs=['input1', 'input2'], outputs=['output']) @@ -120,12 +120,19 @@ argument of your ``apply`` function, as shown in this example: ... return x + 1 You can retrieve these annotated variables as usual with the computational -graph. +graph :class:`.ComputationGraph` and filters :class:`.VariableFilter`. Lazy initialization ~~~~~~~~~~~~~~~~~~~ +.. 
doctest:: + :hide: + + >>> from blocks.bricks import Feedforward, Linear + >>> from blocks.bricks.base import lazy, application + >>> from blocks.initialization import Constant + Instead of forcing the user to provide all the brick attributes as arguments to the :meth:`.Brick.__init__` method, you could let him/her specify them later, after the creation of the brick. To enable this mecanism, called lazy @@ -137,7 +144,40 @@ the :func:`.lazy` decorator: ... ... This allows the user to specify attr1 and attr2 after the creation of the -brick. +brick. For example, the following ``DoubleSequential`` brick is composed of two +:class:`.Feedforward` bricks for which you do not need to specify the +``input_dim`` of brick2 directly at its creation. + + >>> class DoubleSequential(Feedforward): + ... """ + ... Two sequential Feedforward bricks. + ... """ + ... def __init__(self, brick1, brick2, **kwargs): + ... super(Feedforward, self).__init__(**kwargs) + ... self.brick1 = brick1 + ... self.brick2 = brick2 + ... self.input_dim = brick1.input_dim + ... self.output_dim = brick2.output_dim + ... self.children = [self.brick1, self.brick2] + ... + ... def _push_allocation_config(self): + ... self.brick2.input_dim = self.brick1.get_dim('output') + ... + ... @application + ... def apply(self, x): + ... return self.brick2.apply(self.brick1.apply(x)) + +Note how ``get_dim`` is used to retrieve the ``input_dim`` of ``brick1``. You +can now create a DoubleSeuential brick as follows. + + >>> brick1 = Linear(input_dim=3, output_dim=2, use_bias=False, + ... weights_init=Constant(2)) + >>> brick2 = Linear(output_dim=4, use_bias=False, weights_init=Constant(2)) + >>> + >>> seq = DoubleSequential(brick1, brick2) + >>> seq.initialize() + >>> brick2.input_dim + 2 Example @@ -176,8 +216,8 @@ which initialization scheme to use. ... r"""Two linear transformations without biases. ... ... Brick which applies two linear (affine) transformations by - ... multiplying its - ... 
two inputs with two weight matrices, resulting in two outputs. + ... multiplying its two inputs with two weight matrices, resulting in + ... two outputs. ... The two inputs, weights and outputs can have different dimensions. ... ... Parameters From 536bf1891277da2923da4d0e1a771becba5581bc Mon Sep 17 00:00:00 2001 From: serdyuk Date: Fri, 18 Sep 2015 13:57:25 -0400 Subject: [PATCH 05/16] Make small fixes in create your own brick --- docs/create_your_own_brick.rst | 37 +++++++++++++++++----------------- 1 file changed, 19 insertions(+), 18 deletions(-) diff --git a/docs/create_your_own_brick.rst b/docs/create_your_own_brick.rst index fdb4c970..a5ffb01e 100644 --- a/docs/create_your_own_brick.rst +++ b/docs/create_your_own_brick.rst @@ -98,8 +98,8 @@ Under the hood, the ``@application`` decorator creates an object of class brick. In the previous examples, variables were named with strings. If you need to -name certain variables with other variables (such as ``self .fancy_name``), -you should define them with the apply.property decorator: +name certain variables with other variables (such as ``self.fancy_name``), +you should define them with the ``apply.property`` decorator: >>> class Foo(Brick): # doctest: +SKIP ... fancy_name = "salut_toi" @@ -108,8 +108,12 @@ you should define them with the apply.property decorator: ... ... ... @apply.property('inputs') ... def apply_inputs(self): + ... # Note that you can use any python code to define the name ... return self.fancy_name +Using application calls +""""""""""""""""""""""" + You can also annotate specific variables by passing ``application_call`` as argument of your ``apply`` function, as shown in this example: @@ -135,23 +139,21 @@ Lazy initialization Instead of forcing the user to provide all the brick attributes as arguments to the :meth:`.Brick.__init__` method, you could let him/her specify them -later, after the creation of the brick. 
To enable this mecanism, called lazy -initialization, you need to decorate the method :meth:`.Brick.__init__` with -the :func:`.lazy` decorator: +later, after the creation of the brick. To enable this mechanism, +called lazy initialization, you need to decorate the constructor with the +:func:`.lazy` decorator: >>> @lazy(allocation=['attr1', 'attr2']) # doctest: +SKIP ... def __init__(self, attr1, attr1) ... ... -This allows the user to specify attr1 and attr2 after the creation of the -brick. For example, the following ``DoubleSequential`` brick is composed of two -:class:`.Feedforward` bricks for which you do not need to specify the -``input_dim`` of brick2 directly at its creation. +This allows the user to specify ``attr1`` and ``attr2`` after the creation of +the brick. For example, the following ``DoubleSequential`` brick is composed of +two :class:`.Feedforward` bricks for which you do not need to +specify the ``input_dim`` of ``brick2`` directly at its creation. >>> class DoubleSequential(Feedforward): - ... """ - ... Two sequential Feedforward bricks. - ... """ + ... """Two sequential Feedforward bricks.""" ... def __init__(self, brick1, brick2, **kwargs): ... super(Feedforward, self).__init__(**kwargs) ... self.brick1 = brick1 @@ -168,7 +170,7 @@ brick. For example, the following ``DoubleSequential`` brick is composed of two ... return self.brick2.apply(self.brick1.apply(x)) Note how ``get_dim`` is used to retrieve the ``input_dim`` of ``brick1``. You -can now create a DoubleSeuential brick as follows. +can now use a ``DoubleSeuential`` brick as follows. >>> brick1 = Linear(input_dim=3, output_dim=2, use_bias=False, ... weights_init=Constant(2)) @@ -291,14 +293,14 @@ You can test the brick as follows: >>> batch_size1, batch_size2 = 1, 2 >>> >>> x1_mat = 3 * numpy.ones((batch_size1, input_dim1), - ... dtype=theano.config.floatX) + ... dtype=theano.config.floatX) >>> x2_mat = 4 * numpy.ones((batch_size2, input_dim2), - ... dtype=theano.config.floatX) + ... 
dtype=theano.config.floatX) >>> >>> x1 = theano.tensor.matrix('x1') >>> x2 = theano.tensor.matrix('x2') >>> parallel1 = ParallelLinear(input_dim1, input_dim2, output_dim1, - ... output_dim2, weights_init=Constant(2)) + ... output_dim2, weights_init=Constant(2)) >>> parallel1.initialize() >>> output1, output2 = parallel1.apply(x1, x2) >>> @@ -336,7 +338,7 @@ One can also create the brick using :class:`Linear` children bricks, which You can test this new version as follows: >>> parallel2 = ParallelLinear2(input_dim1, input_dim2, output_dim1, - ... output_dim2, weights_init=Constant(2)) + ... output_dim2, weights_init=Constant(2)) >>> parallel2.initialize() >>> output1, output2 = parallel2.apply(x1, x2) >>> @@ -364,4 +366,3 @@ apply to our two inputs is a :class:``Linear`` brick with no bias: [array([[ 60., 60.]]...), array([[ 40.], [ 40.]]...)] - From 5da46969ea06c5ad57bf0eaec4846e5b2bff35b6 Mon Sep 17 00:00:00 2001 From: serdyuk Date: Fri, 18 Sep 2015 13:59:46 -0400 Subject: [PATCH 06/16] Separate paragraph --- docs/create_your_own_brick.rst | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/docs/create_your_own_brick.rst b/docs/create_your_own_brick.rst index a5ffb01e..dff943d7 100644 --- a/docs/create_your_own_brick.rst +++ b/docs/create_your_own_brick.rst @@ -74,7 +74,7 @@ If you want to inherit from a specific brick, check its docstring to identify the particular methods to overwrite and the attributes to define. apply method -~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +~~~~~~~~~~~~ The :meth:`apply` method listed above is probably the most important method of your brick because it is the one that actually takes @@ -97,6 +97,9 @@ Under the hood, the ``@application`` decorator creates an object of class :class:`.Application`, named ``apply``, which becomes an attribute of the brick. +Application properties +"""""""""""""""""""""" + In the previous examples, variables were named with strings. 
If you need to name certain variables with other variables (such as ``self.fancy_name``), you should define them with the ``apply.property`` decorator: From d57683150180eb9dbab38cf77afd867884124e44 Mon Sep 17 00:00:00 2001 From: AdeB Date: Mon, 21 Sep 2015 10:13:05 -0400 Subject: [PATCH 07/16] Small fixes in create your own brick tutorial --- docs/create_your_own_brick.rst | 49 ++++++++++++++++++++++------------ 1 file changed, 32 insertions(+), 17 deletions(-) diff --git a/docs/create_your_own_brick.rst b/docs/create_your_own_brick.rst index dff943d7..03423401 100644 --- a/docs/create_your_own_brick.rst +++ b/docs/create_your_own_brick.rst @@ -56,16 +56,21 @@ methods (strictly speaking, all these methods are optional): * :meth:`.Brick._allocate`: you should implement this method to allocate the shared variables (often representing parameters) of the brick. In blocks, by convention, the built-in bricks allocate their shared variables with nan - values and we recommand you to do the same. + values and we recommend you to do the same. * :meth:`.Brick._initialize`: you should implement this method to initialize the shared variables of your brick. This method is called after the allocation. * :meth:`.Brick._push_allocation_config`: you should consider overwriting - this method if you want to allocate the children bricks in a specific way. + this method if you want to change configuration of the children bricks + before they allocate their parameters. * :meth:`.Brick._push_initialization_config`: you should consider - overwriting this method if you want to initialize the children bricks in a - specific way. You should inherit from :class:`.Initializable` to initialize - the potential children bricks recursively. + overwriting this method if you want to change the initialization schemes of + the children before they get initialized. 
+ If the children bricks need to be initialized with the same scheme, then you + should inherit your brick from :class:`.Initializable`, which + automatically push the initialization schemes of your brick (provided as + arguments ``weights_init`` and ``biases_init`` of the constructor) to the + children bricks. * :meth:`.Brick.get_dim`: implementing this function is useful if you want to provide a simple way to get the dimensions of the inputs and outputs of the brick. @@ -73,8 +78,8 @@ methods (strictly speaking, all these methods are optional): If you want to inherit from a specific brick, check its docstring to identify the particular methods to overwrite and the attributes to define. -apply method -~~~~~~~~~~~~ +Application methods +~~~~~~~~~~~~~~~~~~~ The :meth:`apply` method listed above is probably the most important method of your brick because it is the one that actually takes @@ -95,17 +100,22 @@ method. Under the hood, the ``@application`` decorator creates an object of class :class:`.Application`, named ``apply``, which becomes an attribute of the -brick. +brick class (by opposition to class instances). Application properties """""""""""""""""""""" -In the previous examples, variables were named with strings. If you need to -name certain variables with other variables (such as ``self.fancy_name``), -you should define them with the ``apply.property`` decorator: +In the previous examples, the names of the arguments of the application methods +were directly provided as arguments of the ``@application`` decorator because +they were common to all instances of the classes. On the other hand, if these +names need to be defined differently for particular instances of the class, +you should use the ``apply.property`` decorator. Let's say that we want to +name our attribute inputs with the string ``self.fancy_name``, then we should +write: >>> class Foo(Brick): # doctest: +SKIP - ... fancy_name = "salut_toi" + ... def __init__(self, fancy_name): + ... 
self.fancy_name = fancy_name ... @application ... def apply(self, input) ... ... @@ -117,8 +127,9 @@ you should define them with the ``apply.property`` decorator: Using application calls """"""""""""""""""""""" -You can also annotate specific variables by passing ``application_call`` as -argument of your ``apply`` function, as shown in this example: +If you need to add auxiliary variables to the computation graph +:class:`.ComputationGraph`, you need to pass ``application_call`` as argument +of your ``apply`` function, as shown in this example: >>> class Foo(Brick): # doctest: +SKIP ... @application @@ -212,9 +223,11 @@ implementing a variant of the :class:`.Linear` brick. Contrary to :class:`.Linear`, ours has two inputs and two outputs, which means that we can not inherit from :class:`.Feedforward`, which requires a single input and a single output. Our brick will have to manage two shared variables -representing the matrices to multiply the inputs with and thus, inheriting from -:class:`.Initializable` makes perfectly sense as we will let the user decide -which initialization scheme to use. +representing the matrices to multiply the inputs. As we want to initialize them +with the same scheme, we should inherit from :class:`.Initializable`, which +automatically push the initialization schemes to the children. The +initialization schemes are provided as arguments ``weights_init`` +and ``biases_init`` of the constructor of our brick (in the ``kwargs``). >>> class ParallelLinear(Initializable): @@ -305,6 +318,8 @@ You can test the brick as follows: >>> parallel1 = ParallelLinear(input_dim1, input_dim2, output_dim1, ... output_dim2, weights_init=Constant(2)) >>> parallel1.initialize() + >>> # The weights_init initialization scheme is pushed to the children + >>> # bricks. 
>>> output1, output2 = parallel1.apply(x1, x2) >>> >>> f1 = theano.function([x1, x2], [output1, output2]) From 8bf07e78d112678b1a272c37c5b26929776f3aa5 Mon Sep 17 00:00:00 2001 From: AdeB Date: Tue, 22 Sep 2015 16:00:07 -0400 Subject: [PATCH 08/16] typos --- docs/create_your_own_brick.rst | 13 +++++++------ 1 file changed, 7 insertions(+), 6 deletions(-) diff --git a/docs/create_your_own_brick.rst b/docs/create_your_own_brick.rst index 03423401..726583f1 100644 --- a/docs/create_your_own_brick.rst +++ b/docs/create_your_own_brick.rst @@ -223,9 +223,9 @@ implementing a variant of the :class:`.Linear` brick. Contrary to :class:`.Linear`, ours has two inputs and two outputs, which means that we can not inherit from :class:`.Feedforward`, which requires a single input and a single output. Our brick will have to manage two shared variables -representing the matrices to multiply the inputs. As we want to initialize them -with the same scheme, we should inherit from :class:`.Initializable`, which -automatically push the initialization schemes to the children. The +representing the matrices to multiply the inputs with. As we want to initialize +them with the same scheme, we should inherit from :class:`.Initializable`, +which automatically push the initialization schemes to the children. The initialization schemes are provided as arguments ``weights_init`` and ``biases_init`` of the constructor of our brick (in the ``kwargs``). @@ -366,9 +366,10 @@ You can test this new version as follows: [ 40.]]...)] Actually it was not even necessary to create a custom brick for this particular -operation as blocks always have a brick, called :class:``Parallel``, that -applies the same brick to several inputs. In our case the brick we want to -apply to our two inputs is a :class:``Linear`` brick with no bias: +operation as blocks has a brick, called :class:``Parallel``, which +applies the same prototype brick to several inputs. 
In our case the prototype +brick we want to apply to our two inputs is a :class:``Linear`` brick with no +bias: >>> parallel3 = Parallel( ... prototype=Linear(use_bias=False), From a06878f63cd46820876c40fdd18d1e3d75bae6e4 Mon Sep 17 00:00:00 2001 From: rizar Date: Wed, 21 Oct 2015 16:45:02 -0400 Subject: [PATCH 09/16] Bump version number --- blocks/__init__.py | 2 +- docs/conf.py | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/blocks/__init__.py b/blocks/__init__.py index 37850e3c..58b4a231 100644 --- a/blocks/__init__.py +++ b/blocks/__init__.py @@ -2,4 +2,4 @@ # Scary warning: Adding code to this file can break namespace packages # See https://pythonhosted.org/setuptools/setuptools.html#namespace-packages __import__("pkg_resources").declare_namespace(__name__) -__version__ = '0.0.1' +__version__ = '0.1.0' diff --git a/docs/conf.py b/docs/conf.py index 57942d1f..0e60edb8 100644 --- a/docs/conf.py +++ b/docs/conf.py @@ -94,9 +94,9 @@ def __getattr__(cls, name): # built documents. # # The short X.Y version. -version = '0.0' +version = 0.1 # The full version, including alpha/beta/rc tags. -release = '0.0.1' +release = 0.1.0 # The language for content autogenerated by Sphinx. Refer to documentation # for a list of supported languages. From 6ebcb37e733c06b4df5b941414093d8bef879401 Mon Sep 17 00:00:00 2001 From: Jose Manuel Date: Wed, 21 Oct 2015 17:18:21 -0400 Subject: [PATCH 10/16] Added documentation about the emitter cost in sequence generators. 
--- blocks/bricks/sequence_generators.py | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/blocks/bricks/sequence_generators.py b/blocks/bricks/sequence_generators.py index b08607e5..7421d23d 100644 --- a/blocks/bricks/sequence_generators.py +++ b/blocks/bricks/sequence_generators.py @@ -582,6 +582,18 @@ class AbstractEmitter(Brick): :class:`SoftmaxEmitter` : for integer outputs + Notes + ----- + An important detail about the emitter cost is that it will be evaluated + with inputs of different dimensions so it has to be flexible enough to + handle this. The two ways in which it can be applied are: + + 1. Inside the cost_matrix function of :class:`BaseSequenceGenerator` + where it will be applied to the whole sequence at once. + + 2. Inside the generate function of :class:`BaseSequenceGenerator` + where it will be only applied to one step ot the sequence. + """ @abstractmethod def emit(self, readouts): From efe3585aab15157ffe50006a0a57a3575304816e Mon Sep 17 00:00:00 2001 From: Jose Manuel Date: Wed, 21 Oct 2015 17:44:05 -0400 Subject: [PATCH 11/16] Solved the flake8 mistake. --- blocks/bricks/sequence_generators.py | 13 +++++++------ 1 file changed, 7 insertions(+), 6 deletions(-) diff --git a/blocks/bricks/sequence_generators.py b/blocks/bricks/sequence_generators.py index 7421d23d..56391103 100644 --- a/blocks/bricks/sequence_generators.py +++ b/blocks/bricks/sequence_generators.py @@ -584,14 +584,15 @@ class AbstractEmitter(Brick): Notes ----- - An important detail about the emitter cost is that it will be evaluated - with inputs of different dimensions so it has to be flexible enough to - handle this. The two ways in which it can be applied are: + An important detail about the emitter cost is that it will be + evaluated with inputs of different dimensions so it has to be + flexible enough to handle this. The two ways in which it can be + applied are: - 1. Inside the cost_matrix function of :class:`BaseSequenceGenerator` + 1. 
In the cost_matrix function of :class:`BaseSequenceGenerator` where it will be applied to the whole sequence at once. - - 2. Inside the generate function of :class:`BaseSequenceGenerator` + + 2. In the generate function of :class:`BaseSequenceGenerator` where it will be only applied to one step ot the sequence. """ From 6c99076b77f3aa2f3353604a9a0d5ea0818b105f Mon Sep 17 00:00:00 2001 From: Jose Manuel Date: Thu, 22 Oct 2015 12:25:17 -0400 Subject: [PATCH 12/16] Added the missing links. --- blocks/bricks/sequence_generators.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/blocks/bricks/sequence_generators.py b/blocks/bricks/sequence_generators.py index 56391103..d43807d6 100644 --- a/blocks/bricks/sequence_generators.py +++ b/blocks/bricks/sequence_generators.py @@ -589,11 +589,11 @@ class AbstractEmitter(Brick): flexible enough to handle this. The two ways in which it can be applied are: - 1. In the cost_matrix function of :class:`BaseSequenceGenerator` - where it will be applied to the whole sequence at once. + 1. In :meth:`BaseSequenceGenerator.cost_matrix` where it will + be applied to the whole sequence at once. - 2. In the generate function of :class:`BaseSequenceGenerator` - where it will be only applied to one step ot the sequence. + 2. In :meth:`BaseSequenceGenerator.generate` where it will be + applied to only one step of the sequence.
""" @abstractmethod From e43cfbe733923a152c313570fff549444abbed2a Mon Sep 17 00:00:00 2001 From: rizar Date: Fri, 23 Oct 2015 09:50:46 -0400 Subject: [PATCH 13/16] Correct main loop message for the interrupt --- blocks/main_loop.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/blocks/main_loop.py b/blocks/main_loop.py index 54620d3e..b519a254 100644 --- a/blocks/main_loop.py +++ b/blocks/main_loop.py @@ -27,7 +27,7 @@ epoch_interrupt_message = """ -Blocks will complete this epoch iteration of training and run extensions \ +Blocks will complete this epoch of training and run extensions \ before exiting. If you do not want to complete this epoch, press CTRL + C \ again to stop training after the current batch.""" From 5dc2bdd7adaad96e1665b9e967afa98e81ae4ba1 Mon Sep 17 00:00:00 2001 From: AdeB Date: Thu, 22 Oct 2015 11:13:47 -0400 Subject: [PATCH 14/16] brick tutorial improvements --- docs/create_your_own_brick.rst | 162 +++++++++++++++++++++------------ 1 file changed, 104 insertions(+), 58 deletions(-) diff --git a/docs/create_your_own_brick.rst b/docs/create_your_own_brick.rst index 726583f1..d67b0ce5 100644 --- a/docs/create_your_own_brick.rst +++ b/docs/create_your_own_brick.rst @@ -1,10 +1,24 @@ Create your own brick ===================== +.. doctest:: + :hide: + + >>> import numpy + >>> + >>> import theano + >>> from theano import tensor + >>> + >>> from blocks.bricks import Brick, Initializable, Linear, Feedforward + >>> from blocks.bricks.base import lazy, application + >>> from blocks.bricks.parallel import Parallel + >>> from blocks.initialization import Constant + >>> from blocks.roles import add_role, WEIGHT + >>> from blocks.utils import shared_floatx_nans + This tutorial explains how to create a custom brick, which is useful if you -want to factorize a specific several combined operations (which can be made of -bricks themselves) into a single higher-level operation so that you can easily -reuse it. 
+want to group several specific operations (which can be bricks themselves) into +a single one so that you can easily reuse it. The first part of this tutorial lists the requirements and optional components that a brick should/can implement while the second part describes the @@ -17,9 +31,9 @@ This tutorial assumes that you are already familiar with Bricks ingredients and recipe ----------------------------- -All the bricks in blocks inherit directly or indirectly from the +All the bricks in Blocks inherit directly or indirectly from the :class:`.Brick`. There is already a rich inheritance hierarchy of -bricks implemented in blocks and thus, you should consider which brick level +bricks implemented in Blocks and thus, you should consider which brick level to inherit from. Bear in mind that multiple inheritance is often possible and advocated whenever it makes sense. @@ -36,9 +50,10 @@ Here are examples of possible bricks to inherit from: :doc:`tutorial about rnns` for more information. * many more! -Let's say that you want to create a brick from scracth, simply inheriting +Let's say that you want to create a brick from scratch, simply inheriting from :class:`.Brick`, then you should consider overwriting the following -methods (strictly speaking, all these methods are optional): +methods (strictly speaking, all these methods are optional, check the docstring +of :class:`.Brick` for a precise description of the life-cycle of a brick): * :meth:`.Brick.__init__`: you should pass by argument the attributes of your brick. It is also in this method that you should create the potential @@ -54,7 +69,7 @@ methods (strictly speaking, all these methods are optional): :func:`.recurrent` decorator as explained in the :doc:`tutorial about rnns`. * :meth:`.Brick._allocate`: you should implement this method to allocate the - shared variables (often representing parameters) of the brick. In blocks, + shared variables (often representing parameters) of the brick. 
In Blocks, by convention, the built-in bricks allocate their shared variables with nan values and we recommend you to do the same. * :meth:`.Brick._initialize`: you should implement this method to initialize @@ -68,7 +83,7 @@ methods (strictly speaking, all these methods are optional): the children before they get initialized. If the children bricks need to be initialized with the same scheme, then you should inherit your brick from :class:`.Initializable`, which - automatically push the initialization schemes of your brick (provided as + automatically pushes the initialization schemes of your brick (provided as arguments ``weights_init`` and ``biases_init`` of the constructor) to the children bricks. * :meth:`.Brick.get_dim`: implementing this function is useful if you want @@ -88,19 +103,42 @@ decorate it with the :func:`.application` decorator, which names variables and register auxiliary variables of the operation you implement. It is used as follows: - >>> class Foo(Brick): # doctest: +SKIP + >>> class Foo(Brick): ... @application(inputs=['input1', 'input2'], outputs=['output']) - ... def apply(self, input1, input2) - ... ... - ... return something - -In the case above, it will automatically label the theano tensor variable -``input1`` to ``Foo_apply_input1``, idem for ``input2`` and the output of the -method. + ... def apply(self, input1, input2): + ... y = input1 + input2 + ... return y + +In the case above, it will automatically rename the theano tensor variable +``input1`` to ``foo_apply_input1``, ``input2`` to ``foo_apply_input2`` and the +output of the method to ``foo_apply_output``.
It will also add roles and names +to the tag attributes of the variables, as shown below: + + >>> foo = Foo() + >>> i1 = tensor.matrix('i1') + >>> i2 = tensor.matrix('i2') + >>> y = foo.apply(i1, i2) + >>> theano.printing.debugprint(y) + Elemwise{identity} [@A] 'foo_apply_output' + |Elemwise{add,no_inplace} [@B] '' + |Elemwise{identity} [@C] 'foo_apply_input1' + | |i1 [@D] + |Elemwise{identity} [@E] 'foo_apply_input2' + |i2 [@F] + >>> print(y.name) + foo_apply_output + >>> print(y.tag.name) + output + >>> print(y.tag.roles) + [OUTPUT] Under the hood, the ``@application`` decorator creates an object of class :class:`.Application`, named ``apply``, which becomes an attribute of the -brick class (by opposition to class instances). +brick class (by opposition to class instances): + + >>> print(type(Foo.apply)) + <class 'blocks.bricks.base.Application'> + Application properties """""""""""""""""""""" @@ -108,7 +146,7 @@ Application properties In the previous examples, the names of the arguments of the application methods were directly provided as arguments of the ``@application`` decorator because they were common to all instances of the classes. On the other hand, if these -names need to be defined differently for particular instances of the class, +names need to be defined differently for particular instances of the class, you should use the ``apply.property`` decorator. Let's say that we want to name our attribute inputs with the string ``self.fancy_name``, then we should write: @@ -127,30 +165,33 @@ write: Using application calls """"""""""""""""""""""" -If you need to add auxiliary variables to the computation graph -:class:`.ComputationGraph`, you need to pass ``application_call`` as argument -of your ``apply`` function, as shown in this example: +You may want to save particular variables defined in the ``apply`` method in +order to use them later, for example to monitor them during training.
For that, +you need to pass ``application_call`` as argument of your ``apply`` function +and use the ``add_auxiliary_variable`` function to register your variables of +interest, as shown in this example: - >>> class Foo(Brick): # doctest: +SKIP + >>> class Foo(Brick): ... @application ... def apply(self, x, application_call): ... application_call.add_auxiliary_variable(x.mean()) ... return x + 1 -You can retrieve these annotated variables as usual with the computational -graph :class:`.ComputationGraph` and filters :class:`.VariableFilter`. +``add_auxiliary_variable`` annotates the variable ``x.mean()`` as an auxiliary +variable and you can thus later retrieve it with the computational graph +:class:`.ComputationGraph` and filters :class:`.VariableFilter`. In the +case of the ``Foo`` Brick defined above, we retrieve ``x.mean()`` as follows: + >>> from blocks.graph import ComputationGraph + >>> x = tensor.fmatrix('x') + >>> y = Foo().apply(x) + >>> cg = ComputationGraph(y) + >>> print(cg.auxiliary_variables) + [mean] Lazy initialization ~~~~~~~~~~~~~~~~~~~ -.. doctest:: - :hide: - - >>> from blocks.bricks import Feedforward, Linear - >>> from blocks.bricks.base import lazy, application - >>> from blocks.initialization import Constant - Instead of forcing the user to provide all the brick attributes as arguments to the :meth:`.Brick.__init__` method, you could let him/her specify them later, after the creation of the brick. To enable this mechanism, @@ -162,20 +203,34 @@ called lazy initialization, you need to decorate the constructor with the ... ... This allows the user to specify ``attr1`` and ``attr2`` after the creation of -the brick. For example, the following ``DoubleSequential`` brick is composed of -two :class:`.Feedforward` bricks for which you do not need to +the brick.
For example, the following ``ChainOfTwoFeedforward`` brick is +composed of two :class:`.Feedforward` bricks for which you do not need to specify the ``input_dim`` of ``brick2`` directly at its creation. - >>> class DoubleSequential(Feedforward): + >>> class ChainOfTwoFeedforward(Feedforward): ... """Two sequential Feedforward bricks.""" ... def __init__(self, brick1, brick2, **kwargs): ... super(Feedforward, self).__init__(**kwargs) ... self.brick1 = brick1 ... self.brick2 = brick2 - ... self.input_dim = brick1.input_dim - ... self.output_dim = brick2.output_dim ... self.children = [self.brick1, self.brick2] ... + ... @property + ... def input_dim(self): + ... return self.brick1.input_dim + ... + ... @input_dim.setter + ... def input_dim(self, value): + ... self.brick1.input_dim = value + ... + ... @property + ... def output_dim(self): + ... return self.brick2.output_dim + ... + ... @output_dim.setter + ... def output_dim(self, value): + ... self.brick2.output_dim = value + ... ... def _push_allocation_config(self): ... self.brick2.input_dim = self.brick1.get_dim('output') ... @@ -184,13 +239,13 @@ specify the ``input_dim`` of ``brick2`` directly at its creation. ... return self.brick2.apply(self.brick1.apply(x)) Note how ``get_dim`` is used to retrieve the ``input_dim`` of ``brick1``. You -can now use a ``DoubleSeuential`` brick as follows. +can now use a ``ChainOfTwoFeedforward`` brick as follows. >>> brick1 = Linear(input_dim=3, output_dim=2, use_bias=False, ... weights_init=Constant(2)) >>> brick2 = Linear(output_dim=4, use_bias=False, weights_init=Constant(2)) >>> - >>> seq = DoubleSequential(brick1, brick2) + >>> seq = ChainOfTwoFeedforward(brick1, brick2) >>> seq.initialize() >>> brick2.input_dim 2 @@ -199,23 +254,8 @@ can now use a ``DoubleSeuential`` brick as follows. Example ------- -.. 
doctest:: - :hide: - - >>> import numpy - >>> - >>> import theano - >>> from theano import tensor - >>> - >>> from blocks.bricks import Initializable, Linear - >>> from blocks.bricks.base import lazy, application - >>> from blocks.bricks.parallel import Parallel - >>> from blocks.initialization import Constant - >>> from blocks.roles import add_role, WEIGHT - >>> from blocks.utils import shared_floatx_nans - For the sake of the tutorial, let's consider a toy operation that takes two -batch inputs and multiply them respectively by two matrices, resulting in two +batch inputs and multiplies them respectively by two matrices, resulting in two outputs. The first step is to identify which brick to inherit from. Clearly we are @@ -318,8 +358,6 @@ You can test the brick as follows: >>> parallel1 = ParallelLinear(input_dim1, input_dim2, output_dim1, ... output_dim2, weights_init=Constant(2)) >>> parallel1.initialize() - >>> # The weights_init initialization scheme is pushed to the children - >>> # bricks. >>> output1, output2 = parallel1.apply(x1, x2) >>> >>> f1 = theano.function([x1, x2], [output1, output2]) @@ -358,6 +396,14 @@ You can test this new version as follows: >>> parallel2 = ParallelLinear2(input_dim1, input_dim2, output_dim1, ... output_dim2, weights_init=Constant(2)) >>> parallel2.initialize() + >>> # The weights_init initialization scheme is pushed to the children + >>> # bricks. We can verify it as follows. 
+ >>> w = parallel2.weights_init + >>> w0 = parallel2.children[0].weights_init + >>> w1 = parallel2.children[1].weights_init + >>> print(w == w0 == w1) + True + >>> >>> output1, output2 = parallel2.apply(x1, x2) >>> >>> f2 = theano.function([x1, x2], [output1, output2]) @@ -366,7 +412,7 @@ You can test this new version as follows: [ 40.]]...)] Actually it was not even necessary to create a custom brick for this particular -operation as blocks has a brick, called :class:``Parallel``, which +operation as Blocks has a brick, called :class:`.Parallel`, which applies the same prototype brick to several inputs. In our case the prototype brick we want to apply to our two inputs is a :class:``Linear`` brick with no bias: From 0a4e3b38b59eca220267da49798802efa102db82 Mon Sep 17 00:00:00 2001 From: rizar Date: Tue, 27 Oct 2015 09:44:27 -0400 Subject: [PATCH 15/16] Fixes conf.py and bumps version to 0.1.1 --- blocks/__init__.py | 2 +- docs/conf.py | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/blocks/__init__.py b/blocks/__init__.py index 58b4a231..90230562 100644 --- a/blocks/__init__.py +++ b/blocks/__init__.py @@ -2,4 +2,4 @@ # Scary warning: Adding code to this file can break namespace packages # See https://pythonhosted.org/setuptools/setuptools.html#namespace-packages __import__("pkg_resources").declare_namespace(__name__) -__version__ = '0.1.0' +__version__ = '0.1.1' diff --git a/docs/conf.py b/docs/conf.py index 0e60edb8..a726e04c 100644 --- a/docs/conf.py +++ b/docs/conf.py @@ -94,9 +94,9 @@ def __getattr__(cls, name): # built documents. # # The short X.Y version. -version = 0.1 +version = '0.1' # The full version, including alpha/beta/rc tags. -release = 0.1.0 +release = '0.1.1' # The language for content autogenerated by Sphinx. Refer to documentation # for a list of supported languages.
From 69bc6134beb72ada8d85097569ffb0c991a568c1 Mon Sep 17 00:00:00 2001 From: Anton Bakhtin Date: Tue, 27 Oct 2015 18:42:49 +0300 Subject: [PATCH 16/16] Add get_dim to Bidirectional Required to use in conjunction with RecurrentStack --- blocks/bricks/recurrent.py | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/blocks/bricks/recurrent.py b/blocks/bricks/recurrent.py index 498912a2..d49df50f 100644 --- a/blocks/bricks/recurrent.py +++ b/blocks/bricks/recurrent.py @@ -648,6 +648,11 @@ def apply(self, *args, **kwargs): def apply_delegate(self): return self.children[0].apply + def get_dim(self, name): + if name in self.apply.outputs: + return self.prototype.get_dim(name) * 2 + return self.prototype.get_dim(name) + RECURRENTSTACK_SEPARATOR = '#'