DBM - Hinton & Salakhutdinov paper #1239
@@ -0,0 +1,80 @@
"""
Augmented MNIST wrapper class
"""

import os
import numpy as np

from pylearn2.datasets.dense_design_matrix import DenseDesignMatrix
from pylearn2.scripts.dbm.augment_input import augment_input
from pylearn2.utils import serial


class MNIST_AUGMENTED(DenseDesignMatrix):
Review comment: You should not need a new Dataset object to do this.
Review comment: In my private repo from grad school, I did this by just re-computing the features each time they're needed. It's only MNIST, so on GPU this is not a big deal. You could also write a function that takes a Dataset and returns an augmented Dataset (maybe a VectorSpacesDataset). We can't have a separate class for every dataset and every possible processing of each dataset. That's just too much code to maintain.
""" | ||
Loads MNIST dataset and builds augmented dataset | ||
for DBM discriminative finetuning. | ||
|
||
Parameters | ||
---------- | ||
dataset : `pylearn2.datasets.dataset.Dataset` | ||
which_set : str | ||
Select between training and test set. | ||
model : `pylearn2.models.model.Model` | ||
The DBM to be finetuned. | ||
mf_steps : int | ||
Number of mean field updates for data augmentation. | ||
one_hot : bool, optional | ||
Enable or disable one-hot configuration for | ||
label matrix. | ||
start : int, optional | ||
First index of dataset to be finetuned. | ||
stop : int, optional | ||
Last index of dataset to be finetuned. | ||
save_aug : bool, optional | ||
Select whether to save the augmented dataset | ||
in a pkl file or not. | ||
""" | ||
|
||
def __init__(self, dataset, which_set, model, mf_steps, one_hot=True, | ||
start=None, stop=None, save_aug=False): | ||
|
||
self.path = os.path.join('${PYLEARN2_DATA_PATH}', 'mnist') | ||
self.path = serial.preprocess(self.path) | ||
|
||
try: | ||
if which_set == 'train': | ||
path = os.path.join(self.path, 'aug_train_dump.pkl.gz') | ||
datasets = serial.load(filepath=path) | ||
augmented_X, y = datasets[0], datasets[1] | ||
else: | ||
path = os.path.join(self.path, 'aug_test_dump.pkl.gz') | ||
datasets = serial.load(filepath=path) | ||
augmented_X, y = datasets[0], datasets[1] | ||
augmented_X, y = augmented_X[start:stop], y[start:stop] | ||
except: | ||
X = dataset.X | ||
if one_hot is True: | ||
one_hot = np.zeros((dataset.y.shape[0], 10), dtype='float32') | ||
for i in range(dataset.y.shape[0]): | ||
label = dataset.y[i] | ||
one_hot[i, label] = 1. | ||
y = one_hot | ||
else: | ||
y = dataset.y | ||
|
||
# BUILD AUGMENTED INPUT FOR FINETUNING | ||
X, y = X[start:stop], y[start:stop] | ||
augmented_X = augment_input(X, model, mf_steps) | ||
|
||
if save_aug is True: | ||
datasets = augmented_X, y | ||
if which_set == 'train': | ||
path = os.path.join(self.path, 'aug_train_dump.pkl.gz') | ||
serial.save(filepath=path, obj=datasets) | ||
else: | ||
path = os.path.join(self.path, 'aug_test_dump.pkl.gz') | ||
serial.save(filepath=path, obj=datasets) | ||
|
||
super(MNIST_AUGMENTED, self).__init__(X=augmented_X, y=y) |
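For reference, a minimal usage sketch of the class above. The pickle file name, the slice bounds, and the number of mean-field steps are illustrative assumptions, not values fixed by this PR:

from pylearn2.datasets.mnist import MNIST
from pylearn2.datasets.mnistaugmented import MNIST_AUGMENTED
from pylearn2.utils import serial

# Raw MNIST training split.
raw_train = MNIST(which_set='train', start=0, stop=60000)

# A DBM previously trained by the scripts in this PR (the path is assumed).
dbm = serial.load('dbm_mnist.pkl')

# Augment each training example with the DBM's second-layer mean-field state.
aug_train = MNIST_AUGMENTED(dataset=raw_train, which_set='train',
                            model=dbm, mf_steps=5, save_aug=True)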
@@ -0,0 +1,68 @@
"""
This module augments a dataset in order to make it suitable for
DBM discriminative finetuning.
For each example in the dataset, it uses the provided trained DBM to
perform n mean-field updates, initializing the state of the DBM's second
hidden layer, and augments the example with that state.
It returns a dataset where each example consists of its previous
value concatenated with the corresponding initialization of the DBM's
second hidden layer.
"""

from pylearn2.utils import sharedX
from theano import function
import numpy


def augment_input(X, model, mf_steps):

    """
    Input augmentation script.

    Parameters
    ----------
    X : ndarray, 2-dimensional
        A matrix containing the initial dataset.
    model : DBM
        The DBM model to be finetuned. It is used for
        the mean-field updates.
    mf_steps : int
        The number of mean-field updates.

    Returns
    -------
    final_data : ndarray, 2-dimensional
        The final augmented dataset.

    References
    ----------
    Salakhutdinov, R. and Hinton, G. "An efficient learning procedure
    for deep Boltzmann machines". Neural Computation, 2012.
    """

    print("\nAugmenting data...\n")

    i = 0
    init_data = model.visible_layer.space.get_origin_batch(batch_size=1,
                                                           dtype='float32')

    for x in X[:]:
        init_data[0] = x
        data = sharedX(init_data, name='v')
        # Mean-field inference of the second hidden layer
        # (niter: number of mean-field updates).
        marginal_posterior = model.mf(V=data, niter=mf_steps)[1]
        mp = function([], marginal_posterior)
        mp = mp()[0][0]
        # Concatenate the inferred state with the original example.
        if i == 0:
            final_data = numpy.asarray([numpy.concatenate((mp, x))])
        else:
            final_data = numpy.append(final_data,
                                      [numpy.concatenate((mp, x))],
                                      axis=0)

        i += 1

    print("Data augmentation complete!")

    return final_data
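The review comment on the dataset wrapper above suggests a function that takes a Dataset and returns an augmented dataset instead of a dedicated class. A minimal sketch of that idea, using augment_input as defined in this module; this is an illustration of the reviewer's suggestion, not code from the PR:

from pylearn2.datasets.dense_design_matrix import DenseDesignMatrix
from pylearn2.scripts.dbm.augment_input import augment_input


def make_augmented_dataset(dataset, model, mf_steps):
    """Return a DenseDesignMatrix whose inputs are the original examples
    concatenated with the DBM's second-layer mean-field state."""
    augmented_X = augment_input(dataset.X, model, mf_steps)
    return DenseDesignMatrix(X=augmented_X, y=dataset.y)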
@@ -0,0 +1,25 @@
The files in this directory recreate the experiment reported in the
paper

An efficient learning procedure for Deep Boltzmann Machines. G. Hinton and R. Salakhutdinov.

The procedure is divided into three phases: pretraining of the RBMs, training, and finetuning. The test_dbm_mnist script lets you enable each phase of training, select whether the DBM
includes a softmax layer, and choose whether the MLP is finetuned with dropout.
This implementation only works for DBMs with 2 hidden layers: stacking RBMs to compose the DBM requires changes to the contrastive divergence algorithm that have not been implemented here.
However, it has been shown that using more than 2 hidden layers in a DBM is not guaranteed to improve performance.

As explained in the paper, the finetuning procedure feeds the MLP with an augmented input; this implementation creates it using augment_input.py and
mnistaugmented.py in pylearn2/datasets/. The latter takes the MNIST dataset and augments it. Optionally, it saves .pkl files of the augmented dataset,
because data augmentation is a time-consuming operation.

There are two tests in /tests. The script that runs the whole procedure, with all the right parameters, reaches the result published by Hinton & Salakhutdinov. The fast version is
suitable to be run on Travis; it does not perform well because it uses a very small training set and a very small number of epochs.

NO DROPOUT RESULTS:
The test reaches a 0.94% test error WITHOUT a softmax layer on top of the DBM and WITHOUT dropout.
DROPOUT RESULTS:
The test reaches a 0.84% test error WITH a softmax layer on top of the DBM and WITH dropout.

Review comment: This is slightly worse than the average performance Nitish reported. But I haven't been able to reproduce his result either.

Experiments were performed on Ubuntu 14.04 LTS using an NVIDIA Tesla C1060 GPU and an 8-core Intel(R) Core(TM) i7 CPU 920 @ 2.67GHz, with openblas-base, numpy 1.9.0,
scipy 0.13.3, theano 0.6.0, and pylearn2 at 6264 commits.
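The YAML files below are templates with %(...)i / %(...)s placeholders. As a rough illustration of how one such phase is typically driven in pylearn2 (the file name and hyperparameter values here are assumptions for the sketch; the PR's test_dbm_mnist script sets its own values):

from pylearn2.config import yaml_parse

# Illustrative hyperparameters for the first-layer RBM template below.
hyper_params = {'train_stop': 60000,
                'nhid': 500,
                'batch_size': 100,
                'monitoring_batches': 5,
                'max_epochs': 100,
                'save_path': '.'}

with open('dbm_mnist_l1.yaml', 'r') as f:   # assumed file name
    template = f.read()

# Fill the %(...) placeholders and run the training loop for this phase.
train = yaml_parse.load(template % hyper_params)
train.main_loop()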
@@ -0,0 +1,71 @@
!obj:pylearn2.train.Train {
    dataset: &data !obj:pylearn2.datasets.binarizer.Binarizer {
        raw: &raw_train !obj:pylearn2.datasets.mnist.MNIST {
            which_set: "train",
            start: 0,
            stop: %(train_stop)i
        }
    },
    model: !obj:pylearn2.models.dbm.DBM {
        batch_size: %(batch_size)i,
        niter: 10,
        inference_procedure: !obj:pylearn2.models.dbm.WeightDoubling {},
        visible_layer: !obj:pylearn2.models.dbm.BinaryVector {
            nvis: 784,
        },
        hidden_layers: [
            !obj:pylearn2.models.dbm.BinaryVectorMaxPool {
                layer_name: 'h1',
                detector_layer_dim: %(n_h1)i,
                pool_size: 1,
                irange: 0.001,
            },
            !obj:pylearn2.models.dbm.BinaryVectorMaxPool {
                layer_name: 'h2',
                detector_layer_dim: %(n_h2)i,
                pool_size: 1,
                irange: 0.001,
            },
        ]
    },
    algorithm: !obj:pylearn2.training_algorithms.sgd.SGD {
        learning_rate: 0.005,
        learning_rule: !obj:pylearn2.training_algorithms.learning_rule.Momentum {
            init_momentum: 0.5,
        },
        monitoring_batches: %(monitoring_batches)i,
        monitoring_dataset: *data,
        cost: !obj:pylearn2.costs.cost.SumOfCosts {
            costs: [
                !obj:pylearn2.costs.dbm.VariationalPCD {
                    num_chains: 100,
                    num_gibbs_steps: 5,
                },
                !obj:pylearn2.costs.dbm.WeightDecay {
                    coeffs: [ .0002, .0002 ],
                },
                !obj:pylearn2.costs.dbm.TorontoSparsity {
                    targets: [ .2, .1 ],
                    coeffs: [ .001, .001 ],
                }
            ]
        },
        termination_criterion: !obj:pylearn2.termination_criteria.EpochCounter {
            max_epochs: %(max_epochs)i
        },
        update_callbacks: [
            !obj:pylearn2.training_algorithms.sgd.CustomizedLROverEpoch {
            }
        ]
    },
    extensions: [
        !obj:pylearn2.training_algorithms.learning_rule.MomentumAdjustor {
            final_momentum: 0.9,
            start: 1,
            saturate: 6,
        },
    ],
    save_path: "%(save_path)s/dbm_mnist.pkl",
    save_freq: %(max_epochs)i
}
@@ -0,0 +1,39 @@
!obj:pylearn2.train.Train {
    dataset: &data !obj:pylearn2.datasets.binarizer.Binarizer {
        raw: &raw_train !obj:pylearn2.datasets.mnist.MNIST {
            which_set: "train",
            start: 0,
            stop: %(train_stop)i
        }
    },
    model: !obj:pylearn2.models.rbm.RBM {
        nvis: 784,
        nhid: %(nhid)i,
        irange: 0.001,
    },
    algorithm: !obj:pylearn2.training_algorithms.sgd.SGD {
        learning_rate: 0.05,
        learning_rule: !obj:pylearn2.training_algorithms.learning_rule.Momentum {
            init_momentum: 0.5,
        },
        batch_size: %(batch_size)i,
        monitoring_batches: %(monitoring_batches)i,
        monitoring_dataset: *data,
        cost: !obj:pylearn2.costs.ebm_estimation.CDk {
            nsteps: 1,
        },
        termination_criterion: !obj:pylearn2.termination_criteria.EpochCounter {
            max_epochs: %(max_epochs)i,
        },
    },
    extensions: [
        !obj:pylearn2.training_algorithms.learning_rule.MomentumAdjustor {
            start: 1,
            saturate: 6,
            final_momentum: 0.9,
        },
    ],
    save_path: "%(save_path)s/dbm_mnist_l1.pkl",
    save_freq: %(max_epochs)i
}
@@ -0,0 +1,42 @@
!obj:pylearn2.train.Train {
    dataset: &train !obj:pylearn2.datasets.binarizer.Binarizer {
        raw: !obj:pylearn2.datasets.transformer_dataset.TransformerDataset {
            raw: !obj:pylearn2.datasets.mnist.MNIST {
                which_set: 'train',
                start: 0,
                stop: %(train_stop)i
            },
            transformer: !pkl: "%(save_path)s/dbm_mnist_l1.pkl"
        },
    },
    model: !obj:pylearn2.models.rbm.RBM {
        nvis: %(nvis)i,
        nhid: %(nhid)i,
        irange: 0.01,
    },
    algorithm: !obj:pylearn2.training_algorithms.sgd.SGD {
        learning_rate: 0.05,
        learning_rule: !obj:pylearn2.training_algorithms.learning_rule.Momentum {
            init_momentum: 0.5,
        },
        batch_size: %(batch_size)i,
        monitoring_batches: %(monitoring_batches)i,
        monitoring_dataset: *train,
        cost: !obj:pylearn2.costs.ebm_estimation.CDk {
            nsteps: 5,
        },
        termination_criterion: !obj:pylearn2.termination_criteria.EpochCounter {
            max_epochs: %(max_epochs)i,
        },
    },
    extensions: [
        !obj:pylearn2.training_algorithms.learning_rule.MomentumAdjustor {
            start: 1,
            saturate: 6,
            final_momentum: 0.9,
        },
    ],
    save_path: "%(save_path)s/dbm_mnist_l2.pkl",
    save_freq: %(max_epochs)i
}
@@ -0,0 +1,54 @@
!obj:pylearn2.train.Train {
    dataset: &train !obj:pylearn2.datasets.mnist.MNIST {
        which_set: 'train',
        start: 0,
        stop: %(train_stop)i
    },
    model: !obj:pylearn2.models.mlp.MLP {
        batch_size: %(batch_size)i,
        layers: [
            !obj:pylearn2.models.mlp.Sigmoid {
                layer_name: 'h0',
                dim: %(n_h0)i,
                sparse_init: 15,
            },
            !obj:pylearn2.models.mlp.Sigmoid {
                layer_name: 'h1',
                dim: %(n_h1)i,
                sparse_init: 15,
            },
            !obj:pylearn2.models.mlp.Softmax {
                layer_name: 'y',
                n_classes: 10,
                irange: 0.05
            }
        ],
        nvis: %(nvis)i,
    },
    algorithm: !obj:pylearn2.training_algorithms.bgd.BGD {
        conjugate: 1,
        line_search_mode: 'exhaustive',
        updates_per_batch: 6,
        monitoring_dataset: {
            'test': !obj:pylearn2.datasets.mnist.MNIST {
                which_set: 'test',
            },
        },
        cost: !obj:pylearn2.costs.mlp.Default {},
        termination_criterion: !obj:pylearn2.termination_criteria.And {
            criteria: [
                !obj:pylearn2.termination_criteria.EpochCounter {
                    max_epochs: %(max_epochs)i
                }
            ]
        },
    },
    extensions: [
        !obj:pylearn2.train_extensions.best_params.MonitorBasedSaveBest {
            channel_name: 'test_y_misclass',
            save_path: "%(save_path)s/dbm_mnist_mlp.pkl",
            store_best_model: True
        },
    ]
}
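A rough sketch of how the finetuned model saved by MonitorBasedSaveBest above might be evaluated afterwards. It assumes the MLP was trained on the augmented input, that save_path was substituted with '.', and uses an illustrative mf_steps value; the PR's test scripts handle this themselves:

import numpy as np
from theano import function
from pylearn2.datasets.mnist import MNIST
from pylearn2.datasets.mnistaugmented import MNIST_AUGMENTED
from pylearn2.utils import serial

# Reload the trained DBM and the finetuned MLP (paths are assumptions).
dbm = serial.load('dbm_mnist.pkl')
mlp = serial.load('dbm_mnist_mlp.pkl')

# Build the augmented test set with the same number of mean-field steps
# used during training (5 is an illustrative value).
aug_test = MNIST_AUGMENTED(dataset=MNIST(which_set='test'), which_set='test',
                           model=dbm, mf_steps=5)

# Compile the MLP's forward pass and measure the misclassification rate.
X = mlp.get_input_space().make_theano_batch()
predict = function([X], mlp.fprop(X))
probs = predict(aug_test.X.astype('float32'))
errors = np.argmax(probs, axis=1) != np.argmax(aug_test.y, axis=1)
print('test error: %.2f%%' % (100.0 * errors.mean()))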
Review comment: We only use all caps when it's actually an acronym. I would suggest something like AugmentedMNIST.