update config

llSourcell · Nov 13, 2015 · b626b22 · b626b22
2 parents f5c4c5b + 95f8359
commit b626b22
Show file tree

Hide file tree

Showing 4 changed files with 78 additions and 51 deletions.
diff --git a/.gitignore b/.gitignore
@@ -55,3 +55,6 @@ docs/_build/
 
 # PyBuilder
 target/
+
+# Ignore changes to configuration file
+config.py
diff --git a/README.md b/README.md
@@ -59,14 +59,15 @@ This code is written in python. To use it you will need:
 * [Lasagne](https://github.com/Lasagne/Lasagne)
 * A version of Theano that Lasagne supports
 
-Note that a GPU is required.
+To run on CPU, you will also need to install [Caffe](http://caffe.berkeleyvision.org) and its Python interface.
+
 
 ## Getting started
 
 You will first need to download some pre-trained models and style vectors. Most of the materials are available in a single compressed file, which you can obtain by running
 
     wget http://www.cs.toronto.edu/~rkiros/neural_storyteller.zip
-    
+
 Included is a pre-trained decoder on romance novels, the decoder dictionary, caption and romance style vectors, MS COCO training captions and a pre-trained image-sentence embedding model.
 
 Next, you need to obtain the pre-trained skip-thoughts encoder. Go [here](https://github.com/ryankiros/skip-thoughts) and follow the instructions on the main page to obtain the pre-trained model.
@@ -75,7 +76,16 @@ Finally, we need the VGG-19 ConvNet parameters. You can obtain them by running
 
     wget https://s3.amazonaws.com/lasagne/recipes/pretrained/imagenet/vgg19.pkl
 
-Note that this model is for non-commercial use only. Once you have all the materials, open `generate.py` and specify the locations of all of the models and style vectors that you downloaded.
+Note that this model is for non-commercial use only. Once you have all the materials, open `config.py` and specify the locations of all of the models and style vectors that you downloaded.
+
+To run on CPU, you will also need to download the VGG-19 Caffe prototxt and model by running:
+
+    wget http://www.robots.ox.ac.uk/~vgg/software/very_deep/caffe/VGG_ILSVRC_19_layers.caffemodel
+    wget https://gist.githubusercontent.com/ksimonyan/3785162f95cd2d5fee77/raw/bb2b4fe0a9bb0669211cf3d0bc949dfdda173e9e/VGG_ILSVRC_19_layers_deploy.prototxt
+
+You also need to set the pycaffe and Caffe model paths in `config.py`, and set the flag on line 8 to:
+
+    FLAG_CPU_MODE = True
 
 ## Generating a story
 
@@ -95,10 +105,10 @@ There are 2 knobs that can be tuned for generation: the number of retrieved capt
 
 where k is the number of captions to condition on and bw is the beam width. These are reasonable defaults but playing around with these can give you very different outputs! The higher the beam width, the longer it takes to generate a story.
 
-If you bias by song lyrics, you can turn on the lyric flag which will print the output in multiple lines by comma delimiting. `neural_storyteller.zip` contains an additional bias vector called `swift_style.npy` which is the mean of skip-thought vectors across Taylor Swift lyrics. If you point `path_to_posbias` to this vector in `generate.py`, you can generate captions in the style of Taylor Swift lyrics. For example:
+If you bias by song lyrics, you can turn on the lyric flag which will print the output in multiple lines by comma delimiting. `neural_storyteller.zip` contains an additional bias vector called `swift_style.npy` which is the mean of skip-thought vectors across Taylor Swift lyrics. If you point `path_to_posbias` to this vector in `config.py`, you can generate captions in the style of Taylor Swift lyrics. For example:
 
     generate.story(z, './images/ex1.jpg', lyric=True)
-    
+
 should output
 
     You re the only person on the beach right now
@@ -107,7 +117,7 @@ should output
     and when the sea breeze hits me
     I thought
     Hey
-    
+
 ## Reference
 
 This project does not have any associated paper with it. If you found this code useful, please consider citing:
@@ -120,7 +130,7 @@ Ryan Kiros, Yukun Zhu, Ruslan Salakhutdinov, Richard S. Zemel, Antonio Torralba,
       journal={arXiv preprint arXiv:1506.06726},
       year={2015}
     }
-    
+
 If you also use the BookCorpus data for training new models, please also consider citing:
 
 Yukun Zhu, Ryan Kiros, Richard Zemel, Ruslan Salakhutdinov, Raquel Urtasun, Antonio Torralba, Sanja Fidler.

diff --git a/config.py b/config.py
@@ -0,0 +1,38 @@
+"""
+Configuration for the generate module
+"""
+
+#-----------------------------------------------------------------------------#
+# Flag for running on CPU (when True, VGG-19 is loaded through Caffe)
+#-----------------------------------------------------------------------------#
+FLAG_CPU_MODE = True
+
+#-----------------------------------------------------------------------------#
+# Paths to models and biases
+#-----------------------------------------------------------------------------#
+paths = dict()
+
+# Skip-thoughts
+paths['skmodels'] = '/u/rkiros/public_html/models/'
+paths['sktables'] = '/u/rkiros/public_html/models/'
+
+# Decoder
+paths['decmodel'] = '/ais/gobi3/u/rkiros/storyteller/romance.npz'
+paths['dictionary'] = '/ais/gobi3/u/rkiros/storyteller/romance_dictionary.pkl'
+
+# Image-sentence embedding
+paths['vsemodel'] = '/ais/gobi3/u/rkiros/storyteller/coco_embedding.npz'
+
+# VGG-19 convnet
+paths['vgg'] = '/ais/gobi3/u/rkiros/vgg/vgg19.pkl'
+paths['pycaffe'] = '/u/yukun/Projects/caffe-run/python'
+paths['vgg_proto_caffe'] = '/ais/guppy9/movie2text/neural-storyteller/models/VGG_ILSVRC_19_layers_deploy.prototxt'
+paths['vgg_model_caffe'] = '/ais/guppy9/movie2text/neural-storyteller/models/VGG_ILSVRC_19_layers.caffemodel'
+
+
+# COCO training captions
+paths['captions'] = '/ais/gobi3/u/rkiros/storyteller/coco_train_caps.txt'
+
+# Biases
+paths['negbias'] = '/ais/gobi3/u/rkiros/storyteller/caption_style.npy'
+paths['posbias'] = '/ais/gobi3/u/rkiros/storyteller/romance_style.npy'
diff --git a/generate.py b/generate.py
@@ -11,12 +11,15 @@
 import decoder
 import embedding
 
+import config
+
 import lasagne
 from lasagne.layers import InputLayer, DenseLayer, NonlinearityLayer, DropoutLayer
-from lasagne.layers.corrmm import Conv2DMMLayer as ConvLayer
 from lasagne.layers import MaxPool2DLayer as PoolLayer
 from lasagne.nonlinearities import softmax
 from lasagne.utils import floatX
+if not config.FLAG_CPU_MODE:
+    from lasagne.layers.corrmm import Conv2DMMLayer as ConvLayer
 
 from scipy import optimize, stats
 from collections import OrderedDict, defaultdict, Counter
@@ -27,37 +30,6 @@
 from PIL import ImageFile
 ImageFile.LOAD_TRUNCATED_IMAGES = True
 
-FLAG_CPU_MODE = False
-
-#-----------------------------------------------------------------------------#
-# Specify model paths and biases here
-#-----------------------------------------------------------------------------#
-
-# Skip-thoughts
-path_to_skmodels = '/u/rkiros/public_html/models/'
-path_to_sktables = '/u/rkiros/public_html/models/'
-
-# Decoder
-path_to_decmodel = '/ais/gobi3/u/rkiros/storyteller/romance.npz'
-path_to_dictionary = '/ais/gobi3/u/rkiros/storyteller/romance_dictionary.pkl'
-
-# Image-sentence embedding
-path_to_vsemodel = '/ais/gobi3/u/rkiros/storyteller/coco_embedding.npz'
-
-# VGG-19 convnet
-path_to_vgg = '/ais/gobi3/u/rkiros/vgg/vgg19.pkl'
-caffe_path = '/u/yukun/Projects/caffe-run/python'
-path_to_vgg_proto_caffe = '/ais/guppy9/movie2text/neural-storyteller/models/VGG_ILSVRC_19_layers_deploy.prototxt'
-path_to_vgg_model_caffe = '/ais/guppy9/movie2text/neural-storyteller/models/VGG_ILSVRC_19_layers.caffemodel'
-
-# COCO training captions
-path_to_captions = '/ais/gobi3/u/rkiros/storyteller/coco_train_caps.txt'
-
-# Biases
-path_to_negbias = '/ais/gobi3/u/rkiros/storyteller/caption_style.npy'
-path_to_posbias = '/ais/gobi3/u/rkiros/storyteller/romance_style.npy'
-
-#-----------------------------------------------------------------------------#
 
 def story(z, image_loc, k=100, bw=50, lyric=False):
     """
@@ -106,35 +78,39 @@ def load_all():
     """
     Load everything we need for generating
     """
-    print path_to_decmodel
+    print config.paths['decmodel']
 
     # Skip-thoughts
     print 'Loading skip-thoughts...'
-    stv = skipthoughts.load_model(path_to_skmodels, path_to_sktables)
+    stv = skipthoughts.load_model(config.paths['skmodels'],
+                                  config.paths['sktables'])
 
     # Decoder
     print 'Loading decoder...'
-    dec = decoder.load_model(path_to_decmodel, path_to_dictionary)
+    dec = decoder.load_model(config.paths['decmodel'],
+                             config.paths['dictionary'])
 
     # Image-sentence embedding
     print 'Loading image-sentence embedding...'
-    vse = embedding.load_model(path_to_vsemodel)
+    vse = embedding.load_model(config.paths['vsemodel'])
 
     # VGG-19
     print 'Loading and initializing ConvNet...'
-    if FLAG_CPU_MODE:
-        sys.path.insert(0, caffe_path)
+
+    if config.FLAG_CPU_MODE:
+        sys.path.insert(0, config.paths['pycaffe'])
         import caffe
         caffe.set_mode_cpu()
-        net = caffe.Net(path_to_vgg_proto_caffe, path_to_vgg_model_caffe,
+        net = caffe.Net(config.paths['vgg_proto_caffe'],
+                        config.paths['vgg_model_caffe'],
                         caffe.TEST)
     else:
-        net = build_convnet(path_to_vgg)
+        net = build_convnet(config.paths['vgg'])
 
     # Captions
     print 'Loading captions...'
     cap = []
-    with open(path_to_captions, 'rb') as f:
+    with open(config.paths['captions'], 'rb') as f:
         for line in f:
             cap.append(line.strip())
 
@@ -144,8 +120,8 @@ def load_all():
 
     # Biases
     print 'Loading biases...'
-    bneg = numpy.load(path_to_negbias)
-    bpos = numpy.load(path_to_posbias)
+    bneg = numpy.load(config.paths['negbias'])
+    bpos = numpy.load(config.paths['posbias'])
 
     # Pack up
     z = {}
@@ -197,7 +173,7 @@ def compute_features(net, im):
     """
     Compute fc7 features for im
     """
-    if FLAG_CPU_MODE:
+    if config.FLAG_CPU_MODE:
         net.blobs['data'].reshape(* im.shape)
         net.blobs['data'].data[...] = im
         net.forward()