diff --git a/demo/mnist/api_train.py b/demo/mnist/api_train.py index f301da382ff8a..325d140d46154 100644 --- a/demo/mnist/api_train.py +++ b/demo/mnist/api_train.py @@ -6,42 +6,32 @@ The user api could be simpler and carefully designed. """ -import py_paddle.swig_paddle as api -from py_paddle import DataProviderConverter -import paddle.trainer.PyDataProvider2 as dp -import numpy as np import random + +import paddle.v2 as paddle + from mnist_util import read_from_mnist -from paddle.trainer_config_helpers import * def optimizer_config(): - settings( + paddle.config.settings( learning_rate=1e-4, - learning_method=AdamOptimizer(), + learning_method=paddle.config.AdamOptimizer(), batch_size=1000, - model_average=ModelAverage(average_window=0.5), - regularization=L2Regularization(rate=0.5)) + model_average=paddle.config.ModelAverage(average_window=0.5), + regularization=paddle.config.L2Regularization(rate=0.5)) def network_config(): - imgs = data_layer(name='pixel', size=784) - hidden1 = fc_layer(input=imgs, size=200) - hidden2 = fc_layer(input=hidden1, size=200) - inference = fc_layer(input=hidden2, size=10, act=SoftmaxActivation()) - cost = classification_cost( - input=inference, label=data_layer( + imgs = paddle.config.data_layer(name='pixel', size=784) + hidden1 = paddle.config.fc_layer(input=imgs, size=200) + hidden2 = paddle.config.fc_layer(input=hidden1, size=200) + inference = paddle.config.fc_layer( + input=hidden2, size=10, act=paddle.config.SoftmaxActivation()) + cost = paddle.config.classification_cost( + input=inference, label=paddle.config.data_layer( name='label', size=10)) - outputs(cost) - - -def init_parameter(network): - assert isinstance(network, api.GradientMachine) - for each_param in network.getParameters(): - assert isinstance(each_param, api.Parameter) - array_size = len(each_param) - array = np.random.uniform(-1.0, 1.0, array_size).astype('float32') - each_param.getBuf(api.PARAMETER_VALUE).copyFromNumpyArray(array) + paddle.config.outputs(cost) 
def generator_to_batch(generator, batch_size): @@ -73,42 +63,44 @@ def input_order_converter(generator): def main(): - api.initPaddle("-use_gpu=false", "-trainer_count=4") # use 4 cpu cores + paddle.raw.initPaddle("-use_gpu=false", + "-trainer_count=4") # use 4 cpu cores # get enable_types for each optimizer. # enable_types = [value, gradient, momentum, etc] # For each optimizer(SGD, Adam), GradientMachine should enable different # buffers. - opt_config_proto = parse_optimizer_config(optimizer_config) - opt_config = api.OptimizationConfig.createFromProto(opt_config_proto) - _temp_optimizer_ = api.ParameterOptimizer.create(opt_config) + opt_config_proto = paddle.config.parse_optimizer(optimizer_config) + opt_config = paddle.raw.OptimizationConfig.createFromProto(opt_config_proto) + _temp_optimizer_ = paddle.raw.ParameterOptimizer.create(opt_config) enable_types = _temp_optimizer_.getParameterTypes() # Create Simple Gradient Machine. - model_config = parse_network_config(network_config) - m = api.GradientMachine.createFromConfigProto( - model_config, api.CREATE_MODE_NORMAL, enable_types) + model_config = paddle.config.parse_network(network_config) + m = paddle.raw.GradientMachine.createFromConfigProto( + model_config, paddle.raw.CREATE_MODE_NORMAL, enable_types) # This type check is not useful. Only enable type hint in IDE. # Such as PyCharm - assert isinstance(m, api.GradientMachine) + assert isinstance(m, paddle.raw.GradientMachine) # Initialize Parameter by numpy. - init_parameter(network=m) + m.randParameters() # Create Local Updater. Local means not run in cluster. # For a cluster training, here we can change to createRemoteUpdater # in future. - updater = api.ParameterUpdater.createLocalUpdater(opt_config) - assert isinstance(updater, api.ParameterUpdater) + updater = paddle.raw.ParameterUpdater.createLocalUpdater(opt_config) + assert isinstance(updater, paddle.raw.ParameterUpdater) # Initialize ParameterUpdater. 
updater.init(m) # DataProvider Converter is a utility convert Python Object to Paddle C++ # Input. The input format is as same as Paddle's DataProvider. - converter = DataProviderConverter( - input_types=[dp.dense_vector(784), dp.integer_value(10)]) + converter = paddle.data.DataProviderConverter(input_types=[ + paddle.data.dense_vector(784), paddle.data.integer_value(10) + ]) train_file = './data/raw_data/train' test_file = './data/raw_data/t10k' @@ -130,7 +122,7 @@ def main(): # outArgs is Neural Network forward result. Here is not useful, just passed # to gradient_machine.forward - outArgs = api.Arguments.createArguments(0) + outArgs = paddle.raw.Arguments.createArguments(0) for pass_id in xrange(2): # we train 2 passes. updater.startPass() @@ -178,7 +170,7 @@ def main(): test_data_generator = input_order_converter(read_from_mnist(test_file)) for data_batch in generator_to_batch(test_data_generator, 512): # in testing stage, only forward is needed. - m.forward(converter(data_batch), outArgs, api.PASS_TEST) + m.forward(converter(data_batch), outArgs, paddle.raw.PASS_TEST) m.eval(test_evaluator) # print error rate for test data set @@ -189,8 +181,8 @@ def main(): updater.catchUpWith() params = m.getParameters() for each_param in params: - assert isinstance(each_param, api.Parameter) - value = each_param.getBuf(api.PARAMETER_VALUE) + assert isinstance(each_param, paddle.raw.Parameter) + value = each_param.getBuf(paddle.raw.PARAMETER_VALUE) value = value.copyToNumpyArray() # Here, we could save parameter to every where you want diff --git a/demo/quick_start/.gitignore b/demo/quick_start/.gitignore index f71662563ff96..70b1e5e6b84bd 100644 --- a/demo/quick_start/.gitignore +++ b/demo/quick_start/.gitignore @@ -13,3 +13,6 @@ data/pred.txt dataprovider_copy_1.py train.log output +*.w0 +*.wbias +*.pkl diff --git a/demo/quick_start/api_train_gm.py b/demo/quick_start/api_train_gm.py new file mode 100644 index 0000000000000..0e0b8e2c229a6 --- /dev/null +++ 
b/demo/quick_start/api_train_gm.py @@ -0,0 +1,242 @@ +import random +import cPickle +import os +import paddle.v2 as paddle + + +class FileReader(object): + """ + :type word_dict: dict + :type __pool__: list + """ + + def __init__(self, word_dict, filename, batch_size, should_shuffle=True): + if isinstance(word_dict, basestring): + self.word_dict = FileReader.read_from_dict(word_dict) + else: + self.word_dict = word_dict + self.__should_shuffle__ = should_shuffle + self.__batch_size__ = batch_size + + self.__pool__ = self.load_all_data(filename) + self.__idx__ = 0 + + def load_all_data(self, filename): + def __mapper__(line): + label, sentence = line.split('\t') + label = int(label) + word_ids = filter(lambda x: x is not None, + map(lambda x: self.word_dict.get(x, None), + sentence.split())) + return word_ids, label + + if filename[-3:] == 'txt': + with open(filename, 'r') as f: + ret_val = map(__mapper__, f) + with open("%s.pkl" % filename[:-4], 'wb') as f: + cPickle.dump(ret_val, f, cPickle.HIGHEST_PROTOCOL) + return ret_val + elif filename[-3:] == 'pkl': + with open(filename, 'rb') as f: + return cPickle.load(f) + + def __iter__(self): + self.reset() + return self + + def reset(self): + if self.__should_shuffle__: + random.shuffle(self.__pool__) + self.__idx__ = 0 + + def next(self): + if self.__idx__ < len(self.__pool__): + end = min(self.__idx__ + self.__batch_size__, len(self.__pool__)) + start = self.__idx__ + self.__idx__ = end + return self.__pool__[start:end] + else: + raise StopIteration() + + @staticmethod + def read_from_dict(fn): + if os.path.exists(fn + '.pkl'): + with open(fn + '.pkl', 'rb') as f: + return cPickle.load(f) + else: + ret_val = dict() + with open(fn, 'r') as f: + for i, line in enumerate(f): + w = line.split()[0] + ret_val[w] = i + with open(fn + '.pkl', 'wb') as f: + cPickle.dump(ret_val, f, cPickle.HIGHEST_PROTOCOL) + return ret_val + + +def optimizer_config(): + paddle.config.settings( + batch_size=1, + learning_rate=1e-4, + 
learning_method=paddle.config.RMSPropOptimizer()) + + +def bow_config(dict_size): + def __impl__(): + sentence = paddle.config.data_layer(name='sentence', size=dict_size) + inference = paddle.config.fc_layer( + input=sentence, + size=2, + act=paddle.config.SoftmaxActivation(), + param_attr=paddle.config.ParamAttr(sparse_update=True)) + cost = paddle.config.classification_cost( + input=inference, + label=paddle.config.data_layer( + name='label', size=2)) + paddle.config.outputs(cost) + + return __impl__ + + +def swap_batch(batch): + for each_item in batch: + a, b = each_item + yield b, a + + +def main(): + print 'Loading data into memory' + train_file_name = './data/train.pkl' if os.path.exists( + './data/train.pkl') else './data/train.txt' + + test_file_name = './data/test.pkl' if os.path.exists( + './data/test.pkl') else './data/test.txt' + + train_reader = FileReader( + "./data/dict.txt", filename=train_file_name, batch_size=1024) + test_reader = FileReader( + train_reader.word_dict, filename=test_file_name, batch_size=1024) + + print 'Done.' 
+ + paddle.raw.initPaddle('--use_gpu=0', '--trainer_count=3') + + optimizer_proto = paddle.config.parse_optimizer( + optimizer_conf=optimizer_config) + optimizer_conf = paddle.raw.OptimizationConfig.createFromProto( + optimizer_proto) + __tmp_optimizer__ = paddle.raw.ParameterOptimizer.create(optimizer_conf) + assert isinstance(__tmp_optimizer__, paddle.raw.ParameterOptimizer) + enable_types = __tmp_optimizer__.getParameterTypes() + + model_proto = paddle.config.parse_network( + network_conf=bow_config(len(train_reader.word_dict))) + + for param in model_proto.parameters: + if param.sparse_remote_update: + # disable sparse remote update, when local + param.sparse_remote_update = False + + gradient_machine = paddle.raw.GradientMachine.createFromConfigProto( + model_proto, paddle.raw.CREATE_MODE_NORMAL, enable_types) + assert isinstance(gradient_machine, paddle.raw.GradientMachine) + gradient_machine.randParameters() + + updater = paddle.raw.ParameterUpdater.createLocalUpdater(optimizer_conf) + assert isinstance(updater, paddle.raw.ParameterUpdater) + + input_order = model_proto.input_layer_names + input_types = { + 'sentence': + paddle.data.sparse_binary_vector(len(train_reader.word_dict)), + 'label': paddle.data.integer_value(2) + } + + tmp = [] + for each in input_order: + tmp.append(input_types[each]) + + input_types = tmp + + converter = paddle.data.DataProviderConverter(input_types=input_types) + + input_order_for_data = ['sentence', 'label'] + switcher = None + if input_order_for_data != input_order: + switcher = swap_batch + + updater.init(gradient_machine) + + gradient_machine.start() + + train_evaluator = gradient_machine.makeEvaluator() + test_evaluator = gradient_machine.makeEvaluator() + assert isinstance(train_evaluator, paddle.raw.Evaluator) + assert isinstance(test_evaluator, paddle.raw.Evaluator) + + train_evaluate_period = 100 + + out_args = paddle.raw.Arguments.createArguments(0) + assert isinstance(out_args, paddle.raw.Arguments) + for pass_id in 
xrange(10): + updater.startPass() + for batch_id, data_batch in enumerate(train_reader): + if switcher is not None: + data_batch = switcher(data_batch) + + updater.startBatch(len(data_batch)) + + in_args = converter(data_batch) + + if batch_id % train_evaluate_period == 0: + train_evaluator.start() + + gradient_machine.forwardBackward(in_args, out_args, + paddle.raw.PASS_TRAIN) + + gradient_machine.eval(train_evaluator) + + cost = out_args.sumCosts() / len(data_batch) + + if batch_id % train_evaluate_period == 0: + print 'Pass=%d Batch=%d Cost=%f' % (pass_id, batch_id, + cost), train_evaluator + train_evaluator.finish() + + gradient_machine.eval(train_evaluator) + + for each_param in gradient_machine.getParameters(): + updater.update(each_param) + + updater.finishBatch(cost) + + print 'Pass=%d Batch=%d Cost=%f' % (pass_id, batch_id, + cost), train_evaluator + updater.catchUpWith() + + test_evaluator.start() + for data_batch in test_reader: + if switcher is not None: + data_batch = switcher(data_batch) + + in_args = converter(data_batch) + gradient_machine.forward(in_args, out_args, paddle.raw.PASS_TEST) + gradient_machine.eval(test_evaluator) + + print 'Test Pass=%d' % pass_id, test_evaluator + + print 'Saving parameters.' + for param in gradient_machine.getParameters(): + assert isinstance(param, paddle.raw.Parameter) + save_name = "%d_%s" % (pass_id, param.getName()) + param.save(save_name) + print 'Done.' 
+ + test_evaluator.finish() + + updater.finishPass() + gradient_machine.finish() + + +if __name__ == '__main__': + main() diff --git a/paddle/py_paddle/dataprovider_converter.py b/paddle/py_paddle/dataprovider_converter.py index 981d10afda267..21d1cb75f4d40 100644 --- a/paddle/py_paddle/dataprovider_converter.py +++ b/paddle/py_paddle/dataprovider_converter.py @@ -34,6 +34,10 @@ def finish_scan(self, argument): class DenseScanner(IScanner): + """ + :type __mat__: numpy.ndarray + """ + def __init__(self, input_type, pos): IScanner.__init__(self, input_type, pos) self.__mat__ = None @@ -47,6 +51,8 @@ def scan(self, dat): def finish_scan(self, argument): assert isinstance(argument, swig_paddle.Arguments) assert isinstance(self.input_type, dp2.InputType) + if self.__mat__.dtype != numpy.float32: + self.__mat__ = self.__mat__.astype(numpy.float32) m = swig_paddle.Matrix.createDenseFromNumpy(self.__mat__, True, False) argument.setSlotValue(self.pos, m) diff --git a/python/CMakeLists.txt b/python/CMakeLists.txt index 1cda4762eb2a5..1e660d13fdeaf 100644 --- a/python/CMakeLists.txt +++ b/python/CMakeLists.txt @@ -4,11 +4,12 @@ set(OUTPUT_DIR file(GLOB TRAINER_PY_FILES . ./paddle/trainer/*.py) file(GLOB HELPERS_PY_FILES . ./paddle/trainer_config_helpers/*.py) file(GLOB UTILS_PY_FILES . ./paddle/utils/*.py) - +file(GLOB V2_PY_FILES . 
./paddle/v2/*.py) set(PY_FILES paddle/__init__.py ${TRAINER_PY_FILES} ${HELPERS_PY_FILES} - ${UTILS_PY_FILES}) + ${UTILS_PY_FILES} + ${V2_PY_FILES}) configure_file(${CMAKE_CURRENT_SOURCE_DIR}/setup.py.in ${CMAKE_CURRENT_BINARY_DIR}/setup.py) diff --git a/python/paddle/trainer_config_helpers/__init__.py b/python/paddle/trainer_config_helpers/__init__.py index 13155ebddbb49..a7f5c02802587 100644 --- a/python/paddle/trainer_config_helpers/__init__.py +++ b/python/paddle/trainer_config_helpers/__init__.py @@ -21,5 +21,6 @@ from optimizers import * from attrs import * from config_parser_utils import * + # This will enable operator overload for LayerOutput import layer_math diff --git a/python/paddle/v2/__init__.py b/python/paddle/v2/__init__.py new file mode 100644 index 0000000000000..95d32832f0732 --- /dev/null +++ b/python/paddle/v2/__init__.py @@ -0,0 +1,19 @@ +""" +This is an experimental package for Paddle new API. + +Currently, we should always use + +.. code-block:: python + + import paddle.v2 as paddle + +as our import statement. The API is in flux, never use this package in +production. 
+""" + +import py_paddle.swig_paddle as raw +import config +import data +import paddle.proto as proto + +__all__ = ['config', 'data', 'raw', 'proto'] diff --git a/python/paddle/v2/config.py b/python/paddle/v2/config.py new file mode 100644 index 0000000000000..48873b26af006 --- /dev/null +++ b/python/paddle/v2/config.py @@ -0,0 +1,12 @@ +from paddle.trainer_config_helpers import * +from paddle.trainer.config_parser import parse_config as parse +from paddle.trainer_config_helpers.config_parser_utils import \ + parse_network_config as parse_network +from paddle.trainer_config_helpers.config_parser_utils import \ + parse_optimizer_config as parse_optimizer + +import paddle.trainer_config_helpers as tmp + +__all__ = ['parse', 'parse_network', 'parse_optimizer'] + +__all__.extend(filter(lambda x: x[:2] != '__', dir(tmp))) diff --git a/python/paddle/v2/data.py b/python/paddle/v2/data.py new file mode 100644 index 0000000000000..c1d32f8717361 --- /dev/null +++ b/python/paddle/v2/data.py @@ -0,0 +1,11 @@ +from paddle.trainer.PyDataProvider2 import * +from py_paddle.dataprovider_converter import DataProviderConverter + +__all__ = [ + 'dense_vector', 'dense_vector_sequence', 'dense_vector_sub_sequence', + 'integer_value', 'integer_sequence', 'integer_value_sub_sequence', + 'sparse_binary_vector', 'sparse_binary_vector_sequence', + 'sparse_binary_vector_sub_sequence', 'sparse_vector', + 'sparse_vector_sequence', 'sparse_vector_sub_sequence', 'provider', + 'CacheType', 'DataProviderConverter' +] diff --git a/python/setup.py.in b/python/setup.py.in index b66a42e87c787..1e1324eea825a 100644 --- a/python/setup.py.in +++ b/python/setup.py.in @@ -4,7 +4,8 @@ packages=['paddle', 'paddle.proto', 'paddle.trainer', 'paddle.trainer_config_helpers', - 'paddle.utils'] + 'paddle.utils', + 'paddle.v2'] setup(name='paddle', version='${PADDLE_VERSION}',