diff --git a/Kaggle/DigitRecognizer/CMakeLists.txt b/Kaggle/DigitRecognizer/CMakeLists.txt deleted file mode 100644 index c69fd356..00000000 --- a/Kaggle/DigitRecognizer/CMakeLists.txt +++ /dev/null @@ -1,27 +0,0 @@ -cmake_minimum_required(VERSION 2.6) -project(kaggle-digit-recognizer) - -option(DEBUG "DEBUG" OFF) - -set(MODEL_SOURCE_DIR ${CMAKE_CURRENT_SOURCE_DIR}/src) -include_directories("${CMAKE_CURRENT_SOURCE_DIR}/../../") - -set(SOURCES ${MODEL_SOURCE_DIR}/DigitRecognizer.cpp) - -if(DEBUG) - message("Compilation with debug info (with ggdb3 flag)") - set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -ggdb3") -else() - message("Compilation without debug info (without ggdb3 flag)") -endif() - -add_executable(DigitRecognizer ${SOURCES}) -target_link_libraries(DigitRecognizer - ${MLPACK_LIBRARIES} - ${ARMADILLO_LIBRARIES}) - -if (NOT EXISTS ${PROJECT_BINARY_DIR}/../data) - add_custom_command(TARGET DigitRecognizer POST_BUILD - COMMAND ${CMAKE_COMMAND} -E tar xvzf ${CMAKE_CURRENT_SOURCE_DIR}/../kaggle_train_test_dataset.zip - WORKING_DIRECTORY ${PROJECT_BINARY_DIR}/../) -endif() diff --git a/Kaggle/DigitRecognizer/src/DigitRecognizer.cpp b/Kaggle/DigitRecognizer/src/DigitRecognizer.cpp deleted file mode 100644 index f9078ffb..00000000 --- a/Kaggle/DigitRecognizer/src/DigitRecognizer.cpp +++ /dev/null @@ -1,186 +0,0 @@ -/** - * An example of using Feed Forward Neural Network (FFN) for - * solving Digit Recognizer problem from Kaggle website. - * - * The full description of a problem as well as datasets for training - * and testing are available here https://www.kaggle.com/c/digit-recognizer - * - * mlpack is free software; you may redistribute it and/or modify it under the - * terms of the 3-clause BSD license. You should have received a copy of the - * 3-clause BSD license along with mlpack. If not, see - * http://www.opensource.org/licenses/BSD-3-Clause for more information. - * - * @author Eugene Freyman - */ - -#include -#include - -#include -#include -#include - -#include - -#include - -using namespace mlpack; -using namespace mlpack::ann; - -using namespace arma; -using namespace std; - -using namespace ens; - -int main() -{ - // Dataset is randomly split into validation - // and training parts in the following ratio. - constexpr double RATIO = 0.1; - // The number of neurons in the first layer. - constexpr int H1 = 200; - // The number of neurons in the second layer. - constexpr int H2 = 100; - - // The solution is done in several approaches (CYCLES), so each approach - // uses previous results as a starting point and has different optimizer - // options (here the step size is different). - - // Number of cycles. - constexpr int CYCLES = 20; - - // Step size of the optimizer. - constexpr double STEP_SIZE = 5e-3; - - // Number of data points in each iteration of SGD - constexpr int BATCH_SIZE = 64; - - cout << "Reading data ..." << endl; - - // Labeled dataset that contains data for training is loaded from CSV file, - // rows represent features, columns represent data points. - mat tempDataset; - // The original file could be download from - // https://www.kaggle.com/c/digit-recognizer/data - data::Load("Kaggle/data/train.csv", tempDataset, true); - - // Originally on Kaggle dataset CSV file has header, so it's necessary to - // get rid of the this row, in Armadillo representation it's the first column. - mat dataset = tempDataset.submat(0, 1, - tempDataset.n_rows - 1, tempDataset.n_cols - 1); - - // Splitting the dataset on training and validation parts. 
- mat train, valid; - data::Split(dataset, train, valid, RATIO); - - // Getting training and validating dataset with features only and then - // normalising - const mat trainX = train.submat(1, 0, train.n_rows - 1, - train.n_cols - 1) / 255.0; - const mat validX = valid.submat(1, 0, valid.n_rows - 1, - valid.n_cols - 1) / 255.0; - - const int ITERATIONS_PER_CYCLE = trainX.n_cols; - // According to NegativeLogLikelihood output layer of NN, labels should - // specify class of a data point and be in the interval from 1 to - // number of classes (in this case from 1 to 10). - - // Creating labels for training and validating dataset. - const mat trainY = train.row(0) + 1; - const mat validY = valid.row(0) + 1; - - // Specifying the NN model. NegativeLogLikelihood is the output layer that - // is used for classification problem. GlorotInitialization means that - // initial weights in neurons are a uniform gaussian distribution - FFN, GlorotInitialization> model; - // This is intermediate layer that is needed for connection between input - // data and relu layer. Parameters specify the number of input features - // and number of neurons in the next layer. - model.Add >(trainX.n_rows, H1); - // The first relu layer. - model.Add >(); - // Intermediate layer between relu layers. - model.Add >(H1, H2); - // The second relu layer. - model.Add >(); - // Dropout layer for regularization. First parameter is the probability of - // setting a specific value to 0. - model.Add >(0.2); - // Intermediate layer. - model.Add >(H2, 10); - // LogSoftMax layer is used together with NegativeLogLikelihood for mapping - // output values to log of probabilities of being a specific class. - model.Add >(); - - cout << "Training ..." << endl; - - // Setting parameters Stochastic Gradient Descent (SGD) optimizer. - SGD optimizer( - // Step size of the optimizer. - STEP_SIZE, - // Batch size. Number of data points that are used in each iteration. - BATCH_SIZE, - // Max number of iterations - ITERATIONS_PER_CYCLE, - // Tolerance, used as a stopping condition. This small number - // means we never stop by this condition and continue to optimize - // up to reaching maximum of iterations. - 1e-8, - // Shuffle. If optimizer should take random data points from the dataset at - // each iteration. - true, - // Adam update policy. - AdamUpdate(1e-8, 0.9, 0.999)); - - // Cycles for monitoring the process of a solution. - for (int i = 1; i <= CYCLES; i++) - { - // Train neural network. If this is the first iteration, weights are - // random, using current values as starting point otherwise. - model.Train(trainX, trainY, optimizer); - - // Don't reset optimizer's parameters between cycles. - optimizer.ResetPolicy() = false; - - mat predOut; - // Getting predictions on training data points. - model.Predict(trainX, predOut); - // Calculating accuracy on training data points. - Row predLabels = getLabels(predOut); - double trainAccuracy = accuracy(predLabels, trainY); - // Getting predictions on validating data points. - model.Predict(validX, predOut); - // Calculating accuracy on validating data points. - predLabels = getLabels(predOut); - double validAccuracy = accuracy(predLabels, validY); - - cout << i << " - accuracy: train = "<< trainAccuracy << "%," << - " valid = "<< validAccuracy << "%" << endl; - } - - cout << "Predicting ..." << endl; - - // Loading test dataset (the one whose predicted labels - // should be sent to Kaggle website). - // As before, it's necessary to get rid of header. 
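Note on the network definition above: the template arguments inside the angle brackets are missing from this listing (`model.Add >(...)` and similar). Assuming the mlpack 3.x ANN API that the surrounding code targets, the includes and layer declarations most likely read roughly as follows (H1, H2 and trainX as defined earlier in the file); treat this as a reconstruction sketch, not the verbatim original:

#include <mlpack/core.hpp>
#include <mlpack/core/data/split_data.hpp>
#include <mlpack/methods/ann/layer/layer.hpp>
#include <mlpack/methods/ann/ffn.hpp>
#include <mlpack/methods/ann/init_rules/glorot_init.hpp>
#include <ensmallen.hpp>

using namespace mlpack::ann;

// Feed-forward classifier: 784 -> H1 -> H2 -> 10, ReLU activations, dropout
// regularisation, and a log-softmax output paired with NegativeLogLikelihood.
FFN<NegativeLogLikelihood<>, GlorotInitialization> model;
model.Add<Linear<> >(trainX.n_rows, H1);  // input features -> H1 neurons
model.Add<ReLULayer<> >();                // first ReLU activation
model.Add<Linear<> >(H1, H2);             // H1 -> H2
model.Add<ReLULayer<> >();                // second ReLU activation
model.Add<Dropout<> >(0.2);               // zero out 20% of activations
model.Add<Linear<> >(H2, 10);             // H2 -> one output per digit class
model.Add<LogSoftMax<> >();               // log-probabilities for the NLL loss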
- - // The original file could be download from - // https://www.kaggle.com/c/digit-recognizer/data - data::Load("Kaggle/data/test.csv", tempDataset, true); - mat testX = tempDataset.submat(0, 1, - tempDataset.n_rows - 1, tempDataset.n_cols - 1); - - mat testPredOut; - // Getting predictions on test data points . - model.Predict(testX, testPredOut); - // Generating labels for the test dataset. - Row testPred = getLabels(testPredOut); - cout << "Saving predicted labels to \"Kaggle/results.csv\" ..." << endl; - - // Saving results into Kaggle compatibe CSV file. - save("Kaggle/results.csv", "ImageId,Label", testPred); - cout << "Results were saved to \"results.csv\" and could be uploaded to " - << "https://www.kaggle.com/c/digit-recognizer/submissions for a competition" - << endl; - cout << "Finished" << endl; -} diff --git a/Kaggle/DigitRecognizerBatchNorm/CMakeLists.txt b/Kaggle/DigitRecognizerBatchNorm/CMakeLists.txt deleted file mode 100644 index c92b7b4b..00000000 --- a/Kaggle/DigitRecognizerBatchNorm/CMakeLists.txt +++ /dev/null @@ -1,27 +0,0 @@ -cmake_minimum_required(VERSION 2.6) -project(kaggle-digit-recognizer-batchnorm) - -option(DEBUG "DEBUG" OFF) - -set(MODEL_SOURCE_DIR ${CMAKE_CURRENT_SOURCE_DIR}/src) -include_directories("${CMAKE_CURRENT_SOURCE_DIR}/../../") - -set(SOURCES ${MODEL_SOURCE_DIR}/DigitRecognizerBatchNorm.cpp) - -if(DEBUG) - message("Compilation with debug info (with ggdb3 flag)") - set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -ggdb3") -else() - message("Compilation without debug info (without ggdb3 flag)") -endif() - -add_executable(DigitRecognizerBatchNorm ${SOURCES}) -target_link_libraries(DigitRecognizerBatchNorm - ${MLPACK_LIBRARIES} - ${ARMADILLO_LIBRARIES}) - -if (NOT EXISTS ${PROJECT_BINARY_DIR}/../data) - add_custom_command(TARGET DigitRecognizerBatchNorm POST_BUILD - COMMAND ${CMAKE_COMMAND} -E tar xvzf ${CMAKE_CURRENT_SOURCE_DIR}/../kaggle_train_test_dataset.zip - WORKING_DIRECTORY ${PROJECT_BINARY_DIR}/../) -endif() diff --git a/Kaggle/DigitRecognizerBatchNorm/src/DigitRecognizerBatchNorm.cpp b/Kaggle/DigitRecognizerBatchNorm/src/DigitRecognizerBatchNorm.cpp deleted file mode 100644 index c0409508..00000000 --- a/Kaggle/DigitRecognizerBatchNorm/src/DigitRecognizerBatchNorm.cpp +++ /dev/null @@ -1,188 +0,0 @@ -/** - * An example of using Feed Forward Neural Network (FFN) for - * solving Digit Recognizer problem from Kaggle website. - * - * The full description of a problem as well as datasets for training - * and testing are available here https://www.kaggle.com/c/digit-recognizer - * using BatchNorm - * - * mlpack is free software; you may redistribute it and/or modify it under the - * terms of the 3-clause BSD license. You should have received a copy of the - * 3-clause BSD license along with mlpack. If not, see - * http://www.opensource.org/licenses/BSD-3-Clause for more information. - * - * @author Eugene Freyman - * @author Manthan-R-Sheth - */ - -#include -#include - -#include -#include - -#include - -#include - -using namespace mlpack; -using namespace mlpack::ann; - -using namespace arma; -using namespace std; - -using namespace ens; - -int main() { - // Dataset is randomly split into training - // and validation parts with following ratio. - constexpr double RATIO = 0.1; - // The number of neurons in the first layer. - constexpr int H1 = 100; - // The number of neurons in the second layer. 
- constexpr int H2 = 100; - - // The solution is done in several approaches (CYCLES), so each approach - // uses previous results as starting point and have a different optimizer - // options (here the step size is different). - - // Number of iteration per cycle. - constexpr int ITERATIONS_PER_CYCLE = 10000; - - // Number of cycles. - constexpr int CYCLES = 20; - - // Step size of an optimizer. - constexpr double STEP_SIZE = 5e-4; - - // Number of data points in each iteration of SGD - // Power of 2 is better for data parallelism - constexpr int BATCH_SIZE = 64; - - cout << "Reading data ..." << endl; - - // Labeled dataset that contains data for training is loaded from CSV file, - // rows represent features, columns represent data points. - mat tempDataset; - // The original file could be download from - // https://www.kaggle.com/c/digit-recognizer/data - data::Load("Kaggle/data/train.csv", tempDataset, true); - - // Originally on Kaggle dataset CSV file has header, so it's necessary to - // get rid of the this row, in Armadillo representation it's the first column. - mat dataset = tempDataset.submat(0, 1, - tempDataset.n_rows - 1, tempDataset.n_cols - 1); - - // Splitting the dataset on training and validation parts. - mat train, valid; - data::Split(dataset, train, valid, RATIO); - - // Getting training and validating dataset with features only. - const mat trainX = train.submat(1, 0, train.n_rows - 1, train.n_cols - 1); - const mat validX = valid.submat(1, 0, valid.n_rows - 1, valid.n_cols - 1); - - // According to NegativeLogLikelihood output layer of NN, labels should - // specify class of a data point and be in the interval from 1 to - // number of classes (in this case from 1 to 10). - - // Creating labels for training and validating dataset. - const mat trainY = train.row(0) + 1; - const mat validY = valid.row(0) + 1; - - // Specifying the NN model. NegativeLogLikelihood is the output layer that - // is used for classification problem. RandomInitialization means that - // initial weights in neurons are generated randomly in the interval - // from -1 to 1. - FFN , RandomInitialization> model; - // This is intermediate layer that is needed for connection between input - // data and PRelU layer. Parameters specify the number of input features - // and number of neurons in the next layer. - model.Add >(trainX.n_rows, H1); - // The first PReLU activation layer. parameter can be set as constructor arg. - model.Add >(); - // BatchNorm layer applied after PReLU activation as it gives better results. - model.Add >(H1); - // Intermediate layer between PReLU activation layers. - model.Add >(H1, H2); - // The second PReLU layer. - model.Add >(); - // Second BatchNorm layer - model.Add >(H2); - // Intermediate layer. - model.Add >(H2, 10); - // LogSoftMax layer is used together with NegativeLogLikelihood for mapping - // output values to log of probabilities of being a specific class. - model.Add >(); - - cout << "Training ..." << endl; - - // Setting parameters Stochastic Gradient Descent (SGD) optimizer. - SGD optimizer( - // Step size of the optimizer. - STEP_SIZE, - // Batch size. Number of data points that are used in each iteration. - BATCH_SIZE, - // Max number of iterations - ITERATIONS_PER_CYCLE, - // Tolerance, used as a stopping condition. This small number - // means we never stop by this condition and continue to optimize - // up to reaching maximum of iterations. - 1e-8, - // Shuffle. If optimizer should take random data points from the dataset at - // each iteration. 
- true, - // Adam update policy. - AdamUpdate(1e-8, 0.9, 0.999)); - - // Cycles for monitoring the process of a solution. - for (int i = 0; i <= CYCLES; i++) - { - // Train neural network. If this is the first iteration, weights are - // random, using current values as starting point otherwise. - model.Train(trainX, trainY, optimizer); - - // Don't reset optimizer's parameters between cycles. - optimizer.ResetPolicy() = false; - - mat predOut; - // Getting predictions on training data points. - model.Predict(trainX, predOut); - // Calculating accuracy on training data points. - Row predLabels = getLabels(predOut); - double trainAccuracy = accuracy(predLabels, trainY); - // Getting predictions on validating data points. - model.Predict(validX, predOut); - // Calculating accuracy on validating data points. - predLabels = getLabels(predOut); - double validAccuracy = accuracy(predLabels, validY); - - cout << i << " - accuracy: train = " << trainAccuracy << "%," << - " valid = " << validAccuracy << "%" << endl; - } - - cout << "Predicting ..." << endl; - - // Loading test dataset (the one whose predicted labels - // should be sent to Kaggle website). - // As before, it's necessary to get rid of header. - - // The original file could be download from - // https://www.kaggle.com/c/digit-recognizer/data - data::Load("Kaggle/data/test.csv", tempDataset, true); - mat testX = tempDataset.submat(0, 1, - tempDataset.n_rows - 1, tempDataset.n_cols - 1); - - mat testPredOut; - // Getting predictions on test data points . - model.Predict(testX, testPredOut); - // Generating labels for the test dataset. - Row testPred = getLabels(testPredOut); - cout << "Saving predicted labels to \"Kaggle/results.csv\" ..." << endl; - - // Saving results into Kaggle compatibe CSV file. - save("Kaggle/results.csv", "ImageId,Label", testPred); - cout << "Results were saved to \"results.csv\" and could be uploaded to " - << "https://www.kaggle.com/c/digit-recognizer/submissions for a competition" - << endl; - cout << "Finished" << endl; -} diff --git a/Kaggle/DigitRecognizerCNN/CMakeLists.txt b/Kaggle/DigitRecognizerCNN/CMakeLists.txt deleted file mode 100644 index 815b5298..00000000 --- a/Kaggle/DigitRecognizerCNN/CMakeLists.txt +++ /dev/null @@ -1,27 +0,0 @@ -cmake_minimum_required(VERSION 2.6) -project(kaggle-digit-recognizer-cnn) - -option(DEBUG "DEBUG" OFF) - -set(MODEL_SOURCE_DIR ${CMAKE_CURRENT_SOURCE_DIR}/src) -include_directories("${CMAKE_CURRENT_SOURCE_DIR}/../../") - -set(SOURCES ${MODEL_SOURCE_DIR}/DigitRecognizerCNN.cpp) - -if(DEBUG) - message("Compilation with debug info (with ggdb3 flag)") - set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -ggdb3") -else() - message("Compilation without debug info (without ggdb3 flag)") -endif() - -add_executable(DigitRecognizerCNN ${SOURCES}) -target_link_libraries(DigitRecognizerCNN - ${MLPACK_LIBRARIES} - ${ARMADILLO_LIBRARIES}) - -if (NOT EXISTS ${PROJECT_BINARY_DIR}/../data) - add_custom_command(TARGET DigitRecognizerCNN POST_BUILD - COMMAND ${CMAKE_COMMAND} -E tar xvzf ${CMAKE_CURRENT_SOURCE_DIR}/../kaggle_train_test_dataset.zip - WORKING_DIRECTORY ${PROJECT_BINARY_DIR}/../) -endif() diff --git a/Kaggle/DigitRecognizerCNN/src/DigitRecognizerCNN.cpp b/Kaggle/DigitRecognizerCNN/src/DigitRecognizerCNN.cpp deleted file mode 100644 index 4c55d14f..00000000 --- a/Kaggle/DigitRecognizerCNN/src/DigitRecognizerCNN.cpp +++ /dev/null @@ -1,225 +0,0 @@ -/** - * An example of using Convolutional Neural Network (CNN) for - * solving Digit Recognizer problem from Kaggle website. 
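Referring back to the network assembled in DigitRecognizerBatchNorm.cpp above: its layer declarations are likewise missing their template arguments in this listing. Under the same mlpack 3.x API assumption, the batch-norm variant was presumably built along these lines (a sketch, not the verbatim original):

// Linear -> PReLU -> BatchNorm blocks, ending in a log-softmax classifier.
FFN<NegativeLogLikelihood<>, RandomInitialization> model;
model.Add<Linear<> >(trainX.n_rows, H1);
model.Add<PReLU<> >();        // parametric ReLU; its alpha can be passed as a ctor arg
model.Add<BatchNorm<> >(H1);  // normalise the H1 activations
model.Add<Linear<> >(H1, H2);
model.Add<PReLU<> >();
model.Add<BatchNorm<> >(H2);
model.Add<Linear<> >(H2, 10);
model.Add<LogSoftMax<> >();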
- * - * The full description of a problem as well as datasets for training - * and testing are available here https://www.kaggle.com/c/digit-recognizer - * - * mlpack is free software; you may redistribute it and/or modify it under the - * terms of the 3-clause BSD license. You should have received a copy of the - * 3-clause BSD license along with mlpack. If not, see - * http://www.opensource.org/licenses/BSD-3-Clause for more information. - * - * @author Daivik Nema - */ - -#include -#include - -#include -#include - -#include - -#include - -using namespace mlpack; -using namespace mlpack::ann; - -using namespace arma; -using namespace std; - -using namespace ens; - -int main() -{ - // Dataset is randomly split into validation - // and training parts with following ratio. - constexpr double RATIO = 0.1; - - // Number of iteration per cycle. - constexpr int ITERATIONS_PER_CYCLE = 10000; - - // Number of cycles. - constexpr int CYCLES = 40; - - // Step size of the optimizer. - constexpr double STEP_SIZE = 1.2e-3; - - // Number of data points in each iteration of SGD. - constexpr int BATCH_SIZE = 50; - - cout << "Reading data ..." << endl; - - // Labeled dataset that contains data for training is loaded from CSV file. - // Rows represent features, columns represent data points. - mat tempDataset; - - // The original file can be downloaded from - // https://www.kaggle.com/c/digit-recognizer/data - data::Load("Kaggle/data/train.csv", tempDataset, true); - - // The original Kaggle dataset CSV file has headings for each column, - // so it's necessary to get rid of the first row. In Armadillo representation, - // this corresponds to the first column of our data matrix. - mat dataset = tempDataset.submat(0, 1, - tempDataset.n_rows - 1, tempDataset.n_cols - 1); - - // Split the dataset into training and validation sets. - mat train, valid; - data::Split(dataset, train, valid, RATIO); - - // The train and valid datasets contain both - the features as well as the - // class labels. Split these into separate mats. - const mat trainX = train.submat(1, 0, train.n_rows - 1, train.n_cols - 1); - const mat validX = valid.submat(1, 0, valid.n_rows - 1, valid.n_cols - 1); - - // According to NegativeLogLikelihood output layer of NN, labels should - // specify class of a data point and be in the interval from 1 to - // number of classes (in this case from 1 to 10). - - // Create labels for training and validatiion datasets. - const mat trainY = train.row(0) + 1; - const mat validY = valid.row(0) + 1; - - // Specify the NN model. NegativeLogLikelihood is the output layer that - // is used for classification problem. RandomInitialization means that - // initial weights are generated randomly in the interval from -1 to 1. - FFN, RandomInitialization> model; - - // Specify the model architecture. - // In this example, the CNN architecture is chosen similar to LeNet-5. - // The architecture follows a Conv-ReLU-Pool-Conv-ReLU-Pool-Dense schema. We - // have used leaky ReLU activation instead of vanilla ReLU. Standard - // max-pooling has been used for pooling. The first convolution uses 6 filters - // of size 5x5 (and a stride of 1). The second convolution uses 16 filters of - // size 5x5 (stride = 1). The final dense layer is connected to a softmax to - // ensure that we get a valid probability distribution over the output classes - - // Layers schema. - // 28x28x1 --- conv (6 filters of size 5x5. 
stride = 1) ---> 24x24x6 - // 24x24x6 --------------- Leaky ReLU ---------------------> 24x24x6 - // 24x24x6 --- max pooling (over 2x2 fields. stride = 2) --> 12x12x6 - // 12x12x6 --- conv (16 filters of size 5x5. stride = 1) --> 8x8x16 - // 8x8x16 --------------- Leaky ReLU ---------------------> 8x8x16 - // 8x8x16 --- max pooling (over 2x2 fields. stride = 2) --> 4x4x16 - // 4x4x16 ------------------- Dense ----------------------> 10 - - // Add the first convolution layer. - model.Add >( - 1, // Number of input activation maps. - 6, // Number of output activation maps. - 5, // Filter width. - 5, // Filter height. - 1, // Stride along width. - 1, // Stride along height. - 0, // Padding width. - 0, // Padding height. - 28, // Input width. - 28); // Input Height - - // Add first ReLU. - model.Add >(); - - // Add first pooling layer. Pools over 2x2 fields in the input. - model.Add >( - 2, // Width of field. - 2, // Height of field. - 2, // Stride along width. - 2, // Stride along height. - true); - - // Add the second convolution layer. - model.Add >( - 6, // Number of input activation maps. - 16, // Number of output activation maps. - 5, // Filter width. - 5, // Filter height. - 1, // Stride along width. - 1, // Stride along height. - 0, // Padding width. - 0, // Padding height. - 12, // Input width. - 12); // Input Height - - // Add the second ReLU. - model.Add >(); - - // Add the second pooling layer. - model.Add >(2, 2, 2, 2, true); - - // Add the final dense layer. - model.Add >(16*4*4, 10); - model.Add >(); - - cout << "Training ..." << endl; - - // Set parameters of Stochastic Gradient Descent (SGD) optimizer. - SGD optimizer( - // Step size of the optimizer. - STEP_SIZE, - // Batch size. Number of data points that are used in each iteration. - BATCH_SIZE, - // Max number of iterations. - ITERATIONS_PER_CYCLE, - // Tolerance, used as a stopping condition. Such a small value - // means we almost never stop by this condition, and continue gradient - // descent until the maximum number of iterations is reached. - 1e-8, - // Shuffle. If optimizer should take random data points from the dataset at - // each iteration. - true, - // Adam update policy. - AdamUpdate(1e-8, 0.9, 0.999)); - - for (int i = 0; i <= CYCLES; i++) - { - // Train the CNN model. If this is the first iteration, weights are - // randomly initialized between -1 and 1. Otherwise, the values of weights - // from the previous iteration are used. - model.Train(trainX, trainY, optimizer); - - // Don't reset optimizers parameters between cycles. - optimizer.ResetPolicy() = false; - - // Matrix to store the predictions on train and validation datasets. - mat predOut; - // Get predictions on training data points. - model.Predict(trainX, predOut); - // Calculate accuracy on training data points. - Row predLabels = getLabels(predOut); - double trainAccuracy = accuracy(predLabels, trainY); - // Get predictions on validating data points. - model.Predict(validX, predOut); - // Calculate accuracy on validating data points. - predLabels = getLabels(predOut); - double validAccuracy = accuracy(predLabels, validY); - - cout << "Epoch " << i << ":\tTraining Accuracy = "<< trainAccuracy<< "%," - <<"\tValidation Accuracy = "<< validAccuracy << "%" << endl; - } - - cout << "Predicting ..." << endl; - - // Load test dataset - // The original file could be download from - // https://www.kaggle.com/c/digit-recognizer/data - data::Load("Kaggle/data/test.csv", tempDataset, true); - - // As before, it's necessary to get rid of column headings. 
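For the convolutional network defined above, the stripped template arguments can be filled in from the layer comments and the schema. Assuming the mlpack 3.x API, the model was presumably declared as below (reconstruction sketch; parameter meanings as annotated in the original comments):

FFN<NegativeLogLikelihood<>, RandomInitialization> model;
model.Add<Convolution<> >(1, 6, 5, 5, 1, 1, 0, 0, 28, 28);   // 28x28x1 -> 24x24x6
model.Add<LeakyReLU<> >();                                   // leaky ReLU activation
model.Add<MaxPooling<> >(2, 2, 2, 2, true);                  // 24x24x6 -> 12x12x6
model.Add<Convolution<> >(6, 16, 5, 5, 1, 1, 0, 0, 12, 12);  // 12x12x6 -> 8x8x16
model.Add<LeakyReLU<> >();
model.Add<MaxPooling<> >(2, 2, 2, 2, true);                  //  8x8x16 -> 4x4x16
model.Add<Linear<> >(16 * 4 * 4, 10);                        // dense layer to 10 classes
model.Add<LogSoftMax<> >();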
- mat testX = tempDataset.submat(0, 1, - tempDataset.n_rows - 1, tempDataset.n_cols - 1); - - // Matrix to store the predictions on test dataset. - mat testPredOut; - // Get predictions on test data points. - model.Predict(testX, testPredOut); - // Generate labels for the test dataset. - Row testPred = getLabels(testPredOut); - cout << "Saving predicted labels to results.csv."<< endl; - - // Saving results into Kaggle compatibe CSV file. - save("Kaggle/results.csv", "ImageId,Label", testPred); - cout << "Results were saved to Kaggle/results.csv. This file can be uploaded " - << "to https://www.kaggle.com/c/digit-recognizer/submissions." << endl; -} diff --git a/Kaggle/kaggle_train_test_dataset.zip b/Kaggle/kaggle_train_test_dataset.zip deleted file mode 100644 index a6f0b940..00000000 Binary files a/Kaggle/kaggle_train_test_dataset.zip and /dev/null differ diff --git a/Kaggle/kaggle_utils.hpp b/Kaggle/kaggle_utils.hpp deleted file mode 100644 index 9caa5c3b..00000000 --- a/Kaggle/kaggle_utils.hpp +++ /dev/null @@ -1,90 +0,0 @@ -/** - * Utitlity functions that is useful for solving Kaggle problems - * - * mlpack is free software; you may redistribute it and/or modify it under the - * terms of the 3-clause BSD license. You should have received a copy of the - * 3-clause BSD license along with mlpack. If not, see - * http://www.opensource.org/licenses/BSD-3-Clause for more information. - * - * @author Eugene Freyman - */ - -#ifndef MODELS_KAGGLE_UTILS_HPP -#define MODELS_KAGGLE_UTILS_HPP - -#include - -/** - * Returns labels bases on predicted probability (or log of probability) - * of classes. - * @param predOut matrix contains probabilities (or log of probability) of - * classes. Each row corresponds to a certain class, each column corresponds - * to a data point. - * @return a row vector of data point's classes. The classes starts from 1 to - * the number of rows in input matrix. - */ -arma::Row getLabels(const arma::mat& predOut) -{ - arma::Row pred(predOut.n_cols); - - // Class of a j-th data point is chosen to be the one with maximum value - // in j-th column plus 1 (since column's elements are numbered from 0). - for (size_t j = 0; j < predOut.n_cols; ++j) - { - pred(j) = arma::as_scalar(arma::find( - arma::max(predOut.col(j)) == predOut.col(j), 1)) + 1; - } - - return pred; -} - -/** - * Returns the accuracy (percentage of correct answers). - * @param predLabels predicted labels of data points. - * @param realY real labels (they are double because we usually read them from - * CSV file that contain many other double values). - * @return percentage of correct answers. - */ -double accuracy(arma::Row predLabels, const arma::mat& realY) -{ - // Calculating how many predicted classes are coincide with real labels. - size_t success = 0; - for (size_t j = 0; j < realY.n_cols; j++) { - if (predLabels(j) == std::round(realY(j))) { - ++success; - } - } - - // Calculating percentage of correctly classified data points. - return (double)success / (double)realY.n_cols * 100.0; -} - -/** - * Saves prediction into specifically formated CSV file, suitable for - * most Kaggle competitions. - * @param filename the name of a file. - * @param header the header in a CSV file. - * @param predLabels predicted labels of data points. Classes of data points - * are expected to start from 1. 
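A side note on the getLabels() utility above: its return type and the pred row vector were presumably declared as arma::Row<size_t> (the angle-bracket part is missing from this listing). The find/max construction can also be expressed with arma::index_max(); a hypothetical compact equivalent, not part of the original header:

// Same behaviour as getLabels(): the class of column j is the row index of
// its largest value, shifted by one because these examples number classes 1..10.
arma::Row<size_t> getLabelsCompact(const arma::mat& predOut)
{
  arma::Row<size_t> pred(predOut.n_cols);
  for (size_t j = 0; j < predOut.n_cols; ++j)
    pred(j) = arma::index_max(predOut.col(j)) + 1;
  return pred;
}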
At the same time classes of data points in - * the file are going to start from 0 (as Kaggle usually expects) - */ -void save(const std::string filename, std::string header, - const arma::Row& predLabels) -{ - std::ofstream out(filename); - out << header << std::endl; - for (size_t j = 0; j < predLabels.n_cols; ++j) - { - // j + 1 because Kaggle indexes start from 1 - // pred - 1 because 1st class is 0, 2nd class is 1 and etc. - out << j + 1 << "," << std::round(predLabels(j)) - 1; - // to avoid an empty line in the end of the file - if (j < predLabels.n_cols - 1) - { - out << std::endl; - } - } - out.close(); -} - -#endif diff --git a/LSTM/TimeSeries-Multivariate/CMakeLists.txt b/LSTM/TimeSeries-Multivariate/CMakeLists.txt deleted file mode 100644 index 6f33587e..00000000 --- a/LSTM/TimeSeries-Multivariate/CMakeLists.txt +++ /dev/null @@ -1,49 +0,0 @@ -cmake_minimum_required(VERSION 2.6) -project(lstm-time-series) - -option(DEBUG "DEBUG" OFF) - -set(MODEL_SOURCE_DIR ${CMAKE_CURRENT_SOURCE_DIR}/src) -include_directories("${CMAKE_CURRENT_SOURCE_DIR}/../../") - -set(SOURCES - ${MODEL_SOURCE_DIR}/LSTMTimeSeriesMultivariate.cpp - ) - -if(DEBUG) - message("Compilation with debug info (with ggdb3 flag)") - set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -ggdb3") -else() - message("Compilation without debug info (without ggdb3 flag)") -endif() - - -add_executable(LSTMTimeSeriesMultivariate ${SOURCES}) -target_link_libraries(LSTMTimeSeriesMultivariate - ${MLPACK_LIBRARIES} - ${ARMADILLO_LIBRARIES} - ${Boost_LIBRARIES}) - - - -if (NOT EXISTS ${PROJECT_BINARY_DIR}/../data) - add_custom_command(TARGET LSTMTimeSeriesMultivariate POST_BUILD - COMMAND ${CMAKE_COMMAND} -E make_directory - ${PROJECT_BINARY_DIR}/../data) -endif() - - -if (NOT EXISTS ${PROJECT_BINARY_DIR}/../../../saved_models) - add_custom_command(TARGET LSTMTimeSeriesMultivariate POST_BUILD - COMMAND ${CMAKE_COMMAND} -E make_directory - ${PROJECT_BINARY_DIR}/../../saved_models) -endif() - - -if (NOT EXISTS ${PROJECT_BINARY_DIR}/../data/Google2016-2019.csv) - add_custom_command(TARGET LSTMTimeSeriesMultivariate POST_BUILD - COMMAND ${CMAKE_COMMAND} -E copy - ${CMAKE_CURRENT_SOURCE_DIR}/../../data/Google2016-2019.csv - ${PROJECT_BINARY_DIR}/../data/Google2016-2019.csv) -endif() - diff --git a/LSTM/TimeSeries-Multivariate/src/LSTMTimeSeriesMultivariate.cpp b/LSTM/TimeSeries-Multivariate/src/LSTMTimeSeriesMultivariate.cpp deleted file mode 100644 index cf55ad70..00000000 --- a/LSTM/TimeSeries-Multivariate/src/LSTMTimeSeriesMultivariate.cpp +++ /dev/null @@ -1,311 +0,0 @@ -/** - * An example of using Recurrent Neural Network (RNN) - * to make forecasts on a time series of Google stock prices. - * which we aim to solve using a simple LSTM neural network. - * - * mlpack is free software; you may redistribute it and/or modify it under the - * terms of the 3-clause BSD license. You should have received a copy of the - * 3-clause BSD license along with mlpack. If not, see - * http://www.opensource.org/licenses/BSD-3-Clause for more information. - * - * @file LSTMTimeSeriesMultivariate.cpp - * @author Mehul Kumar Nirala. - * @author Zoltan Somogyi - */ - -/* -NOTE: the data need to be sorted by date in ascending order! The RNN learns from -oldest to newest! 
- -date close volume open high low -27-06-16 668.26 2632011 671 672.3 663.284 -28-06-16 680.04 2169704 678.97 680.33 673 -29-06-16 684.11 1931436 683 687.4292 681.41 -30-06-16 692.1 1597298 685.47 692.32 683.65 -01-07-16 699.21 1344387 692.2 700.65 692.1301 -05-07-16 694.49 1462879 696.06 696.94 688.88 -06-07-16 697.77 1411080 689.98 701.68 689.09 -07-07-16 695.36 1303661 698.08 698.2 688.215 -08-07-16 705.63 1573909 699.5 705.71 696.435 -11-07-16 715.09 1107039 708.05 716.51 707.24 -... -*/ - -#include -#include -#include -#include -#include -#include -#include -#include -#include - -using namespace std; -using namespace mlpack; -using namespace mlpack::ann; -using namespace ens; - -/** - * Function to calculate MSE for arma::cube. - */ -/* - * Function to calcute MSE for arma::cube. - */ -double MSE(arma::cube &pred, arma::cube &Y) -{ - return metric::SquaredEuclideanDistance::Evaluate(pred, Y) / (Y.n_elem); -} - -/** - * The time series data for training the model contains the Closing stock price, - * the volume of stocks traded, opening stock price, highest stock price and - * lowest stock price for 'rho' days in the past. The two target variables - * (multivariate) we want to predict are the highest stock price and lowest - * stock price (high, low) for the next day. - * - * NOTE: We do not use the last input data point in the training because there - * is no target (next day (high, low)) for that point. - */ -template -void CreateTimeSeriesData(InputDataType dataset, - DataType& X, - LabelType& y, - const size_t rho) -{ - for (size_t i = 0; i < dataset.n_cols - rho; i++) - { - X.subcube(arma::span(), arma::span(i), arma::span()) = - dataset.submat(arma::span(), arma::span(i, i + rho - 1)); - y.subcube(arma::span(), arma::span(i), arma::span()) = - dataset.submat(arma::span(3, 4), arma::span(i + 1, i + rho)); - } -} - -/** - * This function saves the input data for prediction and the prediction results - * in CSV format. The prediction results are the (high, low) for the next day - * and come from the last slice of the prediction. The last 2 columns are the - * predictions; the preceding columns are the data used to generate those - * predictions. - */ -void SaveResults(const string filename, - const arma::cube& predictions, - data::MinMaxScaler& scale, - const arma::cube& testX) -{ - arma::mat flatDataAndPreds = testX.slice(testX.n_slices - 1); - - // The prediction results are the (high, low) for the next day and come from - // the last slice from the prediction. - flatDataAndPreds.rows(flatDataAndPreds.n_rows - 2, - flatDataAndPreds.n_rows - 1) = predictions.slice( - predictions.n_slices - 1); - - scale.InverseTransform(flatDataAndPreds, flatDataAndPreds); - - // We need to remove the last column because it was not used for training - // (there is no next day to predict). - flatDataAndPreds.shed_col(flatDataAndPreds.n_cols - 1); - - // Save the data to file. The last columns are the predictions; the preceding - // columns are the data used to generate those predictions. - data::Save(filename, flatDataAndPreds); - - // Print the output to screen. - // NOTE: we do not have the last data point in the input for the prediction - // because we did not use it for the training, therefore the prediction result - // will be for the day before. In your own application you may of course load - // any dataset for prediction. 
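Stepping back to CreateTimeSeriesData() above: its template parameter list is missing from this listing. Given how the function is called later in the file, the declaration was presumably along these lines (a sketch; the body is copied from the code above):

// Builds rho-day input windows (all 5 features) and targets that are the
// (high, low) rows shifted one day into the future.
template<typename InputDataType, typename DataType, typename LabelType>
void CreateTimeSeriesData(InputDataType dataset,
                          DataType& X,
                          LabelType& y,
                          const size_t rho)
{
  for (size_t i = 0; i < dataset.n_cols - rho; i++)
  {
    X.subcube(arma::span(), arma::span(i), arma::span()) =
        dataset.submat(arma::span(), arma::span(i, i + rho - 1));
    y.subcube(arma::span(), arma::span(i), arma::span()) =
        dataset.submat(arma::span(3, 4), arma::span(i + 1, i + rho));
  }
}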
- cout << "The predicted Google stock (high, low) for the last day is: " - << endl; - cout << " (" << flatDataAndPreds(flatDataAndPreds.n_rows - 2, - flatDataAndPreds.n_cols - 1) << ", "; - cout << flatDataAndPreds(flatDataAndPreds.n_rows - 1, - flatDataAndPreds.n_cols - 1) << ")" << endl; -} - -int main() -{ - // Change the names of these files as necessary. They should be correct - // already, if your program's working directory contains the data and/or - // model. - const string dataFile = "Google2016-2019.csv"; - // example: const string dataFile = - // "C:/mlpack-model-app/Google2016-2019.csv"; - // example: const string dataFile = - // "/home/user/mlpack-model-app/Google2016-2019.csv"; - - const string modelFile = "lstm_multi.bin"; - // example: const string modelFile = - // "C:/mlpack-model-app/lstm_multi.bin"; - // example: const string modelFile = - // "/home/user/mlpack-model-app/lstm_multi.bin"; - - const string predFile = "lstm_multi_predictions.csv"; - - // If true, the model will be trained; if false, the saved model will be - // read and used for prediction - // NOTE: Training the model may take a long time, therefore once it is - // trained you can set this to false and use the model for prediction. - // NOTE: There is no error checking in this example to see if the trained - // model exists! - const bool bTrain = true; - // You can load and further train a model by setting this to true. - const bool bLoadAndTrain = false; - - // Testing data is taken from the dataset in this ratio. - const double RATIO = 0.1; - - // Step size of an optimizer. - const double STEP_SIZE = 5e-5; - - // Number of cells in the LSTM (hidden layers in standard terms). - // NOTE: you may play with this variable in order to further optimize the - // model (as more cells are added, accuracy is likely to go up, but training - // time may take longer). - const int H1 = 25; - - // Number of data points in each iteration of SGD. - const size_t BATCH_SIZE = 16; - - // Nunmber of timesteps to look backward for in the RNN. - const int rho = 25; - - // Max Rho for LSTM. - const int maxRho = rho; - - arma::mat dataset; - - // In Armadillo rows represent features, columns represent data points. - cout << "Reading data ..." << endl; - data::Load(dataFile, dataset, true); - - // The CSV file has a header, so it is necessary to remove it. In Armadillo's - // representation it is the first column. - // The first column in the CSV is the date which is not required, therefore - // we remove it also (first row in in arma::mat). - - dataset = dataset.submat(1, 1, dataset.n_rows - 1, dataset.n_cols - 1); - - // We have 5 input data columns and 2 output columns (target). - size_t inputSize = 5, outputSize = 2; - - // Split the dataset into training and validation sets. - arma::mat trainData = dataset.submat(arma::span(), arma::span(0, (1 - RATIO) * - dataset.n_cols)); - arma::mat testData = dataset.submat(arma::span(), - arma::span((1 - RATIO) * dataset.n_cols, dataset.n_cols - 1)); - - // Number of epochs for training. - const int EPOCHS = 150; - - // Scale all data into the range (0, 1) for increased numerical stability. - data::MinMaxScaler scale; - // Fit scaler only on training data. - scale.Fit(trainData); - scale.Transform(trainData, trainData); - scale.Transform(testData, testData); - - // We need to represent the input data for RNN in an arma::cube (3D matrix). - // The 3rd dimension is the rho number of past data records the RNN uses for - // learning. 
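One more reconstruction note before the cube setup: the RNN assembled a little further below (inside the bTrain/bLoadAndTrain block) also lost its template arguments in this listing. Assuming the mlpack 3.x API, it presumably read roughly as follows; the exact activation layer type is not recoverable from this diff, so LeakyReLU<> is assumed here:

#include <mlpack/methods/ann/rnn.hpp>
#include <mlpack/methods/ann/layer/layer.hpp>
#include <mlpack/methods/ann/loss_functions/mean_squared_error.hpp>
#include <mlpack/methods/ann/init_rules/he_init.hpp>

// Stacked LSTM regressor: 5 input features -> three LSTM layers of H1 cells
// -> 2 outputs (next-day high and low), trained with mean squared error.
RNN<MeanSquaredError<>, HeInitialization> model(rho);
model.Add<IdentityLayer<> >();
model.Add<LSTM<> >(inputSize, H1, maxRho);
model.Add<Dropout<> >(0.5);
model.Add<LeakyReLU<> >();
model.Add<LSTM<> >(H1, H1, maxRho);
model.Add<Dropout<> >(0.5);
model.Add<LeakyReLU<> >();
model.Add<LSTM<> >(H1, H1, maxRho);
model.Add<LeakyReLU<> >();
model.Add<Linear<> >(H1, outputSize);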
- arma::cube trainX, trainY, testX, testY; - trainX.set_size(inputSize, trainData.n_cols - rho + 1, rho); - trainY.set_size(outputSize, trainData.n_cols - rho + 1, rho); - testX.set_size(inputSize, testData.n_cols - rho + 1, rho); - testY.set_size(outputSize, testData.n_cols - rho + 1, rho); - - // Create training sets for one-step-ahead regression. - CreateTimeSeriesData(trainData, trainX, trainY, rho); - // Create test sets for one-step-ahead regression. - CreateTimeSeriesData(testData, testX, testY, rho); - - // Only train the model if required. - if (bTrain || bLoadAndTrain) - { - // RNN regression model. - RNN, HeInitialization> model(rho); - - if (bLoadAndTrain) - { - // The model will be trained further. - cout << "Loading and further training model..." << endl; - data::Load(modelFile, "LSTMMulti", model); - } - else - { - // Model building. - model.Add >(); - model.Add >(inputSize, H1, maxRho); - model.Add >(0.5); - model.Add >(); - model.Add >(H1, H1, maxRho); - model.Add >(0.5); - model.Add >(); - model.Add >(H1, H1, maxRho); - model.Add >(); - model.Add >(H1, outputSize); - } - - // Set parameters for the Stochastic Gradient Descent (SGD) optimizer. - SGD optimizer( - STEP_SIZE, // Step size of the optimizer. - BATCH_SIZE, // Batch size. Number of data points that are used in each - // iteration. - trainData.n_cols * EPOCHS, // Max number of iterations. - 1e-8, // Tolerance. - true, // Shuffle. - AdamUpdate(1e-8, 0.9, 0.999)); // Adam update policy. - - // Instead of terminating based on the tolerance of the objective function, - // we'll depend on the maximum number of iterations, and terminate early - // using the EarlyStopAtMinLoss callback. - optimizer.Tolerance() = -1; - - cout << "Training ..." << endl; - - model.Train(trainX, - trainY, - optimizer, - // PrintLoss Callback prints loss for each epoch. - ens::PrintLoss(), - // Progressbar Callback prints progress bar for each epoch. - ens::ProgressBar(), - // Stops the optimization process if the loss stops decreasing - // or no improvement has been made. This will terminate the - // optimization once we obtain a minima on training set. - ens::EarlyStopAtMinLoss()); - - cout << "Finished training. \n Saving Model" << endl; - data::Save(modelFile, "LSTMMulti", model); - cout << "Model saved in " << modelFile << endl; - } - - // NOTE: the code below is added in order to show how in a real application - // the model would be saved, loaded and then used for prediction. Please note - // that we do not have the last data point in testX because we did not use it - // for the training, therefore the prediction result will be for the day - // before. In your own application you may of course load any dataset. - - // Load RNN model and use it for prediction. - RNN, HeInitialization> modelP(rho); - cout << "Loading model ..." << endl; - data::Load(modelFile, "LSTMMulti", modelP); - arma::cube predOutP; - - // Get predictions on test data points. - modelP.Predict(testX, predOutP); - // Calculate MSE on prediction. - double testMSEP = MSE(predOutP, testY); - cout << "Mean Squared Error on Prediction data points:= " << testMSEP << endl; - - // Save the output predictions and show the results. - SaveResults(predFile, predOutP, scale, testX); - - // Use this on Windows in order to keep the console window open. - // cout << "Ready!" 
<< endl; - // getchar(); -} diff --git a/LSTM/TimeSeries-Multivariate/tutorial.txt b/LSTM/TimeSeries-Multivariate/tutorial.txt deleted file mode 100644 index 74e97c58..00000000 --- a/LSTM/TimeSeries-Multivariate/tutorial.txt +++ /dev/null @@ -1,174 +0,0 @@ -/*! -@file tutorial.txt -@author Mehul Kumar Nirala -@author Zoltan Somogyi -@brief Tutorial on Multivariate Time Series using RNN. - -@page rnntutorial LSTM Multivariate Time Series - -@section intro_lstmtut Introduction - -We will predict the Google stock price based on historical data by using an LSTM Recurrent Neural Network(RNN) in mlpack. -The input data contains the Google stock prices for the past 3 years from https://www.nasdaq.com/symbol/goog/historical in csv format (Google2016-2019.csv) - -@section toc_lstmtut Table of Contents - -This tutorial is split into the following sections: - - - \ref intro_lstmtut - - \ref toc_lstmtut - - \ref data_lstmtut - - \ref model_lstmtut - - \ref training_lstmtut - - \ref results_lstmtut - -@section data_lstmtut Time Series data - As a first step we normalize the input data using MinMaxScaler in mlpack so that all input features are on the scale between 0 to 1. - -@code - template - DataType MinMaxScaler(DataType& dataset) - { - arma::vec maxValues = arma::max(dataset, 1 /* for each dimension */); - arma::vec minValues = arma::min(dataset, 1 /* for each dimension */); - - arma::vec rangeValues = maxValues - minValues; - - // Add a very small value if there are any zeros. - rangeValues += 1e-25; - - dataset -= arma::repmat(minValues , 1, dataset.n_cols); - dataset /= arma::repmat(rangeValues , 1, dataset.n_cols); - return dataset; - } - ... - // Scale data for increased numerical stability. - dataset = MinMaxScaler(dataset); -@endcode - - * If we want to predict the Google stock price correctly then we need to consider the volume of the stocks traded, the closing, opening, high and low values of the stock price from the previous days. This is a time series problem. - * We will create data for the training of the RNN model that will go back 25 business days in the past for each time step. - * We will convert the input data to the time series format the RNN LSTM requires it. - * We will take 30 % of the latest data as our test dataset. - -The time series data for each time step will contain the volume of the stocks traded, the closing, opening, high and low values of the stock price for the past 25 days and the target variable will be Google’s stock price (high, low) for the next day. -As the stock price prediction is based on multiple input features, it is a multivariate regression problem. - -@code -/* - * The time series data for training the model contains the Closing stock price, the Volume of stocks traded, - * Opening stock price, Highest stock price and Lowest stock price for 'rho' days in the past. - * The two target variables (multivariate) we want to predict are the Highest stock price and Lowest stock price - * (high, low) for the next day! - * - * NOTE: Please note that we do not use the last input data point in the training because there is no target - * (next day (high, low)) for that point! 
- */ -template - void CreateTimeSeriesData(InputDataType dataset, DataType& X, LabelType& y, size_t rho) -{ - for (size_t i = 0; i < dataset.n_cols - rho; i++) - { - X.subcube(span(), span(i), span()) = dataset.submat(span(), span(i, i + rho - 1)); - y.subcube(span(), span(i), span()) = dataset.submat(span(3, 4), span(i + 1, i + rho)); - } -} -@endcode - -@section model_lstmtut LSTM Model - -We add 3 LSTM modules that will be stacked one after the other in the RNN, implementing an efficient stacked RNN. Finally, the output will have 2 units the (high, low) values of the stock price for the next day. - -@code - // No of timesteps to look in RNN. - const int rho = 25; - // LSTM cell size ('hidden layers') - const int H1 = 25; - size_t inputSize = 5, outputSize = 2; - - // RNN model. - RNN,HeInitialization> model(rho); - //Model building. - model.Add >(); - model.Add >(inputSize, H1, maxRho); - model.Add >(0.5); - model.Add >(); - model.Add >(H1, H1, maxRho); - model.Add >(0.5); - model.Add >(); - model.Add >(H1, H1, maxRho); - model.Add >(); - model.Add >(H1, outputSize); - -@endcode - -Setting parameters Stochastic Gradient Descent (SGD) optimizer. -@code - - // Setting parameters Stochastic Gradient Descent (SGD) optimizer. - SGD optimizer( - STEP_SIZE, // Step size of the optimizer. - BATCH_SIZE, // Batch size. Number of data points that are used in each iteration. - ITERATIONS_PER_EPOCH, // Max number of iterations. - 1e-8,// Tolerance. - true,// Shuffle. - AdamUpdate(1e-8, 0.9, 0.999)// Adam update policy. - ); - -@endcode - -@section training_lstmtut Training the model - -@code - cout << "Training ..." << endl; - // Run EPOCH number of cycles for optimizing the solution. - for (int i = 0; i < EPOCH; i++) - { - // Train neural network. If this is the first iteration, weights are - // random, using current values as starting point otherwise. - model.Train(trainX, trainY, optimizer); - - // Don't reset optimizer's parameters between cycles. - optimizer.ResetPolicy() = false; - - cube predOut; - // Getting predictions on test data points. - model.Predict(testX, predOut); - - // Calculating mse on test data points. - double testMSE = MSE(predOut,testY); - cout << i+1 << " - Mean Squared Error := "<< testMSE << endl; - } -@endcode - -As last we use the test data for predicting the stock price with the trained RNN model. Please note that we do not have the last data point in the test data because we did not use it for the training, therefore the prediction result will be for the day before! In your own application you may of course load any dataset. Please look at the code for more information about how the model is saved, loaded and then used for prediction! - -@section results_lstmtut Results - -Reading data ... -Training ... -1 - Mean Squared Error := 0.311178 -2 - Mean Squared Error := 0.30771 -3 - Mean Squared Error := 0.303961 -4 - Mean Squared Error := 0.299234 -5 - Mean Squared Error := 0.291367 -6 - Mean Squared Error := 0.276275 -7 - Mean Squared Error := 0.198768 -8 - Mean Squared Error := 0.112946 -9 - Mean Squared Error := 0.103283 -10 - Mean Squared Error := 0.0965188 -... -100 - Mean Squared Error := 0.0309369 -... -200 - Mean Squared Error := 0.0162842 -... -300 - Mean Squared Error := 0.00906527 -... 
-500 - Mean Squared Error := 0.00595312 - -The predicted Google stock (high, low) for the last day is the following: -1116.4, 1094.65 - -*/ diff --git a/LSTM/TimeSeries-Univariate/CMakeLists.txt b/LSTM/TimeSeries-Univariate/CMakeLists.txt deleted file mode 100644 index 74ea74b3..00000000 --- a/LSTM/TimeSeries-Univariate/CMakeLists.txt +++ /dev/null @@ -1,48 +0,0 @@ -cmake_minimum_required(VERSION 2.6) -project(lstm-time-series) - -option(DEBUG "DEBUG" OFF) - -set(MODEL_SOURCE_DIR ${CMAKE_CURRENT_SOURCE_DIR}/src) -include_directories("${CMAKE_CURRENT_SOURCE_DIR}/../../") - -set(SOURCES - ${MODEL_SOURCE_DIR}/LSTMTimeSeriesUnivariate.cpp - ) - -if(DEBUG) - message("Compilation with debug info (with ggdb3 flag)") - set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -ggdb3") -else() - message("Compilation without debug info (without ggdb3 flag)") -endif() - - -add_executable(LSTMTimeSeriesUnivariate ${SOURCES}) -target_link_libraries(LSTMTimeSeriesUnivariate - ${MLPACK_LIBRARIES} - ${ARMADILLO_LIBRARIES} - ${Boost_LIBRARIES}) - - - -if (NOT EXISTS ${PROJECT_BINARY_DIR}/../data) - add_custom_command(TARGET LSTMTimeSeriesUnivariate POST_BUILD - COMMAND ${CMAKE_COMMAND} -E make_directory - ${PROJECT_BINARY_DIR}/../data) -endif() - - -if (NOT EXISTS ${PROJECT_BINARY_DIR}/../../../saved_models) - add_custom_command(TARGET LSTMTimeSeriesUnivariate POST_BUILD - COMMAND ${CMAKE_COMMAND} -E make_directory - ${PROJECT_BINARY_DIR}/../../saved_models) -endif() - - -if (NOT EXISTS ${PROJECT_BINARY_DIR}/../data/electricity-usage.csv) - add_custom_command(TARGET LSTMTimeSeriesUnivariate POST_BUILD - COMMAND ${CMAKE_COMMAND} -E copy - ${CMAKE_CURRENT_SOURCE_DIR}/../../data/electricity-usage.csv - ${PROJECT_BINARY_DIR}/../data/electricity-usage.csv) -endif() diff --git a/LSTM/TimeSeries-Univariate/src/LSTMTimeSeriesUnivariate.cpp b/LSTM/TimeSeries-Univariate/src/LSTMTimeSeriesUnivariate.cpp deleted file mode 100644 index 0586da31..00000000 --- a/LSTM/TimeSeries-Univariate/src/LSTMTimeSeriesUnivariate.cpp +++ /dev/null @@ -1,302 +0,0 @@ -/** - * An example of using Recurrent Neural Network (RNN) - * to make forcasts on a time series of number of kilowatt-hours used in a - * residential home over a 3.5 month period, 25 November 2011 to 17 March 2012, - * which we aim to solve using a simple LSTM neural network. Electricity usage - * as recorded by the local utility company on an hour-by-hour basis. - * - * mlpack is free software; you may redistribute it and/or modify it under the - * terms of the 3-clause BSD license. You should have received a copy of the - * 3-clause BSD license along with mlpack. If not, see - * http://www.opensource.org/licenses/BSD-3-Clause for more information. - * - * @file LSTMTimeSeriesUnivariate.cpp - * @author Mehul Kumar Nirala - * @author Zoltan Somogyi - */ - -/* -NOTE: the data need to be sorted by date in ascending order! The RNN learns from -oldest to newest! - -DateTime,Consumption kWh,Off-peak,Mid-peak,On-peak -11/25/2011 01:00:00,0.39,1,0,0 -11/25/2011 02:00:00,0.33,1,0,0 -11/25/2011 03:00:00,0.27,1,0,0 -11/25/2011 04:00:00,0.29,1,0,0 -11/25/2011 05:00:00,0.29,1,0,0 -11/25/2011 06:00:00,0.29,1,0,0 -11/25/2011 07:00:00,0.28,1,0,0 -11/25/2011 08:00:00,0.31,0,0,1 -11/25/2011 09:00:00,0.33,0,0,1 -11/25/2011 10:00:00,0.48,0,0,1 -... -*/ - -#include -#include -#include -#include -#include -#include -#include -#include -#include - -using namespace std; -using namespace mlpack; -using namespace mlpack::ann; -using namespace ens; - -/* - * Function to calcute MSE for arma::cube. 
- */ -double MSE(arma::cube& pred, arma::cube& Y) -{ - return metric::SquaredEuclideanDistance::Evaluate(pred, Y) / (Y.n_elem); -} - -/** - * The time series data for training the model contains the electricity - * consumption in kWh for 'rho' hours in the past. The target variable we want - * to predict is the electricity consumption in kWh for the next hour. - * - * NOTE: We do not use the last input data point in the training because there - * is no target (next hour) for that point. - */ -template -void CreateTimeSeriesData(InputDataType dataset, - DataType& X, - LabelType& y, - const size_t rho) -{ - for (size_t i = 0; i < dataset.n_cols - rho; i++) - { - X.subcube(arma::span(), arma::span(i), arma::span()) = - dataset.submat(arma::span(), arma::span(i, i + rho - 1)); - y.subcube(arma::span(), arma::span(i), arma::span()) = - dataset.submat(arma::span(), arma::span(i + 1, i + rho)); - } -} - -/** - * This function saves the input data for prediction and the prediction results - * in CSV format. The prediction results are the electricity consumption in kWh - * for the next hour and come from the last slice of the prediction. The last - * column is the prediction; the preceding column is the data used to generate - * those predictions. - */ -void SaveResults(const string& filename, - const arma::cube& predictions, - data::MinMaxScaler& scale, - const arma::cube& testX) -{ - arma::mat flatDataAndPreds = testX.slice(testX.n_slices - 1); - - // The prediction result is the energy consumption for the next hour and comes - // from the last slice of the prediction. - flatDataAndPreds.rows(flatDataAndPreds.n_rows - 1, - flatDataAndPreds.n_rows - 1) = predictions.slice( - predictions.n_slices - 1); - - scale.InverseTransform(flatDataAndPreds, flatDataAndPreds); - // We need to remove the last column because it was not used for training - // (there is no next hour to predict). - flatDataAndPreds.shed_col(flatDataAndPreds.n_cols - 1); - - // Save the data to file. The last columns are the predictions; the preceding - // column is the data used to generate those predictions. - data::Save(filename, flatDataAndPreds); - - // Print the output to screen. - // NOTE: we do not have the last data point in the input for the prediction - // because we did not use it for the training, therefore the prediction result - // will be for the hour before. In your own application you may of course load - // any dataset for prediction. - cout << "The predicted energy consumption for the next hour is : " << endl; - cout << " " << flatDataAndPreds(flatDataAndPreds.n_rows - 1, - flatDataAndPreds.n_cols - 1) << endl; -} - -int main() -{ - // Change the names of these files as necessary. They should be correct - // already, if your program's working directory contains the data and/or - // model. - const string dataFile = "electricity-usage.csv"; - // example: const string dataFile = - // "C:/mlpack-model-app/electricity-usage.csv"; - // example: const string dataFile = - // "/home/user/mlpack-model-app/electricity-usage.csv"; - - const string modelFile = "lstm_univar.bin"; - // example: const string modelFile = - // "C:/mlpack-model-app/lstm_univar.bin"; - // example: const string modelFile = - // "/home/user/mlpack-model-app/lstm_univar.bin"; - - const string predFile = "lstm_univar_predictions.csv"; - - // If true, the model will be trained; if false, the saved model will be read - // and used for prediction. 
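To make the windowing in CreateTimeSeriesData() above concrete, here is a tiny illustration with synthetic numbers (not from the electricity dataset); note that the full example further below sizes its cubes with one extra column (n_cols - rho + 1) that the loop leaves unfilled:

// A 5-step univariate series and rho = 3 produce two (window, target) pairs:
//   window (0.1 0.2 0.3) -> targets (0.2 0.3 0.4)
//   window (0.2 0.3 0.4) -> targets (0.3 0.4 0.5)
// i.e. y is simply X shifted one step into the future.
arma::mat series = {{0.1, 0.2, 0.3, 0.4, 0.5}};  // 1 feature x 5 time steps
const size_t rho = 3;
arma::cube X(1, series.n_cols - rho, rho);
arma::cube y(1, series.n_cols - rho, rho);
CreateTimeSeriesData(series, X, y, rho);
// X.tube(0, 0) now holds 0.1 0.2 0.3 and y.tube(0, 0) holds 0.2 0.3 0.4.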
- // NOTE: training the model may take a long time; therefore once it is trained - // you can set this to false and use the model for prediction. - // NOTE: there is no error checking in this example to see if the trained - // model exists. - const bool bTrain = true; - // You can load and further train a model. - const bool bLoadAndTrain = false; - - // Training data is randomly taken from the dataset in this ratio. - const double RATIO = 0.1; - - // Step size of an optimizer. - const double STEP_SIZE = 5e-5; - - // Number of data points in each iteration of SGD. - const size_t BATCH_SIZE = 10; - - // Data has only one dimension. - const size_t inputSize = 1; - - // We are predicting the next value, hence, the output is one dimensional. - const size_t outputSize = 1; - - // Number of timesteps to look backwards in RNN. - const size_t rho = 10; - - // Number of cells in the LSTM (hidden layers in standard terms) - // NOTE: you may play with this variable in order to further optimize the - // model. (as more cells are added, accuracy is likely to go up, but training - // time may take longer) - const int H1 = 10; - - // Max rho for LSTM. - const size_t maxRho = rho; - - arma::mat dataset; - - // In Armadillo rows represent features, columns represent data points. - cout << "Reading data ..." << endl; - data::Load(dataFile, dataset, true); - - // The CSV file has a header, so it is necessary to remove it. In Armadillo's - // representation it is the first column. - // The first column in the CSV is the date which is not required, therefore - // removing it also (first row in in arma::mat). - dataset = dataset.submat(1, 1, 1, dataset.n_cols - 1); - - // Split the dataset into training and validation sets. - arma::mat trainData = dataset.submat(arma::span(), arma::span(0, (1 - RATIO) * - dataset.n_cols)); - arma::mat testData = dataset.submat(arma::span(), - arma::span((1 - RATIO) * dataset.n_cols, dataset.n_cols - 1)); - - // Number of iterations per cycle. - const int EPOCHS = 150; - - // Scale all data into the range (0, 1) for increased numerical stability. - data::MinMaxScaler scale; - // Fit scaler only on training data. - scale.Fit(trainData); - scale.Transform(trainData, trainData); - scale.Transform(testData, testData); - - // We need to represent the input data for RNN in an arma::cube (3D matrix). - // The 3rd dimension is the rho number of past data records the RNN uses for - // learning. - arma::cube trainX, trainY, testX, testY; - trainX.set_size(inputSize, trainData.n_cols - rho + 1, rho); - trainY.set_size(outputSize, trainData.n_cols - rho + 1, rho); - testX.set_size(inputSize, testData.n_cols - rho + 1, rho); - testY.set_size(outputSize, testData.n_cols - rho + 1, rho); - - // Create training sets for one-step-ahead regression. - CreateTimeSeriesData(trainData, trainX, trainY, rho); - // Create test sets for one-step-ahead regression. - CreateTimeSeriesData(testData, testX, testY, rho); - - // Only train the model if required. - if (bTrain || bLoadAndTrain) - { - // RNN regression model. - RNN, HeInitialization> model(rho); - - if (bLoadAndTrain) - { - // The model will be trained further. - cout << "Loading and further training model..." << endl; - data::Load(modelFile, "LSTMUnivar", model); - } - else - { - // Model building. - model.Add >(); - model.Add >(inputSize, H1, maxRho); - model.Add >(); - model.Add >(H1, H1, maxRho); - model.Add >(); - model.Add >(H1, outputSize); - } - - // Set parameters for the Stochastic Gradient Descent (SGD) optimizer. 
-    // Set parameters for the Stochastic Gradient Descent (SGD) optimizer.
-    SGD<AdamUpdate> optimizer(
-        STEP_SIZE, // Step size of the optimizer.
-        BATCH_SIZE, // Batch size. Number of data points used per iteration.
-        trainData.n_cols * EPOCHS, // Max number of iterations.
-        1e-8, // Tolerance.
-        true, // Shuffle.
-        AdamUpdate(1e-8, 0.9, 0.999)); // Adam update policy.
-
-    // Instead of terminating based on the tolerance of the objective function,
-    // we'll depend on the maximum number of iterations, and terminate early
-    // using the EarlyStopAtMinLoss callback.
-    optimizer.Tolerance() = -1;
-
-    cout << "Training ..." << endl;
-
-    model.Train(trainX,
-                trainY,
-                optimizer,
-                // The PrintLoss callback prints the loss after each epoch.
-                ens::PrintLoss(),
-                // The ProgressBar callback prints a progress bar for each epoch.
-                ens::ProgressBar(),
-                // Stops the optimization process if the loss stops decreasing
-                // or no improvement has been made. This will terminate the
-                // optimization once a minimum of the training loss is reached.
-                ens::EarlyStopAtMinLoss());
-
-    cout << "Finished training." << endl;
-    cout << "Saving Model" << endl;
-    data::Save(modelFile, "LSTMUnivar", model);
-    cout << "Model saved in " << modelFile << endl;
-  }
-
-  // NOTE: the code below is added in order to show how, in a real application,
-  // the model would be saved, loaded and then used for prediction. Please note
-  // that we do not have the last data point in testX because we did not use it
-  // for training, therefore the prediction result will be for the hour before.
-  // In your own application you may of course load any dataset.
-
-  // Load the RNN model and use it for prediction.
-  RNN<MeanSquaredError<>, HeInitialization> modelP(rho);
-  cout << "Loading model ..." << endl;
-  data::Load(modelFile, "LSTMUnivar", modelP);
-  arma::cube predOutP;
-
-  // Get predictions on the test data points.
-  modelP.Predict(testX, predOutP);
-  // Calculate the MSE on the predictions.
-  double testMSEP = MSE(predOutP, testY);
-  cout << "Mean Squared Error on Prediction data points: " << testMSEP << endl;
-
-  // Save the output predictions and show the results.
-  SaveResults(predFile, predOutP, scale, testX);
-
-  // Use this on Windows in order to keep the console window open.
-  // cout << "Ready!" << endl;
-  // getchar();
-}
diff --git a/LSTM/TimeSeries-Univariate/tutorial.txt b/LSTM/TimeSeries-Univariate/tutorial.txt
deleted file mode 100644
index 8d8e20eb..00000000
--- a/LSTM/TimeSeries-Univariate/tutorial.txt
+++ /dev/null
@@ -1,148 +0,0 @@
-/*!
-@file tutorial.txt
-@author Mehul Kumar Nirala
-@author Zoltan Somogyi
-@brief Tutorial on univariate time series forecasting using an RNN.
-
-@page rnntutorial LSTM Univariate Time Series
-
-@section intro_lstmtut Introduction
-
-This is an example of using a Recurrent Neural Network (RNN) to make forecasts
-on a time series of electricity usage (in kWh), which we solve with a simple
-mlpack neural network with LSTM layers.
-
-@section toc_lstmtut Table of Contents
-
-This tutorial is split into the following sections:
-
- - \ref intro_lstmtut
- - \ref toc_lstmtut
- - \ref data_lstmtut
- - \ref model_lstmtut
- - \ref training_lstmtut
- - \ref results_lstmtut
- - \ref other_results_lstmtut
-
-@section data_lstmtut Time Series Data
-
-We will look at the number of kilowatt-hours used in a residential home over a
-3.5-month period, from 25 November 2011 to 17 March 2012. Electricity usage was
-recorded by the local utility company on an hour-by-hour basis.
-
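-The data is loaded in the same way as in the accompanying example program: the
-CSV header and the date column are dropped so that only the hourly kWh values
-remain (in Armadillo, rows represent features and columns represent data
-points).
-
-@code
-  arma::mat dataset;
-  data::Load("electricity-usage.csv", dataset, true);
-  // Drop the CSV header (the first column in Armadillo's representation) and
-  // the date feature (the first row), keeping only the kWh values.
-  dataset = dataset.submat(1, 1, 1, dataset.n_cols - 1);
-@endcode
-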
-Initially we normalize the input data using MinMaxScaler so that all the input
-features are on a scale between 0 and 1. Normally, it is a good idea to
-investigate various data preparation techniques to rescale the data and to
-make it stationary.
-
-@code
-  template<typename DataType>
-  DataType MinMaxScaler(DataType& dataset)
-  {
-    arma::vec maxValues = arma::max(dataset, 1 /* for each dimension */);
-    arma::vec minValues = arma::min(dataset, 1 /* for each dimension */);
-
-    arma::vec rangeValues = maxValues - minValues;
-
-    // Add a very small value if there are any zeros.
-    rangeValues += 1e-25;
-
-    dataset -= arma::repmat(minValues, 1, dataset.n_cols);
-    dataset /= arma::repmat(rangeValues, 1, dataset.n_cols);
-    return dataset;
-  }
-  ...
-  // Scale data for increased numerical stability.
-  dataset = MinMaxScaler(dataset);
-@endcode
-
-The accompanying program uses mlpack's built-in data::MinMaxScaler for the same
-purpose, fitting it on the training split only.
-
-The following will create a dataset where X is the electricity consumption in
-kWh at a given time step (t), and Y is the electricity consumption in kWh at
-the next time step (t + 1). The time series data for training the model (X)
-contains the electricity consumption in kWh for 'rho' hours in the past. The
-target variable (Y) we want to predict is the electricity consumption in kWh
-for the next hour.
-
-@code
-  template<typename InputDataType, typename DataType, typename LabelType>
-  void CreateTimeSeriesData(InputDataType dataset,
-                            DataType& X,
-                            LabelType& y,
-                            const size_t rho)
-  {
-    for (size_t i = 0; i < dataset.n_cols - rho; i++)
-    {
-      X.subcube(arma::span(), arma::span(i), arma::span()) =
-          dataset.submat(arma::span(), arma::span(i, i + rho - 1));
-      y.subcube(arma::span(), arma::span(i), arma::span()) =
-          dataset.submat(arma::span(), arma::span(i + 1, i + rho));
-    }
-  }
-@endcode
-
-@section model_lstmtut LSTM Model
-
-The model is built from two stacked LSTM modules with H1 cells each, each
-followed by a LeakyReLU activation, and a final Linear layer that maps the H1
-hidden units to the one-dimensional output. In the accompanying program the
-network is declared as an RNN with a MeanSquaredError output layer and
-HeInitialization for the initial weights.
-
-@code
-  model.Add<IdentityLayer<> >();
-  model.Add<LSTM<> >(inputSize, H1, maxRho);
-  model.Add<LeakyReLU<> >();
-  model.Add<LSTM<> >(H1, H1, maxRho);
-  model.Add<LeakyReLU<> >();
-  model.Add<Linear<> >(H1, outputSize);
-@endcode
-
-Next, we set the parameters for the Stochastic Gradient Descent (SGD)
-optimizer.
-
-@code
-  SGD<AdamUpdate> optimizer(
-      STEP_SIZE, // Step size of the optimizer.
-      BATCH_SIZE, // Batch size. Number of data points used in each iteration.
-      ITERATIONS_PER_EPOCH, // Max number of iterations.
-      1e-8, // Tolerance.
-      true, // Shuffle.
-      AdamUpdate(1e-8, 0.9, 0.999)); // Adam update policy.
-@endcode
-
-@section training_lstmtut Training the model
-
-@code
-  cout << "Training ..." << endl;
-  // Run EPOCH number of cycles for optimizing the solution.
-  for (int i = 0; i < EPOCH; i++)
-  {
-    // Train the neural network. If this is the first iteration, weights are
-    // random; otherwise the current values are used as the starting point.
-    model.Train(trainX, trainY, optimizer);
-
-    // Don't reset the optimizer's parameters between cycles.
-    optimizer.ResetPolicy() = false;
-
-    cube predOut;
-    // Get predictions on the test data points.
-    model.Predict(testX, predOut);
-
-    // Calculate the MSE on the test data points.
-    double testMSE = MSE(predOut, testY);
-    cout << i + 1 << " - Mean Squared Error := " << testMSE << endl;
-  }
-@endcode
-
-@section results_lstmtut Results
-
-Reading data ...
-Training ...
-1 - Mean Squared Error := 0.0167553
-2 - Mean Squared Error := 0.00649447
-3 - Mean Squared Error := 0.00558209
-4 - Mean Squared Error := 0.00544844
-5 - Mean Squared Error := 0.00537431
-...
-100 - Mean Squared Error := 0.00480882
-Finished training.
-Saving Model
-Model saved in /saved_models/lstm_univar.bin
-Loading model ...
-Mean Squared Error on Prediction data points:= 0.00480062
-The predicted energy consumption for the next hour is :
- 0.410681
-Ready!
-
-*/
\ No newline at end of file