evaluation&mnist example
Zhylkaaa committed May 19, 2020
1 parent d332200 commit efee05a
Showing 11 changed files with 60,209 additions and 75 deletions.
7 changes: 7 additions & 0 deletions includes/MLP.h
@@ -30,6 +30,11 @@ class MLP {

    uint32_t batch_size;
    uint32_t input_size;

+    void eval_batch(std::vector<std::vector<float>>& batch, VkCommandBuffer& evalCommandBuffer,
+                    VkDeviceMemory evalDeviceMemory, std::vector<uint64_t>& eval_offsets, uint32_t& correct_predictions,
+                    std::vector<std::vector<float>>& true_labels);
+
public:
    MLP();

@@ -56,6 +61,8 @@ class MLP {

    std::vector<std::pair<Tensor, Tensor>> get_trainable_parameters();

+    float evaluate(std::vector<std::vector<float>>& X, std::vector<std::vector<float>>& y);
+
    ~MLP();

    // DEBUG
3 changes: 2 additions & 1 deletion includes/classification_trainer.h
@@ -31,7 +31,8 @@ class ClassificationTrainer: public Trainer {
    ClassificationTrainer(MLP* mlp, std::vector<example> &dataset, const std::unordered_map<std::string, float>& optimizer_params,
                          const std::string& optimizer="sgd");

-    void train(uint32_t num_iterations, uint32_t print_every=0) override;
+    void train(uint32_t num_iterations, uint32_t print_every) override;
+    void train(uint32_t num_iterations, std::vector<float>& loss_history, uint32_t print_every) override;

    float compute_loss(const std::vector<std::vector<float>>& labels) override;
4 changes: 3 additions & 1 deletion includes/trainer.h
@@ -35,7 +35,9 @@ class Trainer {
    std::vector<Layer*>& get_layers() {return mlp->layers;}
    VkQueue& get_queue() {return mlp->queue;}

-    virtual void train(uint32_t num_iterations, uint32_t print_every=0) = 0;
+    virtual void train(uint32_t num_iterations, uint32_t print_every) = 0;
+    virtual void train(uint32_t num_iterations, std::vector<float>& loss_history, uint32_t print_every) = 0;

    virtual float compute_loss(const std::vector<std::vector<float>>& labels) = 0;

    virtual ~Trainer() = default;
85 changes: 40 additions & 45 deletions main.cpp
@@ -3,72 +3,67 @@
 #include <string>
 #include <vulkan_init.h>
 #include <classification_trainer.h>
+#include <fstream>

int main() {

-    std::vector<int> layers{5};
+    std::vector<int> layers{10};
    std::vector<std::string> activations{"softmax"};
-    MLP mlp = MLP(3, 3, layers, activations);
-
-    std::vector<std::vector<float>> batch{{1, 2, 3},
-                                          {2, 1, 3},
-                                          {3, 2, 1}};
-
-    std::vector<std::vector<float>> host_labels{{1, 0, 0, 0, 0},
-                                                {0, 1, 0, 0, 0},
-                                                {0, 0, 1, 0, 0}};
+    MLP mlp = MLP(784, 32, layers, activations);

    mlp.forward_initialize();
-    mlp.forward(batch);

-    std::vector<example> dataset;
+    std::vector<example> train_dataset;

-    for(int i = 0;i<3;i++){
-        example e;
-        e.x = batch[i];
-        e.y = host_labels[i];
+    std::vector<std::vector<float>> val_x;
+    std::vector<std::vector<float>> val_y;

-        dataset.push_back(e);
-    }
+    std::ifstream train_image_input("../train_MNIST_images.txt");
+    std::ifstream val_image_input("../val_MNIST_images.txt", std::ios::in);

-    std::unordered_map<std::string, float> params;
-    params["learning_rate"] = 3;
+    std::ifstream train_label_input("../train_MNIST_labels.txt", std::ios::in);
+    std::ifstream val_label_input("../val_MNIST_labels.txt", std::ios::in);

+    if(!train_image_input.is_open() || !val_image_input.is_open() || !train_label_input.is_open() ||
+       !val_label_input.is_open())throw std::runtime_error("can't read training data");

-    ClassificationTrainer trainer = ClassificationTrainer(&mlp, dataset, params);
+    for(int i = 0;i<20000;i++){
+        std::vector<float> train_x(784);
+        std::vector<float> v_x(784);

-    trainer.train(100, 1);
+        std::vector<float> train_y(10);
+        std::vector<float> v_y(10);
+        for(int j = 0;j<784;j++){
+            train_image_input>>train_x[j];
+            if(i<10000)val_image_input>>v_x[j];
+        }

-    mlp.forward(batch);
+        for(int j = 0;j<10;j++){
+            train_label_input>>train_y[j];
+            if(i<10000)val_label_input>>v_y[j];
+        }

-    /*VkBuffer labels;
-    VkDeviceMemory deviceMemory;
-    createBuffer(mlp.get_device(), mlp.get_queue_index(), labels, 3, 5);
-    std::vector<VkBuffer*> buffers{&labels};
-    std::vector<uint64_t> offsets;
-    allocateAndBindBuffers(mlp.get_device(), mlp.get_physicalDevice(), buffers, deviceMemory, offsets);
+        example train_e{.x=train_x, .y=train_y};

-    char* data = nullptr;
-    if(vkMapMemory(mlp.get_device(), deviceMemory, 0, VK_WHOLE_SIZE, 0, reinterpret_cast<void **>(&data)) != VK_SUCCESS){
-        throw std::runtime_error("failed to map device memory");
-    }
-    float* device_labels = reinterpret_cast<float*>(data + offsets[0]);
-    for(int i = 0;i<host_labels.size();i++){
-        for(int j=0;j<host_labels[0].size();j++){
-            device_labels[i*5+j] = host_labels[i][j];
+        train_dataset.push_back(train_e);
+        if(i<10000){
+            val_x.push_back(v_x);
+            val_y.push_back(v_y);
        }
    }
-    vkUnmapMemory(mlp.get_device(), deviceMemory);

-    mlp.backward_initialize(labels);
+    std::cout<<"accuracy before training: "<<mlp.evaluate(val_x, val_y)<<std::endl;

-    for(int i = 0;i<1000;i++){
-        mlp.forward(batch);
-        mlp.backward();
-    }
+    std::unordered_map<std::string, float> params;
+    params["learning_rate"] = 0.3;

+    ClassificationTrainer trainer = ClassificationTrainer(&mlp, train_dataset, params);

+    std::vector<float> loss_history;

+    trainer.train(1000, loss_history, 1);

-    vkFreeMemory(mlp.get_device(), deviceMemory, nullptr);
-    vkDestroyBuffer(mlp.get_device(), labels, nullptr);*/
+    std::cout<<"accuracy after training: "<<mlp.evaluate(val_x, val_y)<<std::endl;

    return 0;
}
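Note: the example reads MNIST from plain-text exports rather than the original IDX binaries. Judging from the read loops above, each image file is expected to hold 784 whitespace-separated floats per example (20,000 training, 10,000 validation) and each label file a 10-value one-hot row per example; the export script itself is not part of this diff. A minimal sketch of a sanity check under those assumptions (the helper name is hypothetical, not part of the commit):

    // sanity_check.cpp -- hypothetical helper, not part of the commit.
    // Verifies that an MNIST text file holds `count` rows of `dim`
    // whitespace-separated floats, as main.cpp's read loop expects.
    #include <cstddef>
    #include <fstream>
    #include <stdexcept>
    #include <string>

    void check_mnist_txt(const std::string& path, std::size_t count, std::size_t dim) {
        std::ifstream in(path);
        if (!in.is_open()) throw std::runtime_error("can't open " + path);
        float v;
        for (std::size_t i = 0; i < count * dim; i++) {
            if (!(in >> v)) throw std::runtime_error(path + ": too few values");
        }
    }

    // usage: check_mnist_txt("../train_MNIST_images.txt", 20000, 784);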
31 changes: 31 additions & 0 deletions shaders/argmax.comp
@@ -0,0 +1,31 @@
+#version 450
+
+layout (local_size_x = 32, local_size_y = 1, local_size_z = 1) in;
+
+layout(std430, binding=0) buffer inP { float predictions[]; };
+layout(std430, binding=1) buffer inAm { uint argmax[]; };
+
+layout(push_constant) uniform dim {
+    uint batch_size;
+    uint inp_dim;
+};
+
+void main(){
+    uint x = gl_GlobalInvocationID.x;
+
+    if(x >= batch_size)return;
+
+    uint idx = x*inp_dim;
+
+    uint res = 0;
+    float m = predictions[idx];
+
+    for(uint i = 1;i<inp_dim;i++){
+        if(m < predictions[idx + i]){
+            res = i;
+            m = predictions[idx + i];
+        }
+    }
+
+    argmax[x] = res;
+}
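The shader computes a row-wise argmax: one invocation per batch row scans its inp_dim logits and writes the index of the largest one (first occurrence wins), and src/MLP.cpp dispatches it with (batch_size+31)/32 workgroups of 32 threads. A CPU reference of the same reduction, useful for checking the shader's output against known inputs (a sketch; argmax_cpu is illustrative, not part of the commit):

    #include <cstdint>
    #include <vector>

    // Reference row-wise argmax matching shaders/argmax.comp:
    // for each of batch_size rows of length inp_dim, return the
    // index of the largest value (first occurrence wins, as in the shader).
    std::vector<uint32_t> argmax_cpu(const std::vector<float>& predictions,
                                     uint32_t batch_size, uint32_t inp_dim) {
        std::vector<uint32_t> result(batch_size);
        for (uint32_t x = 0; x < batch_size; x++) {
            uint32_t idx = x * inp_dim, res = 0;
            float m = predictions[idx];
            for (uint32_t i = 1; i < inp_dim; i++) {
                if (m < predictions[idx + i]) { res = i; m = predictions[idx + i]; }
            }
            result[x] = res;
        }
        return result;
    }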
132 changes: 112 additions & 20 deletions src/MLP.cpp
@@ -86,26 +86,6 @@ void MLP::forward(const std::vector<std::vector<float> > &batch) {
    for(Layer* layer : layers){
        layer->forward(queue);
    }
-
-#ifndef NDEBUG
-    std::cout<<"output is:"<<std::endl;
-
-    int n = layers.size();
-    data = nullptr;
-    if(vkMapMemory(device, layers[n-1]->get_forward_device_memory(), 0, VK_WHOLE_SIZE, 0, reinterpret_cast<void **>(&data)) != VK_SUCCESS){
-        throw std::runtime_error("failed to map device memory");
-    }
-
-    float* output = reinterpret_cast<float*>(data + layers[n-1]->get_output_offset());
-
-    for(int i = 0;i<this->batch_size;i++){
-        for(int j = 0;j<layers[n-1]->get_output_dim();j++){
-            std::cout<<output[i*layers[n-1]->get_output_dim() + j]<<" ";
-        }
-        std::cout<<std::endl;
-    }
-    vkUnmapMemory(device, layers[n-1]->get_forward_device_memory());
-#endif
}

MLP::MLP() {
@@ -154,5 +134,117 @@ MLP::~MLP() {
    vkDestroyInstance(instance, nullptr);
}

+void MLP::eval_batch(std::vector<std::vector<float>>& batch, VkCommandBuffer& evalCommandBuffer,
+                     VkDeviceMemory evalDeviceMemory, std::vector<uint64_t>& eval_offsets, uint32_t& correct_predictions,
+                     std::vector<std::vector<float>>& true_labels){
+
+    forward(batch);
+    submitTask(queue, &evalCommandBuffer);
+
+    char *data = nullptr;
+    if(vkMapMemory(device, evalDeviceMemory, 0, VK_WHOLE_SIZE, 0, reinterpret_cast<void**>(&data)) != VK_SUCCESS){
+        throw std::runtime_error("failed to map device memory");
+    }
+
+    uint32_t* p_labels = reinterpret_cast<uint32_t*>(data + eval_offsets[0]);
+
+    for(uint32_t k = 0;k<batch_size;k++){
+        if(true_labels[k][p_labels[k]] == 1)correct_predictions++;
+    }
+
+    vkUnmapMemory(device, evalDeviceMemory);
+}
+
+float MLP::evaluate(std::vector<std::vector<float>> &X, std::vector<std::vector<float>> &y) {
+    if(X.size() != y.size() || X.empty()){
+        throw std::invalid_argument("X and y should have same size > 0");
+    }
+
+    if(y[0].size() != get_output_dim()){
+        throw std::invalid_argument("model's output size doesn't match label's");
+    }
+
+    uint32_t iters = X.size() / batch_size;
+
+    uint32_t ex = 0;
+
+    VkBuffer predicted_labels;
+    createBuffer(device, queueFamilyIndex, predicted_labels, batch_size, 1, sizeof(uint32_t));
+
+    std::vector<VkBuffer*> buffers{&predicted_labels};
+
+    VkDeviceMemory evalDeviceMemory;
+    std::vector<uint64_t> eval_offsets;
+    VkDescriptorSetLayout evalSetLayout;
+    VkPipelineLayout evalPipelineLayout;
+    VkPipeline evalPipeline;
+    VkDescriptorPool evalDescriptorPool;
+    VkDescriptorSet evalDescriptorSet;
+    VkCommandPool evalCommandPool;
+    VkCommandBuffer evalCommandBuffer;
+
+    struct push_const {
+        uint32_t batch_size;
+        uint32_t inp_dim;
+    } dim{};
+
+    dim.batch_size = batch_size;
+    dim.inp_dim = get_output_dim();
+
+    allocateAndBindBuffers(device, physicalDevice, buffers, evalDeviceMemory, eval_offsets);
+
+    createPipelineLayout(device, 2, evalSetLayout, evalPipelineLayout, sizeof(push_const));
+    createComputePipeline(device, "../shaders/argmax.comp.spv", evalPipelineLayout, evalPipeline);
+
+    buffers.insert(buffers.begin(), &get_output());
+
+    allocateDescriptorSet(device, buffers, evalDescriptorPool, evalSetLayout, evalDescriptorSet);
+    createCommandPoolAndBuffer(device, queueFamilyIndex, evalCommandPool, evalCommandBuffer);
+
+    recordComputePipeline(evalCommandBuffer, evalPipelineLayout, sizeof(push_const), reinterpret_cast<void*>(&dim),
+                          evalPipeline, evalDescriptorSet, (dim.batch_size+31)/32, 1, 1);
+
+    uint32_t correct_predictions = 0;
+
+    std::vector<std::vector<float>> batch(batch_size);
+    std::vector<std::vector<float>> true_labels(batch_size);
+
+    for(uint32_t i = 0;i<iters;i++){
+        for(uint32_t j=0;j<batch_size;j++){
+            batch[j] = X[ex];
+            true_labels[j] = y[ex];
+            ex++;
+        }
+
+        eval_batch(batch, evalCommandBuffer, evalDeviceMemory, eval_offsets, correct_predictions, true_labels);
+    }
+
+    if(ex != X.size()){
+        uint32_t j;
+        for(j=0;j<batch_size && ex != X.size();j++){
+            batch[j] = X[ex];
+            true_labels[j] = y[ex];
+            ex++;
+        }
+
+        for(;j<batch_size;j++){
+            batch[j] = std::vector<float>(input_size, 0);
+            true_labels[j] = std::vector<float>(get_output_dim(), 0);
+        }
+
+        eval_batch(batch, evalCommandBuffer, evalDeviceMemory, eval_offsets, correct_predictions, true_labels);
+    }
+
+    vkDestroyCommandPool(device, evalCommandPool, nullptr);
+    vkFreeMemory(device, evalDeviceMemory, nullptr);
+    vkDestroyBuffer(device, predicted_labels, nullptr);
+    vkDestroyDescriptorPool(device, evalDescriptorPool, nullptr);
+    vkDestroyPipeline(device, evalPipeline, nullptr);
+    vkDestroyPipelineLayout(device, evalPipelineLayout, nullptr);
+    vkDestroyDescriptorSetLayout(device, evalSetLayout, nullptr);
+
+    return static_cast<float>(correct_predictions) / X.size();
+}
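A note on the tail batch: when X.size() is not a multiple of batch_size, the final batch is padded with zero images and all-zero label rows. A zero label row can never satisfy true_labels[k][p_labels[k]] == 1, so padded slots never increment correct_predictions, and dividing by X.size() keeps the accuracy exact. A small sketch of the bookkeeping under assumed sizes (values hypothetical):

    #include <cstdint>
    #include <iostream>

    // Illustrates MLP::evaluate's batching with assumed sizes:
    // 10000 examples at batch_size 32 -> 312 full batches plus a
    // final batch of 16 real examples and 16 zero-padded slots.
    int main() {
        uint32_t n_examples = 10000, batch_size = 32;
        uint32_t full_batches = n_examples / batch_size;   // 312
        uint32_t tail         = n_examples % batch_size;   // 16
        std::cout << full_batches << " full batches, tail of "
                  << tail << " real examples" << std::endl;
        return 0;
    }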



22 changes: 14 additions & 8 deletions src/classification_trainer.cpp
@@ -32,22 +32,26 @@ ClassificationTrainer::ClassificationTrainer(MLP* mlp,
    }
}

-void inline create_batch(std::vector<example>& batch, std::mt19937& gen, std::uniform_int_distribution<int>& distribution,
+void inline create_batch(std::vector<example>& batch, uint32_t& iterator,
                         std::vector<example>& dataset, uint32_t batch_size){
-    for(uint32_t i=0;i<batch_size; i++){
-        batch[i] = dataset[distribution(gen)];
+    for(uint32_t k = 0;k<batch_size;k++){
+        batch[k] = dataset[iterator++];
+        iterator %= dataset.size();
    }
}

void ClassificationTrainer::train(uint32_t num_iterations, uint32_t print_every) {
+    std::vector<float> tmp_loss_history;
+    train(num_iterations, tmp_loss_history, print_every);
+}
+
+void ClassificationTrainer::train(uint32_t num_iterations, std::vector<float> &loss_history, uint32_t print_every) {
    std::vector<example> batch(mlp->get_batch_size());

-    std::random_device rd;
-    std::mt19937 gen(rd());
-    std::uniform_int_distribution<int> distribution(0,dataset.size()-1);
+    uint32_t iterator = 0;

    for(int i=0;i<num_iterations;i++){
-        create_batch(batch, gen, distribution, dataset, mlp->get_batch_size());
+        create_batch(batch, iterator, dataset, mlp->get_batch_size());

        std::vector<std::vector<float>> x_batch(mlp->get_batch_size(), std::vector<float>(dataset[0].x.size()));
        std::vector<std::vector<float>> y_batch(mlp->get_batch_size(), std::vector<float>(dataset[0].y.size()));

@@ -81,7 +85,9 @@ void ClassificationTrainer::train(uint32_t num_iterations, uint32_t print_every)
        parameters_optimizer->optimize(get_queue());

        if((print_every != 0 && i % print_every == 0) || i == num_iterations-1){
-            std::cout<<"step: "<<i<<" loss: "<<compute_loss(y_batch)<<std::endl;
+            float l = compute_loss(y_batch);
+            std::cout<<"step: "<<i<<" loss: "<<l<<std::endl;
+            loss_history.push_back(l);
        }
    }
}
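The commit also swaps random batch sampling (std::mt19937 plus a uniform distribution) for a sequential cursor that wraps at the end of the dataset, so iterations sweep the data in epoch-like order and runs become reproducible. A standalone sketch of the cursor's wrap-around, with sizes chosen purely for illustration:

    #include <cstdint>
    #include <iostream>

    // Mirrors the new create_batch cursor: with 5 examples and a batch
    // size of 3, successive batches take indices {0,1,2}, {3,4,0}, {1,2,3}.
    int main() {
        uint32_t iterator = 0, dataset_size = 5, batch_size = 3;
        for (int b = 0; b < 3; b++) {
            for (uint32_t k = 0; k < batch_size; k++) {
                std::cout << iterator << " ";
                iterator = (iterator + 1) % dataset_size;   // iterator++ then %= in the diff
            }
            std::cout << std::endl;
        }
        return 0;
    }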