evaluation&mnist example
Zhylkaaa committed May 19, 2020
1 parent d332200 commit efee05a
Showing 11 changed files with 60,209 additions and 75 deletions.
7 changes: 7 additions & 0 deletions includes/MLP.h
@@ -30,6 +30,11 @@ class MLP {

    uint32_t batch_size;
    uint32_t input_size;

+    void eval_batch(std::vector<std::vector<float>>& batch, VkCommandBuffer& evalCommandBuffer,
+                    VkDeviceMemory evalDeviceMemory, std::vector<uint64_t>& eval_offsets, uint32_t& correct_predictions,
+                    std::vector<std::vector<float>>& true_labels);
+
public:
    MLP();

@@ -56,6 +61,8 @@ class MLP {

    std::vector<std::pair<Tensor, Tensor>> get_trainable_parameters();

+    float evaluate(std::vector<std::vector<float>>& X, std::vector<std::vector<float>>& y);
+
    ~MLP();

    // DEBUG
3 changes: 2 additions & 1 deletion includes/classification_trainer.h
@@ -31,7 +31,8 @@ class ClassificationTrainer: public Trainer {
    ClassificationTrainer(MLP* mlp, std::vector<example> &dataset, const std::unordered_map<std::string, float>& optimizer_params,
                          const std::string& optimizer="sgd");

-    void train(uint32_t num_iterations, uint32_t print_every=0) override;
+    void train(uint32_t num_iterations, uint32_t print_every) override;
+    void train(uint32_t num_iterations, std::vector<float>& loss_history, uint32_t print_every) override;

    float compute_loss(const std::vector<std::vector<float>>& labels) override;
4 changes: 3 additions & 1 deletion includes/trainer.h
@@ -35,7 +35,9 @@ class Trainer {
    std::vector<Layer*>& get_layers() {return mlp->layers;}
    VkQueue& get_queue() {return mlp->queue;}

-    virtual void train(uint32_t num_iterations, uint32_t print_every=0) = 0;
+    virtual void train(uint32_t num_iterations, uint32_t print_every) = 0;
+    virtual void train(uint32_t num_iterations, std::vector<float>& loss_history, uint32_t print_every) = 0;

    virtual float compute_loss(const std::vector<std::vector<float>>& labels) = 0;

    virtual ~Trainer() = default;
85 changes: 40 additions & 45 deletions main.cpp
@@ -3,72 +3,67 @@
 #include <string>
 #include <vulkan_init.h>
 #include <classification_trainer.h>
+#include <fstream>

int main() {

-    std::vector<int> layers{5};
+    std::vector<int> layers{10};
    std::vector<std::string> activations{"softmax"};
-    MLP mlp = MLP(3, 3, layers, activations);
-
-    std::vector<std::vector<float>> batch{{1, 2, 3},
-                                          {2, 1, 3},
-                                          {3, 2, 1}};
-
-    std::vector<std::vector<float>> host_labels{{1, 0, 0, 0, 0},
-                                                {0, 1, 0, 0, 0},
-                                                {0, 0, 1, 0, 0}};
+    MLP mlp = MLP(784, 32, layers, activations);

    mlp.forward_initialize();
-    mlp.forward(batch);

-    std::vector<example> dataset;
+    std::vector<example> train_dataset;

-    for(int i = 0;i<3;i++){
-        example e;
-        e.x = batch[i];
-        e.y = host_labels[i];
+    std::vector<std::vector<float>> val_x;
+    std::vector<std::vector<float>> val_y;

-        dataset.push_back(e);
-    }
+    std::ifstream train_image_input("../train_MNIST_images.txt");
+    std::ifstream val_image_input("../val_MNIST_images.txt", std::ios::in);

-    std::unordered_map<std::string, float> params;
-    params["learning_rate"] = 3;
+    std::ifstream train_label_input("../train_MNIST_labels.txt", std::ios::in);
+    std::ifstream val_label_input("../val_MNIST_labels.txt", std::ios::in);

+    if(!train_image_input.is_open() || !val_image_input.is_open() || !train_label_input.is_open() ||
+       !val_label_input.is_open())throw std::runtime_error("can't read training data");

-    ClassificationTrainer trainer = ClassificationTrainer(&mlp, dataset, params);
+    for(int i = 0;i<20000;i++){
+        std::vector<float> train_x(784);
+        std::vector<float> v_x(784);

-    trainer.train(100, 1);
+        std::vector<float> train_y(10);
+        std::vector<float> v_y(10);
+        for(int j = 0;j<784;j++){
+            train_image_input>>train_x[j];
+            if(i<10000)val_image_input>>v_x[j];
+        }

-    mlp.forward(batch);
+        for(int j = 0;j<10;j++){
+            train_label_input>>train_y[j];
+            if(i<10000)val_label_input>>v_y[j];
+        }

-    /*VkBuffer labels;
-    VkDeviceMemory deviceMemory;
-    createBuffer(mlp.get_device(), mlp.get_queue_index(), labels, 3, 5);
-    std::vector<VkBuffer*> buffers{&labels};
-    std::vector<uint64_t> offsets;
-    allocateAndBindBuffers(mlp.get_device(), mlp.get_physicalDevice(), buffers, deviceMemory, offsets);
+        example train_e{.x=train_x, .y=train_y};

-    char* data = nullptr;
-    if(vkMapMemory(mlp.get_device(), deviceMemory, 0, VK_WHOLE_SIZE, 0, reinterpret_cast<void **>(&data)) != VK_SUCCESS){
-        throw std::runtime_error("failed to map device memory");
-    }
-    float* device_labels = reinterpret_cast<float*>(data + offsets[0]);
-    for(int i = 0;i<host_labels.size();i++){
-        for(int j=0;j<host_labels[0].size();j++){
-            device_labels[i*5+j] = host_labels[i][j];
+        train_dataset.push_back(train_e);
+        if(i<10000){
+            val_x.push_back(v_x);
+            val_y.push_back(v_y);
        }
    }
-    vkUnmapMemory(mlp.get_device(), deviceMemory);

-    mlp.backward_initialize(labels);
+    std::cout<<"accuracy before training: "<<mlp.evaluate(val_x, val_y)<<std::endl;

-    for(int i = 0;i<1000;i++){
-        mlp.forward(batch);
-        mlp.backward();
-    }
+    std::unordered_map<std::string, float> params;
+    params["learning_rate"] = 0.3;

+    ClassificationTrainer trainer = ClassificationTrainer(&mlp, train_dataset, params);

+    std::vector<float> loss_history;

+    trainer.train(1000, loss_history, 1);

-    vkFreeMemory(mlp.get_device(), deviceMemory, nullptr);
-    vkDestroyBuffer(mlp.get_device(), labels, nullptr);*/
+    std::cout<<"accuracy after training: "<<mlp.evaluate(val_x, val_y)<<std::endl;

    return 0;
}
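Note: the example reads MNIST from plain-text exports rather than the original IDX binaries. Judging from the read loops above, each image file is expected to hold 784 whitespace-separated floats per example (20,000 training, 10,000 validation) and each label file a 10-value one-hot row per example; the export script itself is not part of this diff. A minimal sketch of a sanity check under those assumptions (the helper name is hypothetical, not part of the commit):

    // sanity_check.cpp -- hypothetical helper, not part of the commit.
    // Verifies that an MNIST text file holds `count` rows of `dim`
    // whitespace-separated floats, as main.cpp's read loop expects.
    #include <cstddef>
    #include <fstream>
    #include <stdexcept>
    #include <string>

    void check_mnist_txt(const std::string& path, std::size_t count, std::size_t dim) {
        std::ifstream in(path);
        if (!in.is_open()) throw std::runtime_error("can't open " + path);
        float v;
        for (std::size_t i = 0; i < count * dim; i++) {
            if (!(in >> v)) throw std::runtime_error(path + ": too few values");
        }
    }

    // usage: check_mnist_txt("../train_MNIST_images.txt", 20000, 784);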
31 changes: 31 additions & 0 deletions shaders/argmax.comp
@@ -0,0 +1,31 @@
+#version 450
+
+layout (local_size_x = 32, local_size_y = 1, local_size_z = 1) in;
+
+layout(std430, binding=0) buffer inP { float predictions[]; };
+layout(std430, binding=1) buffer inAm { uint argmax[]; };
+
+layout(push_constant) uniform dim {
+    uint batch_size;
+    uint inp_dim;
+};
+
+void main(){
+    uint x = gl_GlobalInvocationID.x;
+
+    if(x >= batch_size)return;
+
+    uint idx = x*inp_dim;
+
+    uint res = 0;
+    float m = predictions[idx];
+
+    for(uint i = 1;i<inp_dim;i++){
+        if(m < predictions[idx + i]){
+            res = i;
+            m = predictions[idx + i];
+        }
+    }
+
+    argmax[x] = res;
+}
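The shader computes a row-wise argmax: one invocation per batch row scans its inp_dim logits and writes the index of the largest one (first occurrence wins), and src/MLP.cpp dispatches it with (batch_size+31)/32 workgroups of 32 threads. A CPU reference of the same reduction, useful for checking the shader's output against known inputs (a sketch; argmax_cpu is illustrative, not part of the commit):

    #include <cstdint>
    #include <vector>

    // Reference row-wise argmax matching shaders/argmax.comp:
    // for each of batch_size rows of length inp_dim, return the
    // index of the largest value (first occurrence wins, as in the shader).
    std::vector<uint32_t> argmax_cpu(const std::vector<float>& predictions,
                                     uint32_t batch_size, uint32_t inp_dim) {
        std::vector<uint32_t> result(batch_size);
        for (uint32_t x = 0; x < batch_size; x++) {
            uint32_t idx = x * inp_dim, res = 0;
            float m = predictions[idx];
            for (uint32_t i = 1; i < inp_dim; i++) {
                if (m < predictions[idx + i]) { res = i; m = predictions[idx + i]; }
            }
            result[x] = res;
        }
        return result;
    }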
132 changes: 112 additions & 20 deletions src/MLP.cpp
@@ -86,26 +86,6 @@ void MLP::forward(const std::vector<std::vector<float> > &batch) {
    for(Layer* layer : layers){
        layer->forward(queue);
    }
-
-#ifndef NDEBUG
-    std::cout<<"output is:"<<std::endl;
-
-    int n = layers.size();
-    data = nullptr;
-    if(vkMapMemory(device, layers[n-1]->get_forward_device_memory(), 0, VK_WHOLE_SIZE, 0, reinterpret_cast<void **>(&data)) != VK_SUCCESS){
-        throw std::runtime_error("failed to map device memory");
-    }
-
-    float* output = reinterpret_cast<float*>(data + layers[n-1]->get_output_offset());
-
-    for(int i = 0;i<this->batch_size;i++){
-        for(int j = 0;j<layers[n-1]->get_output_dim();j++){
-            std::cout<<output[i*layers[n-1]->get_output_dim() + j]<<" ";
-        }
-        std::cout<<std::endl;
-    }
-    vkUnmapMemory(device, layers[n-1]->get_forward_device_memory());
-#endif
}

MLP::MLP() {
@@ -154,5 +134,117 @@ MLP::~MLP() {
    vkDestroyInstance(instance, nullptr);
}

+void MLP::eval_batch(std::vector<std::vector<float>>& batch, VkCommandBuffer& evalCommandBuffer,
+                     VkDeviceMemory evalDeviceMemory, std::vector<uint64_t>& eval_offsets, uint32_t& correct_predictions,
+                     std::vector<std::vector<float>>& true_labels){
+
+    forward(batch);
+    submitTask(queue, &evalCommandBuffer);
+
+    char *data = nullptr;
+    if(vkMapMemory(device, evalDeviceMemory, 0, VK_WHOLE_SIZE, 0, reinterpret_cast<void**>(&data)) != VK_SUCCESS){
+        throw std::runtime_error("failed to map device memory");
+    }
+
+    uint32_t* p_labels = reinterpret_cast<uint32_t*>(data + eval_offsets[0]);
+
+    for(uint32_t k = 0;k<batch_size;k++){
+        if(true_labels[k][p_labels[k]] == 1)correct_predictions++;
+    }
+
+    vkUnmapMemory(device, evalDeviceMemory);
+}
+
+float MLP::evaluate(std::vector<std::vector<float>> &X, std::vector<std::vector<float>> &y) {
+    if(X.size() != y.size() || X.empty()){
+        throw std::invalid_argument("X and y should have same size > 0");
+    }
+
+    if(y[0].size() != get_output_dim()){
+        throw std::invalid_argument("model's output size doesn't match label's");
+    }
+
+    uint32_t iters = X.size() / batch_size;
+
+    uint32_t ex = 0;
+
+    VkBuffer predicted_labels;
+    createBuffer(device, queueFamilyIndex, predicted_labels, batch_size, 1, sizeof(uint32_t));
+
+    std::vector<VkBuffer*> buffers{&predicted_labels};
+
+    VkDeviceMemory evalDeviceMemory;
+    std::vector<uint64_t> eval_offsets;
+    VkDescriptorSetLayout evalSetLayout;
+    VkPipelineLayout evalPipelineLayout;
+    VkPipeline evalPipeline;
+    VkDescriptorPool evalDescriptorPool;
+    VkDescriptorSet evalDescriptorSet;
+    VkCommandPool evalCommandPool;
+    VkCommandBuffer evalCommandBuffer;
+
+    struct push_const {
+        uint32_t batch_size;
+        uint32_t inp_dim;
+    } dim{};
+
+    dim.batch_size = batch_size;
+    dim.inp_dim = get_output_dim();
+
+    allocateAndBindBuffers(device, physicalDevice, buffers, evalDeviceMemory, eval_offsets);
+
+    createPipelineLayout(device, 2, evalSetLayout, evalPipelineLayout, sizeof(push_const));
+    createComputePipeline(device, "../shaders/argmax.comp.spv", evalPipelineLayout, evalPipeline);
+
+    buffers.insert(buffers.begin(), &get_output());
+
+    allocateDescriptorSet(device, buffers, evalDescriptorPool, evalSetLayout, evalDescriptorSet);
+    createCommandPoolAndBuffer(device, queueFamilyIndex, evalCommandPool, evalCommandBuffer);
+
+    recordComputePipeline(evalCommandBuffer, evalPipelineLayout, sizeof(push_const), reinterpret_cast<void*>(&dim),
+                          evalPipeline, evalDescriptorSet, (dim.batch_size+31)/32, 1, 1);
+
+    uint32_t correct_predictions = 0;
+
+    std::vector<std::vector<float>> batch(batch_size);
+    std::vector<std::vector<float>> true_labels(batch_size);
+
+    for(uint32_t i = 0;i<iters;i++){
+        for(uint32_t j=0;j<batch_size;j++){
+            batch[j] = X[ex];
+            true_labels[j] = y[ex];
+            ex++;
+        }
+
+        eval_batch(batch, evalCommandBuffer, evalDeviceMemory, eval_offsets, correct_predictions, true_labels);
+    }
+
+    if(ex != X.size()){
+        uint32_t j;
+        for(j=0;j<batch_size && ex != X.size();j++){
+            batch[j] = X[ex];
+            true_labels[j] = y[ex];
+            ex++;
+        }
+
+        for(;j<batch_size;j++){
+            batch[j] = std::vector<float>(input_size, 0);
+            true_labels[j] = std::vector<float>(get_output_dim(), 0);
+        }
+
+        eval_batch(batch, evalCommandBuffer, evalDeviceMemory, eval_offsets, correct_predictions, true_labels);
+    }
+
+    vkDestroyCommandPool(device, evalCommandPool, nullptr);
+    vkFreeMemory(device, evalDeviceMemory, nullptr);
+    vkDestroyBuffer(device, predicted_labels, nullptr);
+    vkDestroyDescriptorPool(device, evalDescriptorPool, nullptr);
+    vkDestroyPipeline(device, evalPipeline, nullptr);
+    vkDestroyPipelineLayout(device, evalPipelineLayout, nullptr);
+    vkDestroyDescriptorSetLayout(device, evalSetLayout, nullptr);
+
+    return static_cast<float>(correct_predictions) / X.size();
+}
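A note on the tail batch: when X.size() is not a multiple of batch_size, the final batch is padded with zero images and all-zero label rows. A zero label row can never satisfy true_labels[k][p_labels[k]] == 1, so padded slots never increment correct_predictions, and dividing by X.size() keeps the accuracy exact. A small sketch of the bookkeeping under assumed sizes (values hypothetical):

    #include <cstdint>
    #include <iostream>

    // Illustrates MLP::evaluate's batching with assumed sizes:
    // 10000 examples at batch_size 32 -> 312 full batches plus a
    // final batch of 16 real examples and 16 zero-padded slots.
    int main() {
        uint32_t n_examples = 10000, batch_size = 32;
        uint32_t full_batches = n_examples / batch_size;   // 312
        uint32_t tail         = n_examples % batch_size;   // 16
        std::cout << full_batches << " full batches, tail of "
                  << tail << " real examples" << std::endl;
        return 0;
    }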



22 changes: 14 additions & 8 deletions src/classification_trainer.cpp
@@ -32,22 +32,26 @@ ClassificationTrainer::ClassificationTrainer(MLP* mlp,
    }
}

-void inline create_batch(std::vector<example>& batch, std::mt19937& gen, std::uniform_int_distribution<int>& distribution,
+void inline create_batch(std::vector<example>& batch, uint32_t& iterator,
                         std::vector<example>& dataset, uint32_t batch_size){
-    for(uint32_t i=0;i<batch_size; i++){
-        batch[i] = dataset[distribution(gen)];
+    for(uint32_t k = 0;k<batch_size;k++){
+        batch[k] = dataset[iterator++];
+        iterator %= dataset.size();
    }
}

void ClassificationTrainer::train(uint32_t num_iterations, uint32_t print_every) {
+    std::vector<float> tmp_loss_history;
+    train(num_iterations, tmp_loss_history, print_every);
+}
+
+void ClassificationTrainer::train(uint32_t num_iterations, std::vector<float> &loss_history, uint32_t print_every) {
    std::vector<example> batch(mlp->get_batch_size());

-    std::random_device rd;
-    std::mt19937 gen(rd());
-    std::uniform_int_distribution<int> distribution(0,dataset.size()-1);
+    uint32_t iterator = 0;

    for(int i=0;i<num_iterations;i++){
-        create_batch(batch, gen, distribution, dataset, mlp->get_batch_size());
+        create_batch(batch, iterator, dataset, mlp->get_batch_size());

        std::vector<std::vector<float>> x_batch(mlp->get_batch_size(), std::vector<float>(dataset[0].x.size()));
        std::vector<std::vector<float>> y_batch(mlp->get_batch_size(), std::vector<float>(dataset[0].y.size()));

@@ -81,7 +85,9 @@ void ClassificationTrainer::train(uint32_t num_iterations, uint32_t print_every)
        parameters_optimizer->optimize(get_queue());

        if((print_every != 0 && i % print_every == 0) || i == num_iterations-1){
-            std::cout<<"step: "<<i<<" loss: "<<compute_loss(y_batch)<<std::endl;
+            float l = compute_loss(y_batch);
+            std::cout<<"step: "<<i<<" loss: "<<l<<std::endl;
+            loss_history.push_back(l);
        }
    }
}
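The commit also swaps random batch sampling (std::mt19937 plus a uniform distribution) for a sequential cursor that wraps at the end of the dataset, so iterations sweep the data in epoch-like order and runs become reproducible. A standalone sketch of the cursor's wrap-around, with sizes chosen purely for illustration:

    #include <cstdint>
    #include <iostream>

    // Mirrors the new create_batch cursor: with 5 examples and a batch
    // size of 3, successive batches take indices {0,1,2}, {3,4,0}, {1,2,3}.
    int main() {
        uint32_t iterator = 0, dataset_size = 5, batch_size = 3;
        for (int b = 0; b < 3; b++) {
            for (uint32_t k = 0; k < batch_size; k++) {
                std::cout << iterator << " ";
                iterator = (iterator + 1) % dataset_size;   // iterator++ then %= in the diff
            }
            std::cout << std::endl;
        }
        return 0;
    }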