-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathdata.c
79 lines (66 loc) · 1.98 KB
/
data.c
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
#include <stdio.h>
#include <stdlib.h>
#include "data.h"
#include "structs.h"
#define SIZE_OF_IMAGE 784 // 28x28 training images
#define OUTPUT_VECTOR_SIZE 36 // 10 digits, 26 letters
// Turn an array of decimal digits into the integer they spell out.
// places[0] is the most significant digit and places[i] the least significant,
// i.e. `i` is the index of the last digit, not the digit count.
// A negative `i` means "no digits" and yields 0.
int convertToInt(int places[], int i) {
    int total = 0;
    int weight = 1; // 1, 10, 100, ... : decimal weight of the digit being added

    // walk from the ones digit back towards the front of the array
    for (int pos = i; pos >= 0; --pos) {
        total += weight * places[pos];
        weight *= 10;
    }
    return total;
}
// Create the output vector for a label: a column vector with OUTPUT_VECTOR_SIZE
// rows in which the row belonging to the label is set to 1.0.
//   value    - the label; a digit 0-9, or a letter number 1-26 when isLetter is set
//   isLetter - zero for a digit label, non-zero for a letter label
// Digits use rows 0-9 and letters follow them, so letter n lands on row n + 9.
// Returns the matrix obtained from initMatrix (NULL is passed through unchanged).
// A label whose row would fall outside the vector is reported on stderr and the
// vector is returned without any row being set, instead of writing out of bounds.
// NOTE(review): the remaining rows keep whatever initMatrix stored in them -
// presumably zeros; confirm against initMatrix.
Matrix * vectorizeLabel(int value, int isLetter) {
    Matrix * v = initMatrix(OUTPUT_VECTOR_SIZE, 1);
    if (v == NULL) {
        return NULL;
    }

    // letters are shifted past the ten digit rows (letter 1 -> row 10)
    int offset = isLetter ? 9 : 0;

    // range test is done on `value` itself so that value + offset cannot overflow
    if (value >= -offset && value < OUTPUT_VECTOR_SIZE - offset) {
        v->at[value + offset][0] = 1.0;
    } else {
        fprintf(stderr, "vectorizeLabel: label %d is out of range\n", value);
    }
    return v;
}
// Read one comma-separated field of decimal digits from fp.
// Reading stops at ',', '\n' or end of file; that terminator (or EOF) is the
// return value. *value receives the number spelled by the digits and
// *digitCount how many digits were used. Characters that are not digits (for
// example the '\r' of a CRLF line ending) are ignored, and digits beyond
// MAX_FIELD_DIGITS are dropped so the accumulated value stays below 10^9.
static int readCsvField(FILE * fp, int * value, int * digitCount) {
    enum { MAX_FIELD_DIGITS = 9 };
    int c;

    *value = 0;
    *digitCount = 0;
    while ((c = fgetc(fp)) != EOF && c != ',' && c != '\n') {
        if (c >= '0' && c <= '9' && *digitCount < MAX_FIELD_DIGITS) {
            *value = *value * 10 + (c - '0');
            (*digitCount)++;
        }
    }
    return c;
}

// Construct a data set (of a given size) of image vector inputs and label vector
// outputs from a CSV file whose rows have the form "label,pixel,pixel,...".
//   filename     - path of the CSV file to read
//   size         - maximum number of rows to load; also passed to initDataSet
//   usingLetters - forwarded to vectorizeLabel as its isLetter flag
// Returns the DataSet obtained from initDataSet(size) (NULL is passed through).
// Rows are stored at indices 0, 1, ... in file order. Reading stops after `size`
// rows or at end of file, whichever comes first, so when the file is short the
// remaining slots are left exactly as initDataSet created them.
// If the file cannot be opened, the error is reported with perror and the
// untouched data set is returned.
// Rows that do not start with a numeric label followed by a comma (blank lines,
// a header row) are skipped. Every pixel of a row is stored, including the last
// one before the line break; fields beyond SIZE_OF_IMAGE are ignored.
// NOTE(review): the parsed label is handed to vectorizeLabel as-is; any range
// checking of the label is left to that function.
DataSet * readMNIST(char * filename, int size, int usingLetters) {
    DataSet * d = initDataSet(size);
    if (d == NULL) {
        return NULL;
    }

    FILE * fp = fopen(filename, "r"); // open file for reading
    if (fp == NULL) {
        perror("Error reading file (data.c)");
        return d; // there is no stream to close
    }

    int pairIndex = 0;
    while (pairIndex < size) {
        int labelValue = 0, digitCount = 0;

        // the first field of a row is the label
        int c = readCsvField(fp, &labelValue, &digitCount);
        if (c == EOF) {
            break; // no further complete row
        }
        if (c != ',' || digitCount == 0) {
            // not a data row: discard the rest of the line and try the next one
            while (c != '\n' && c != EOF) {
                c = fgetc(fp);
            }
            continue;
        }

        // the remaining fields are the pixel intensities
        Matrix * inputVector = initMatrix(SIZE_OF_IMAGE, 1);
        if (inputVector == NULL) {
            break;
        }
        int vectorIndex = 0;
        do {
            int intensity = 0;
            c = readCsvField(fp, &intensity, &digitCount);
            // A field closed by ',' always occupies a slot (an empty one counts as 0).
            // The final field of the line only counts when it holds digits, so a
            // trailing comma does not create an extra pixel.
            if ((c == ',' || digitCount > 0) && vectorIndex < SIZE_OF_IMAGE) {
                inputVector->at[vectorIndex++][0] = intensity;
            }
        } while (c == ',');

        // add the finished pair to the data set
        d->outputs[pairIndex] = vectorizeLabel(labelValue, usingLetters);
        d->inputs[pairIndex] = inputVector;
        pairIndex++;
    }

    fclose(fp);
    return d;
}