-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathMNIST Fast and Dirty Neural Net
76 lines (59 loc) · 2.79 KB
/
MNIST Fast and Dirty Neural Net
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
# This script follows the same approach as the original script in the repository,
# but model selection is skipped and the optimal parameters are applied directly.
# Submitted as an entry to the Kaggle MNIST Digit Recognizer competition.
library(readr)

# The competition data files live in ../input; read both splits.
train <- read_csv("../input/train.csv")
test <- read_csv("../input/test.csv")

# Log the dimensions of each split to the kernel output.
report_dims <- function(label, df) {
  cat(sprintf("%s set has %d rows and %d columns\n", label, nrow(df), ncol(df)))
}
report_dims("Training", train)
report_dims("Test", test)
# Split predictors and labels. `[[` extracts the label column as a plain
# vector — read_csv() returns a tibble, and a one-column tibble would break
# as.factor() and nnet::class.ind() further down the script.
X <- train[, -1]
Y <- train[[1]]
trainlabel <- train[[1]]

# Reduce the training set using PCA. Pixel intensities are scaled to [0, 1]
# first so the covariance is computed on normalized values.
# (The original computed cov(X) on the raw data and immediately overwrote
# it with cov(Xreduced) — that dead, expensive computation is removed.)
Xreduced <- X / 255
Xcov <- cov(Xreduced)
pcaX <- prcomp(Xcov)
# Build a table of principal components vs. variance explained.
# seq_along(pcaX$sdev) avoids hard-coding the 784-pixel dimensionality,
# so the block generalizes to inputs of any width.
vexplained <- as.data.frame(pcaX$sdev^2 / sum(pcaX$sdev^2))
vexplained <- cbind(seq_along(pcaX$sdev), vexplained, cumsum(vexplained[, 1]))
colnames(vexplained) <- c("No_of_Principal_Components", "Individual_Variance_Explained", "Cumulative_Variance_Explained")

# Plot cumulative variance explained over the first 100 components.
plot(vexplained$No_of_Principal_Components, vexplained$Cumulative_Variance_Explained,
     xlim = c(0, 100), type = "b", pch = 16,
     xlab = "Principal Components", ylab = "Cumulative Variance Explained",
     main = "Principal Components vs Cumulative Variance Explained")

# Summary table sampled every 5 components. The original used seq(0, 100, 5);
# a 0 row index is silently dropped by R, so seq(5, 100, 5) is equivalent
# and states the intent explicitly.
vexplainedsummary <- vexplained[seq(5, 100, 5), ]
vexplainedsummary

# Save the summary table as a PNG for future reference.
# NOTE(review): filename kept byte-identical ("varaince", embedded space)
# in case downstream tooling expects this exact name.
library(gridExtra)
png("datatablevaraince explained.png", height = 800, width = 1000)
p <- tableGrob(vexplainedsummary)
grid.arrange(p)
dev.off()
# Project the normalized training data onto the first 45 principal components.
Xfinal <- as.matrix(Xreduced) %*% pcaX$x[,1:45]
# Making training labels factors.
# NOTE(review): trainlabel is never used after this line in the visible
# script — candidate for removal.
trainlabel <- as.factor(trainlabel)
library(nnet)
# One-hot encode the labels (class indicator matrix) for softmax training.
Y <- class.ind(Y)
# Sanity-check a corner of the inputs before the (slow) training run.
print(X[1:5,1:5])
print(Y[1:5,])
# We use size = 150 hidden nodes and maxit = 130 iterations (the original
# comment said maxiter=100, but the call below actually passes maxit=130).
# Change these as a trade-off between running time and accuracy.
# MaxNWts is raised so nnet accepts the ~7k-weight network.
finalseed <- 150
set.seed(finalseed)
model_final <- nnet(Xfinal,Y,size=150,softmax=TRUE,maxit=130,MaxNWts = 80000)
# Prepare the test set with the same normalization and PCA projection used
# for training.
# NOTE(review): the original also ran `testlabel <- as.factor(test[,1])`,
# but the Kaggle test file has no label column (test[,1] is the first
# pixel), and as.factor() on a tibble column errors — the line was both
# wrong and unused, so it has been removed.
testreduced <- test / 255
testfinal <- as.matrix(testreduced) %*% pcaX$x[, 1:45]

# Predict a digit class for each test row and write the Kaggle submission
# file (ImageId, Label).
prediction <- predict(model_final, testfinal, type = "class")
prediction <- as.data.frame(prediction)
finalprediction <- cbind(as.data.frame(seq_len(nrow(prediction))), prediction)
colnames(finalprediction) <- c("ImageId", "Label")
write.csv(finalprediction, file = "predictions.csv", row.names = FALSE)
# Any files written to the current directory are shown as kernel outputs.