-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy pathGrid Search.R
128 lines (89 loc) · 4.32 KB
/
Grid Search.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
# ---------------------------------------------------- Importing Data ----------------------------------------- #
# Read the CSV and keep only columns 3 to 5; the script later refers to these
# as Age, EstimatedSalary and Purchased.
dataset <- read.csv("Social_Network_Ad.csv")[, 3:5]
# ---------------------------------------- Encoding the target feature as factor ------------------------------ #
# Turn the 0/1 target into a two-level factor so the models treat it as a class
dataset[["Purchased"]] <- factor(dataset[["Purchased"]], levels = c(0, 1))
# ---------------------------------- Splitting the dataset into Training and Test Set ------------------------- #
# install.packages("caTools")
library(caTools)
# Fixed seed so the random split below is repeatable
set.seed(123)
# Logical mask from sample.split() on the target with SplitRatio = 0.75:
# TRUE rows go to the training set, FALSE rows to the test set.
split <- sample.split(dataset$Purchased, SplitRatio = 0.75)
training_set <- dataset[split, ]
test_set <- dataset[!split, ]
# --------------------------------------------------- Feature Scaling ------------------------------------------ #
# Standardise the two feature columns (everything except column 3, the target).
# The centring and scaling constants are estimated on the training set only and
# then reused for the test set. Scaling the test set with its own mean/sd would
# put the two sets on different scales and let test-set statistics influence
# the evaluation.
train_scaled <- scale(training_set[-3])
training_set[-3] <- train_scaled
test_set[-3] <- scale(
  test_set[-3],
  center = attr(train_scaled, "scaled:center"),
  scale = attr(train_scaled, "scaled:scale")
)
# ---------------------------------------- Fitting Kernel SVM to the Training set ----------------------------- #
# install.packages("e1071")
library(e1071)
# C-classification SVM with a radial kernel, using every remaining column of
# the training set as a predictor of Purchased.
classifier <- svm(
  formula = Purchased ~ .,
  data = training_set,
  type = "C-classification",
  kernel = "radial"
)
# -------------------------------------------- Predicting the Test set results -------------------------------- #
# Predict from the feature columns only (column 3, the target, is dropped)
y_pred <- predict(classifier, newdata = test_set[, -3])
y_pred
# ------------------------------------------------- Confusion Matrix ------------------------------------------- #
# Rows are the actual classes, columns the predicted classes
cm <- table(test_set[[3]], y_pred)
cm
# ------------------------------------------- Applying k-Fold Cross Validation --------------------------------- #
# install.packages("caret")
library(caret)
# Ten sets of held-out row indices drawn from the training set
folds <- createFolds(training_set$Purchased, k = 10)
# For each fold: fit on the other rows, predict the held-out rows and return
# the share of correct predictions.
cv <- lapply(folds, function(held_out) {
  fold_model <- svm(
    formula = Purchased ~ .,
    data = training_set[-held_out, ],
    type = "C-classification",
    kernel = "radial"
  )
  fold_pred <- predict(fold_model, newdata = training_set[held_out, -3])
  fold_cm <- table(training_set[held_out, 3], fold_pred)
  # Diagonal cells are the correct predictions; the full table is every case
  sum(diag(fold_cm)) / sum(fold_cm)
})
# Mean accuracy across the folds
accuracy <- mean(unlist(cv, use.names = FALSE))
accuracy
cv
# --------------------------------- Applying Grid Search to find the best parameters -------------------------- #
# install.packages("caret")
library(caret)
# Replace the earlier model with one fitted through caret::train(). No tuning
# grid or resampling control is passed, so caret's defaults for "svmRadial"
# decide which parameter values are tried.
classifier <- train(
  form = Purchased ~ .,
  data = training_set,
  method = "svmRadial"
)
classifier
# Parameter combination selected by train()
classifier[["bestTune"]]
# --------------------------------------- Visualising the Training Set results --------------------------------- #
# NOTE(review): nothing in this section appears to call into ElemStatLearn;
# the library() call is kept unchanged.
library(ElemStatLearn)
set <- training_set
# Axis values: the observed range of each feature padded by 1, in steps of 0.01
train1 <- seq(from = min(set[[1]]) - 1, to = max(set[[1]]) + 1, by = 0.01)
train2 <- seq(from = min(set[[2]]) - 1, to = max(set[[2]]) + 1, by = 0.01)
# Every (train1, train2) combination, named like the model's predictors
grid_set <- setNames(expand.grid(train1, train2), c("Age", "EstimatedSalary"))
y_grid <- predict(classifier, newdata = grid_set)
# Scatter plot of the two features
plot(
  set[, -3],
  main = "K-Fold Cross Validation (Training set)",
  xlab = "Age",
  ylab = "Estimated Salary",
  xlim = range(train1),
  ylim = range(train2)
)
# Decision boundary: contour of the predicted class over the grid
contour(
  x = train1,
  y = train2,
  z = matrix(as.numeric(y_grid), nrow = length(train1), ncol = length(train2)),
  add = TRUE
)
# Shade the grid by predicted class, then overlay the observations coloured
# by their actual class.
points(grid_set, pch = ".", col = ifelse(y_grid == 1, "springgreen3", "tomato"))
points(set, pch = 21, bg = ifelse(set[, 3] == 1, "green4", "red3"))
# ------------------------------------------- Visualising the Test Set results --------------------------------- #
# NOTE(review): nothing in this section appears to call into ElemStatLearn;
# the library() call is kept unchanged.
library(ElemStatLearn)
set <- test_set
# Axis values: the observed range of each feature padded by 1, in steps of 0.01
test1 <- seq(from = min(set[[1]]) - 1, to = max(set[[1]]) + 1, by = 0.01)
test2 <- seq(from = min(set[[2]]) - 1, to = max(set[[2]]) + 1, by = 0.01)
# Every (test1, test2) combination, named like the model's predictors
grid_set <- setNames(expand.grid(test1, test2), c("Age", "EstimatedSalary"))
y_grid <- predict(classifier, newdata = grid_set)
# Scatter plot of the two features
plot(
  set[, -3],
  main = "K-Fold Cross Validation (Test set)",
  xlab = "Age",
  ylab = "Estimated Salary",
  xlim = range(test1),
  ylim = range(test2)
)
# Decision boundary: contour of the predicted class over the grid
contour(
  x = test1,
  y = test2,
  z = matrix(as.numeric(y_grid), nrow = length(test1), ncol = length(test2)),
  add = TRUE
)
# Shade the grid by predicted class, then overlay the observations coloured
# by their actual class.
points(grid_set, pch = ".", col = ifelse(y_grid == 1, "springgreen3", "tomato"))
points(set, pch = 21, bg = ifelse(set[, 3] == 1, "green4", "red3"))