-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathSVMTest.py
67 lines (57 loc) · 1.53 KB
/
SVMTest.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
from sklearn import svm
import csv
# Test script: trains a linear SVM on each fold CSV and prints its accuracy.
def classifyDocs(src, train_fraction=0.9):
    """Train a linear SVM on the head of a CSV file and score it on the tail.

    The file at ``src`` is read as comma-separated text.  The first line is
    skipped as a header.  In every other line the first column is ignored,
    the second column is the class label and the remaining columns are
    numeric features (double quotes around values are removed).  Blank
    lines are ignored.

    Rows are split in file order: line number
    ``int(line_count * train_fraction)`` (1-based, header included) is the
    first test row; every row between the header and it is training data.

    Args:
        src: Path of the CSV file to load.
        train_fraction: Position of the train/test boundary as a fraction of
            the file's line count.  Defaults to 0.9.

    Returns:
        The fraction of test rows whose predicted label equals the label in
        the file.  The same value is printed as ``Accuracy: <value>``.

    Raises:
        ValueError: If the split leaves no training rows or no test rows, or
            if a feature value cannot be converted to ``float``.
        IndexError: If a non-blank data row has fewer than two columns.
    """
    # Text mode so that every line is a ``str`` that can be split on ",".
    with open(src, 'r') as data:
        rows = [line.rstrip("\r\n") for line in data]
    # Blank lines have no label column; drop them before computing the split.
    rows = [row for row in rows if row.strip()]

    boundary = int(len(rows) * train_fraction)
    # The boundary line itself belongs to the test set (it used to be dropped).
    # Clamp to 1 so the header at index 0 never leaks into the test rows.
    first_test = max(boundary - 1, 1)
    train_labels, train_data = _parse_rows(rows[1:first_test])
    test_labels, test_data = _parse_rows(rows[first_test:])
    if not train_data or not test_data:
        raise ValueError(
            "not enough rows in %r for a train/test split (train=%d, test=%d)"
            % (src, len(train_data), len(test_data)))

    clf = svm.SVC(kernel='linear')
    clf.fit(train_data, train_labels)

    # predict() expects a 2-D collection of samples, so score them in one call.
    predictions = clf.predict(test_data)
    correct = sum(1 for predicted, actual in zip(predictions, test_labels)
                  if predicted == actual)
    accuracy = correct / float(len(test_labels))
    print("Accuracy: " + str(accuracy))
    return accuracy


def _parse_rows(rows):
    """Return ``(labels, feature_vectors)`` parsed from CSV data lines."""
    labels = []
    vectors = []
    for row in rows:
        fields = row.split(",")
        labels.append(fields[1])
        # Features are selected by position, not by comparing values against
        # the id/label text, so every row yields the same number of features.
        values = fields[2:]
        # A trailing comma leaves one empty final field; it is not a feature.
        if values and not values[-1].strip():
            values.pop()
        vectors.append([float(value.replace('"', "")) for value in values])
    return labels, vectors
# Evaluate the classifier on each of the ten fold files, fold1.csv .. fold10.csv.
for fold in range(1, 11):
    fold_path = "/Users/the_james_marq/PAN/outputs/9Feb14-100topics-236602docs/folds/fold" + str(fold) + ".csv"
    classifyDocs(fold_path)