-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathnblearn3.py
83 lines (76 loc) · 2.51 KB
/
nblearn3.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
import sys
import os
import glob
# Naive Bayes spam/ham trainer.
#
# Walks a training directory, counts word occurrences in every ".txt" file
# found in directories whose path (relative to the training directory)
# contains "spam" or "ham", applies add-one smoothing, and writes the class
# priors followed by the per-word probabilities to a model file.
#
# Usage: python <this script> [TRAIN_DIR [MODEL_PATH]]
# With no arguments the hard-coded default paths below are used.

TRAIN_DIR = "C:\\Users\\dnish\\Desktop\\data\\train"
MODEL_PATH = "C:\\Users\\dnish\\Desktop\\data\\nbmodel.txt"

# A frozenset gives O(1) membership tests in the per-word inner loop.
STOPWORDS = frozenset([
    "a", "an", "the", "in", "to", "from", "are", "as", "at", "be", "by",
    "for", "has", "he", "is", "it", "its", "of", "on", "was", "were",
    "will", "with",
])

# Suffixes are tried in this order and only the first match is stripped.
_SUFFIXES = ("s", "ed", "ing")


def _strip_suffix(word):
    """Return *word* with the first matching suffix of _SUFFIXES removed."""
    for suffix in _SUFFIXES:
        if word.endswith(suffix):
            return word[:-len(suffix)]
    return word


def _iter_words(path):
    """Yield the lower-cased, suffix-stripped, non-stop-word tokens of *path*.

    The file is decoded as latin1 and split on whitespace, line by line.
    """
    with open(path, "r", encoding="latin1") as handle:
        for line in handle:
            for token in line.lower().split():
                word = _strip_suffix(token)
                # NOTE(review): stop words are tested AFTER suffix stripping,
                # so "is", "has" and "was" become "i", "ha", "wa" and are
                # never filtered. Kept unchanged because whatever reads the
                # model file is not visible here and may rely on the same
                # tokenization -- confirm before changing.
                if word not in STOPWORDS:
                    yield word


def train(train_dir):
    """Count documents and words per class under *train_dir*.

    A directory contributes to a class when its path relative to
    *train_dir* contains the substring "spam" and/or "ham". Matching on the
    relative path keeps an unrelated parent directory name from deciding
    the label. Files are opened by joined path, so the process working
    directory is never changed.

    Returns:
        A tuple ``(file_count, ham_files, spam_files, ham_counts,
        spam_counts)`` where the last two are dicts mapping word -> raw
        occurrence count, in first-seen order.
    """
    file_count = 0
    ham_files = 0
    spam_files = 0
    ham_counts = {}
    spam_counts = {}
    for root, _subdirs, files in os.walk(train_dir):
        print(root)
        relative_root = os.path.relpath(root, train_dir)
        is_spam = "spam" in relative_root
        is_ham = "ham" in relative_root
        if not (is_spam or is_ham):
            continue
        for file_name in files:
            # Substring test (not endswith), as in the original script.
            if ".txt" not in file_name:
                continue
            file_count += 1
            for word in _iter_words(os.path.join(root, file_name)):
                if is_spam:
                    spam_counts[word] = spam_counts.get(word, 0) + 1
                if is_ham:
                    ham_counts[word] = ham_counts.get(word, 0) + 1
            if is_ham:
                ham_files += 1
            if is_spam:
                spam_files += 1
    return file_count, ham_files, spam_files, ham_counts, spam_counts


def add_one_smoothing(ham_counts, spam_counts):
    """Give both dicts the same vocabulary and add 1 to every count, in place.

    Words missing from one class are appended to that class's dict in the
    other dict's order, which fixes the line order of the model file.
    """
    for word in ham_counts:
        spam_counts.setdefault(word, 0)
    for word in spam_counts:
        ham_counts.setdefault(word, 0)
    for word in ham_counts:
        ham_counts[word] += 1
        spam_counts[word] += 1


def write_model(model_path, file_count, ham_files, spam_files,
                ham_counts, spam_counts):
    """Write priors and smoothed word probabilities to *model_path*.

    Layout: spam prior, ham prior, one "word probability" line per ham
    entry, the separator line "Spam Prob", then one line per spam entry.
    Every line ends with " \\n". *file_count* must be non-zero.
    """
    ham_total = sum(ham_counts.values())
    spam_total = sum(spam_counts.values())
    # The context manager closes the file even if a write fails.
    with open(model_path, "w", encoding="latin1") as out:
        out.write(str(spam_files / file_count) + " \n")
        out.write(str(ham_files / file_count) + " \n")
        for word, occurrences in ham_counts.items():
            out.write(word + " " + str(occurrences / ham_total) + " \n")
        out.write("Spam Prob \n")
        for word, occurrences in spam_counts.items():
            out.write(word + " " + str(occurrences / spam_total) + " \n")


def main(argv=None):
    """Train from the training directory and write the model file.

    Args:
        argv: optional ``[train_dir, model_path]`` list; defaults to
            ``sys.argv[1:]``. Missing entries fall back to TRAIN_DIR and
            MODEL_PATH.

    Returns:
        0 on success, 1 when no training file was found. In the latter
        case the model file is not created or truncated.
    """
    args = sys.argv[1:] if argv is None else list(argv)
    train_dir = args[0] if len(args) > 0 else TRAIN_DIR
    model_path = args[1] if len(args) > 1 else MODEL_PATH

    print("name")
    file_count, ham_files, spam_files, ham_counts, spam_counts = train(train_dir)
    if file_count == 0:
        # Previously this case opened (and truncated) the model file and
        # then died with ZeroDivisionError while computing the priors.
        print("no training files found under " + train_dir, file=sys.stderr)
        return 1

    add_one_smoothing(ham_counts, spam_counts)
    print("spam word count" + str(sum(spam_counts.values())))
    print("ham word count" + str(sum(ham_counts.values())))
    write_model(model_path, file_count, ham_files, spam_files,
                ham_counts, spam_counts)
    return 0


if __name__ == "__main__":
    sys.exit(main())