-
Notifications
You must be signed in to change notification settings - Fork 3
/
Copy pathIfClassification.py
61 lines (51 loc) · 2.45 KB
/
IfClassification.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
import logging
from datetime import datetime, timezone
from config import FILEDIR, FILEBREAK
import re
class IfClassification:
    """Label rows of a comma-separated tweet-feature file with a rule-based spam guess.

    Each data row of the input file (the first line is skipped as a header) is
    split on commas into 13 fields.  The parsed values are stored on the
    instance, a spam guess is computed from them by :meth:`classification`,
    and the row is re-written to an output file with the guess and a
    correctness flag appended as two extra columns.

    Args:
        current_file: Path of the input file to read.  Defaults to
            ``DEFAULT_INPUT``.
        output_dir: Prefix prepended to generated output file names
            (concatenated as-is, so it should end with a path separator).
            Defaults to ``FILEDIR`` from ``config``.
        file_break: Number of rows after which a new output file is started.
            Defaults to ``FILEBREAK`` from ``config``.
    """

    # Input file used when the constructor is not given an explicit path.
    DEFAULT_INPUT = '/Users/delphineshi/Downloads/temp/tweets_2018-10-31T10:29:59.502392.txt'

    # Translation tables that delete quoting characters from raw fields.
    _DROP_SINGLE_QUOTES = str.maketrans("", "", "'")
    _DROP_QUOTES = str.maketrans("", "", "\"'")
    _DROP_QUOTES_AND_NEWLINE = str.maketrans("", "", "\"'\n")

    def __init__(self, current_file=None, output_dir=None, file_break=None):
        self.current_file = self.DEFAULT_INPUT if current_file is None else current_file
        # The config constants are only consulted when no override is given.
        self.output_dir = FILEDIR if output_dir is None else output_dir
        self.file_break = FILEBREAK if file_break is None else file_break
        self.nf_line_count = 0
        self.new_file = self._new_output_path()

    def _new_output_path(self):
        """Return a fresh, timestamped output path under ``self.output_dir``."""
        stamp = datetime.now().strftime("%Y-%m-%dT%H:%M:%S.%f")
        return self.output_dir + "tweets_" + stamp + ".txt"

    def _parse_line(self, line):
        """Split one raw row on commas and store its 13 fields on the instance.

        Raises:
            IndexError: If the row has fewer than 13 comma-separated fields.
            ValueError: If a numeric field cannot be converted.
        """
        data_split = line.split(',')
        print(data_split)

        def unquoted(index):
            return data_split[index].translate(self._DROP_SINGLE_QUOTES)

        self.id = unquoted(0)
        self.nb_follower = int(unquoted(1))
        self.nb_following = int(unquoted(2))
        self.verified = unquoted(3)
        self.reputation = float(unquoted(4))
        self.age = unquoted(5)
        self.nb_tweets = int(unquoted(6))
        self.time = unquoted(7)
        self.proportion_spamwords = float(unquoted(8))
        self.orthographe = float(unquoted(9))
        self.nb_emoji = int(unquoted(10))
        self.RT = data_split[11].translate(self._DROP_QUOTES)
        # The last field carries the line terminator, so drop it as well.
        self.spam = data_split[12].translate(self._DROP_QUOTES_AND_NEWLINE)

    def _format_record(self, line):
        """Return the output row: the input row plus ``,<guess>,<verdict>``."""
        # Strip the line ending first; otherwise the classification would be
        # written on a line of its own instead of as trailing columns.
        return line.rstrip("\r\n") + "," + self.classification() + "\n"

    def open_write(self):
        """Read the input file and append every classified row to the output.

        The first line of the input is skipped.  Blank lines are ignored.
        Once more than ``self.file_break`` rows have been written to the
        current output file, a new timestamped output file is started.
        """
        with open(self.current_file, "r", encoding='utf-8') as f:
            data = f.readlines()

        for line in data[1:]:
            if not line.strip():
                # Nothing to parse (e.g. a trailing empty line).
                continue
            self._parse_line(line)
            with open(self.new_file, "a", encoding='utf-8') as nf:
                nf.write(self._format_record(line))
            self.nf_line_count += 1
            if self.nf_line_count > self.file_break:
                # Rotate the OUTPUT file; the input path must stay untouched.
                logging.info("Closing file %s", self.new_file)
                self.new_file = self._new_output_path()
                self.nf_line_count = 0

    def classification(self):
        """Return ``"<guess>,<verdict>"`` for the row currently stored on self.

        The guess is ``"false"`` only when the row has no emoji, a spam-word
        proportion below 0.1, an ``orthographe`` score above 0.5 and
        ``RT == "true"``; otherwise it is ``"true"``.  The verdict is ``1``
        when the guess equals ``self.spam`` (string comparison), else ``0``.
        """
        passes_all_rules = (
            self.nb_emoji == 0
            and self.proportion_spamwords < 0.1
            and self.orthographe > 0.5
            and self.RT == "true"
        )
        potentialspam = "false" if passes_all_rules else "true"
        verdict = 1 if potentialspam == self.spam else 0
        print(verdict)
        return potentialspam + "," + str(verdict)
# Script entry: build a classifier and process its input file.
# There is no `__main__` guard, so these statements also run on import.
fichier = IfClassification()
fichier.open_write()