-
Notifications
You must be signed in to change notification settings - Fork 0
/
functions.py
91 lines (63 loc) · 2.43 KB
/
functions.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
from functions import *
from Main import *
import math
import time, sys
#import twitter
import urllib3
import re
def cleanString(string):
    """Return tweet text with links, tags and stray noise removed.

    The substitutions run in a fixed order:

    1. drop anything starting with ``http`` up to the next whitespace,
    2. drop the retweet marker ``RT `` (only when ``RT`` starts a word),
    3. drop ``@mentions`` and ``#hashtags``,
    4. decode the ``&amp;`` HTML entity to ``&``,
    5. replace any single character flanked by whitespace with one space
       (this also drops one-letter words such as "a" or "I"),
    6. collapse runs of spaces left behind by the removals.

    Args:
        string: Raw tweet text.

    Returns:
        The cleaned text. Leading/trailing whitespace is not stripped.
    """
    string = re.sub(r'http\S+', '', string)
    # \b keeps words that merely end in "RT" (e.g. "ART ") intact.
    string = re.sub(r'\bRT ', '', string)
    string = re.sub(r'@\S+', '', string)
    string = re.sub(r'#\S+', '', string)
    # The previous pattern replaced '&' with '&', which did nothing.
    string = re.sub(r'&amp;', '&', string)
    string = re.sub(r'\s.\s', ' ', string)
    # The previous pattern replaced ' ' with ' ', which did nothing; collapse
    # every run of two or more spaces into one instead.
    string = re.sub(r' {2,}', ' ', string)
    return string
def printTweets(statuses, includeDatesToConsole=False, fileName=None):
    """Print tweets to the console or append their cleaned text to a file.

    Args:
        statuses: Sequence of status objects exposing ``.text`` (and
            ``.created_at`` when ``includeDatesToConsole`` is used).
        includeDatesToConsole: When true and ``fileName`` is given, print each
            written status' ``created_at`` to the console. It has no effect
            when ``fileName`` is ``None``.
        fileName: Path of the file to append to. When ``None`` the raw
            ``.text`` of every status is printed instead and nothing is
            written.

    In file mode each status is passed through ``cleanString`` and written on
    its own line; a status equal to the one immediately before it in
    ``statuses`` is skipped.
    """
    if fileName is None:
        for status in statuses:
            print(status.text)
        return

    # Let the file object do the UTF-8 encoding: writing pre-encoded bytes to
    # a text-mode file raises TypeError on Python 3. The ``with`` block also
    # guarantees the handle is closed if a write fails part-way through.
    with open(fileName, "a", encoding="utf-8") as outFile:
        previous = None
        for index, status in enumerate(statuses):
            isRepeat = index > 0 and status == previous
            previous = status
            if isRepeat:
                continue
            outFile.write(cleanString(status.text) + '\n')
            if includeDatesToConsole:
                print(status.created_at)
def getTweetsFromTimeline(api, numOfTweets=1, startingID=None):
    """Fetch up to ``numOfTweets`` statuses from the home timeline.

    Requests are issued in pages of at most 200 via ``api.GetHomeTimeline``,
    and the total requested is capped at 800. Paging stops early as soon as
    the API returns an empty page.

    Args:
        api: Client object exposing ``GetHomeTimeline(count=..., max_id=...)``.
        numOfTweets: Number of tweets to request (values above 800 are
            reduced to 800).
        startingID: ``max_id`` passed to the first request; ``None`` starts
            from the newest tweet.

    Returns:
        List of the fetched status objects, minus any whose ``id`` equals
        ``TWITTER_ID``.
    """
    statuses = []
    oldestTweet = startingID
    remaining = min(800, numOfTweets)  # limits number of tweets retrieved to 800
    while remaining > 0:
        getCount = min(remaining, 200)
        remaining -= getCount
        page = api.GetHomeTimeline(count=getCount, max_id=oldestTweet)
        if not page:
            # Nothing older is available; indexing the last element of an
            # empty result would raise IndexError and further calls are wasted.
            break
        # NOTE(review): this compares the tweet's own id with TWITTER_ID; if
        # TWITTER_ID is an account id the check presumably should look at the
        # author instead - confirm against where TWITTER_ID is defined.
        kept = []
        for status in page:
            if status.id == TWITTER_ID:
                print("DELETED SELF TWEET")
            else:
                kept.append(status)
        statuses.extend(kept)
        # Take the cursor from the raw page so paging still advances when
        # every status in the page was filtered out.
        # NOTE(review): if the client treats max_id as inclusive, the boundary
        # tweet is returned again on the next page - confirm.
        oldestTweet = page[-1].id
    return statuses
def getTweetsFromUser(api, userID, numOfCalls=1, startingID=None, countPerCall=200):
    """Fetch statuses from one user's timeline over several API calls.

    Args:
        api: Client object exposing
            ``GetUserTimeline(user_id=..., max_id=..., count=...)``.
        userID: Identifier passed as ``user_id`` to every call.
        numOfCalls: Maximum number of calls to make.
        startingID: ``max_id`` passed to the first call; ``None`` starts from
            the newest tweet.
        countPerCall: Statuses requested per call (values above 200 are
            reduced to 200).

    Returns:
        List of all status objects returned, in the order received. Each call
        after the first passes the ``id`` of the last status collected so far
        as ``max_id``.
    """
    statuses = []
    oldestTweet = startingID
    countPerCall = min(200, countPerCall)
    for _ in range(numOfCalls):
        page = api.GetUserTimeline(user_id=userID, max_id=oldestTweet, count=countPerCall)
        if not page:
            # No more tweets: stop instead of indexing an empty list (which
            # raised IndexError on an empty first response) or repeating the
            # same request.
            break
        statuses.extend(page)
        # NOTE(review): if the client treats max_id as inclusive, the boundary
        # tweet is returned again by the next call - confirm.
        oldestTweet = statuses[-1].id
    return statuses
def compileTweets(api=None, outputFile="RETRIEVED_TWEETS.txt", num=800):
    """Fetch home-timeline tweets and append their cleaned text to a file.

    Args:
        api: Client passed to ``getTweetsFromTimeline``. When ``None`` a
            client is obtained from ``getAPI()`` at call time.
        outputFile: Path of the file that ``printTweets`` appends to.
        num: Number of tweets to request from the timeline.

    Returns:
        ``outputFile``, unchanged.

    Side effects: prints each written status' creation date (via
    ``printTweets``) and then the number of statuses fetched.
    """
    # Resolve the default lazily: a ``getAPI()`` call in the signature runs
    # once when the module is imported, not each time the function is called.
    if api is None:
        api = getAPI()
    statuses = getTweetsFromTimeline(api, num)
    printTweets(statuses, True, outputFile)
    print(len(statuses))
    return outputFile