Skip to content

Commit

Permalink
Added compression. Needs more info from Issue #17
Browse files Browse the repository at this point in the history
  • Loading branch information
cegme committed Oct 26, 2018
1 parent b4a841f commit 0a210ee
Showing 1 changed file with 11 additions and 11 deletions.
22 changes: 11 additions & 11 deletions ingestor/africa/gettweets.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@
pull in tweets."""

import argparse
import io
import gzip
from json import dumps
from os import environ
import sys
Expand All @@ -13,7 +13,7 @@

RUNID = environ.get('RUNID') or "USADEFAULT"
FILENAME = environ.get('TWEET_FILE') or \
'/data/tweetsdb/tweet_africa_{}.json'.format(time.strftime("%Y%m%d%H%M%S"))
'/data/tweetsdb/tweet_africa_{}.json.gz'.format(time.strftime("%Y%m%d%H%M%S"))


# Twitter API info:
Expand All @@ -27,23 +27,25 @@
# Connect to twitter.com/oudalab bismol app


class FoodStreamListener(tweepy.StreamListener):
class AfricaStreamListener(tweepy.StreamListener):
"""Extended Stream listener for these food tweets."""

# def __init__(self,api=None):
def __init__(self, api):
super(FoodStreamListener, self).__init__(api) # Python 3
self.twfile = io.open(FILENAME, 'w', encoding="utf-8")
super(AfricaStreamListener, self).__init__(api) # Python 3
self.twfile = gzip.open(FILENAME, 'wt', encoding="utf-8")
print('__init__ {}'.format(FILENAME), file=sys.stderr)

def on_status(self, status):
"""This is deprecated and not actually used anymore"""
# print('{}'.format(dumps(status._json)), file=sys.stderr)
print('{}'.format(dumps(status._json)), file=self.twfile)
sys.stderr.write('.')
# print('{}'.format(dumps(status._json)), file=self.twfile)
self.twfile.write('{}\n'.format(dumps(status._json)))
# sys.stderr.write('.')

def on_data(self, raw_data):
print('{}'.format(raw_data), file=self.twfile)
#print('{}'.format(raw_data), file=self.twfile)
self.twfile.write('{}\n'.format(raw_data))
return True

def on_error(self, status_code):
Expand Down Expand Up @@ -73,11 +75,9 @@ def start(args):

api = tweepy.API(auth, compression=True, wait_on_rate_limit=True)

mystreamlistener = FoodStreamListener(api)
mystreamlistener = AfricaStreamListener(api)
mystream = tweepy.Stream(auth=api.auth, listener=mystreamlistener)

print("Keywords:\n{}".format(get_keywords()), file=sys.stderr)

# English only tweets
languages = ["en"]
# US Bounding box
Expand Down

0 comments on commit 0a210ee

Please sign in to comment.