diff --git a/.gitignore b/.gitignore index 0d55a41..df216a1 100644 --- a/.gitignore +++ b/.gitignore @@ -1,5 +1,6 @@ # Compiled python modules *.pyc +.env # Secrets python file /secrets.py diff --git a/Dockerfile b/Dockerfile new file mode 100644 index 0000000..8bd0861 --- /dev/null +++ b/Dockerfile @@ -0,0 +1,28 @@ +FROM python:3.6-alpine + +# +# Handle the env info +# +ARG BUILD_COMMIT_SHA1 +ARG BUILD_COMMIT_DATE +ARG BUILD_BRANCH +ARG BUILD_DATE +ARG BUILD_REPO_ORIGIN + +ENV BUILD_COMMIT_SHA1=$BUILD_COMMIT_SHA1 +ENV BUILD_COMMIT_DATE=$BUILD_COMMIT_DATE +ENV BUILD_BRANCH=$BUILD_BRANCH +ENV BUILD_DATE=$BUILD_DATE +ENV BUILD_REPO_ORIGIN=$BUILD_REPO_ORIGIN + +RUN apk add --update alpine-sdk + +WORKDIR /src + +ADD . /src + +RUN pip install -r requirements.txt +# textblob is installed from requirements.txt, so its corpora can only be fetched after the pip step +RUN python -m textblob.download_corpora + +ENTRYPOINT ["python", "tweets_analyzer.py"] diff --git a/Makefile b/Makefile new file mode 100644 index 0000000..7e501dd --- /dev/null +++ b/Makefile @@ -0,0 +1,36 @@ +.PHONY: all build login push run + +NAME := 'tweets_analyzer' +REGISTRY := 'registry.hub.docker.com' +TAG := $$(git log -1 --pretty=%h) +VERSION := ${NAME}:${TAG} +LATEST := ${NAME}:latest + +BUILD_REPO_ORIGIN=$$(git config --get remote.origin.url) +BUILD_COMMIT_SHA1:=$$(git rev-parse --short HEAD) +BUILD_COMMIT_DATE:=$$(git log -1 --date=short --pretty=format:%ct) +BUILD_BRANCH:=$$(git symbolic-ref --short HEAD) +BUILD_DATE:=$$(date -u +"%Y-%m-%dT%H:%M:%SZ") + + +all: build login push + + +build: + docker build -t ${LATEST} -t ${VERSION} -t ${REGISTRY}/${LATEST} -t ${REGISTRY}/${VERSION} \ + --build-arg BUILD_COMMIT_SHA1=${BUILD_COMMIT_SHA1} \ + --build-arg BUILD_COMMIT_DATE=${BUILD_COMMIT_DATE} \ + --build-arg BUILD_BRANCH=${BUILD_BRANCH} \ + --build-arg BUILD_DATE=${BUILD_DATE} \ + --build-arg BUILD_REPO_ORIGIN=${BUILD_REPO_ORIGIN} \ + . 
+ +login: + docker login ${REGISTRY} + +push: + docker push ${REGISTRY}/${VERSION} + docker push ${REGISTRY}/${LATEST} + +run: + docker run --rm -it --env-file ${PWD}/.env ${LATEST} \ No newline at end of file diff --git a/README.md b/README.md index 8aed9a1..3d7a812 100644 --- a/README.md +++ b/README.md @@ -22,6 +22,22 @@ You will need the following python packages installed: tweepy, ascii_graph, tqdm pip install -r requirements.txt ``` +Put these into `.env`: + +```sh +TWITTER_CONSUMER_KEY=xxxxxx +TWITTER_CONSUMER_SECRET=xxxxxx +TWITTER_ACCESS_TOKEN=xxxxxx +TWITTER_ACCESS_TOKEN_SECRET=xxxxxx +``` + +Then load the `.env` file so your keys are present in your shell: + +```sh +source .env +``` + + ### Usage @@ -52,6 +68,15 @@ optional arguments: --no-retweets does not evaluate retweets ``` +### Docker + +```sh +# will build the docker image and tag it +make build +# will run it; append the analyzer's flags after the image name +docker run --rm -it --env-file ${PWD}/.env -v $PWD:/src tweets_analyzer:latest -n x0rz --friends +``` + ### Example output ![Twitter account activity](https://cdn-images-1.medium.com/max/800/1*KuhfDr_2bOJ7CPOzVXnwLA.png) diff --git a/requirements.txt b/requirements.txt index cf7b9ef..2d5bac1 100644 --- a/requirements.txt +++ b/requirements.txt @@ -11,3 +11,5 @@ six==1.11.0 tqdm==4.25.0 tweepy==3.6.0 urllib3==1.23 +textblob==0.15.3 +prettytable \ No newline at end of file diff --git a/secrets.py b/secrets.py index ac0102b..ca6dbd2 100755 --- a/secrets.py +++ b/secrets.py @@ -1,9 +1,10 @@ +import os # Go to https://apps.twitter.com/ and create an app. # The consumer key and secret will be generated for you after -consumer_key="xxxxxxxxxxxxxx" -consumer_secret="xxxxxxxxxxxxx" +consumer_key = os.getenv('TWITTER_CONSUMER_KEY') +consumer_secret = os.getenv('TWITTER_CONSUMER_SECRET') # After the step above, you will be redirected to your app's page. 
# Create an access token under the the "Create New App" section -access_token="xxxxxxxx-xxxxxxxxxxxxxxxxxxxxxxxxxxx" -access_token_secret="xxxxxxxxxxxxxxxxxxxxxxx" +access_token = os.getenv('TWITTER_ACCESS_TOKEN') +access_token_secret = os.getenv('TWITTER_ACCESS_TOKEN_SECRET') diff --git a/tweets_analyzer.py b/tweets_analyzer.py index 7a96129..28349f5 100755 --- a/tweets_analyzer.py +++ b/tweets_analyzer.py @@ -19,9 +19,11 @@ from __future__ import unicode_literals +from textblob import TextBlob from ascii_graph import Pyasciigraph -from ascii_graph.colors import Gre, Yel, Red -from ascii_graph.colordata import hcolor +from ascii_graph.colors import Gre, Yel, Red, Blu +from ascii_graph.colordata import hcolor, vcolor +from prettytable import PrettyTable from tqdm import tqdm import tweepy import numpy @@ -102,6 +104,13 @@ detected_sources = collections.Counter() detected_places = collections.Counter() geo_enabled_tweets = 0 +subjectivity = collections.Counter() +polarity = collections.Counter() +sentiment_tweets = { + 'positive': [], + 'negative': [], + 'neutral': [], +} detected_hashtags = collections.Counter() detected_domains = collections.Counter() detected_timezones = collections.Counter() @@ -172,6 +181,44 @@ def process_tweet(tweet): tweet.place.name = tweet.place.name detected_places[tweet.place.name] += 1 + # detect sentiment of tweet + blob = TextBlob(tweet.text) + blob.tags + for sentence in blob.sentences: + positive, neutral, negative = 0,0,0 + # import pdb;pdb.set_trace() + # print(sentence.sentiment.polarity) + # print(blob.sentiment_assessments) + if sentence.sentiment.polarity > 0: + positive += 1 + if sentence.sentiment.polarity == 0: + neutral += 1 + if sentence.sentiment.polarity < 0: + negative += 1 + if sentence.sentiment.subjectivity > 0: + polarity['subjective'] +=1 + if sentence.sentiment.subjectivity == 0: + polarity['neutral'] += 1 + if sentence.sentiment.subjectivity < 0: + polarity['negative'] += 1 + + x = 
collections.Counter({'positive': positive, + 'negative': negative, + 'neutral': neutral}) + # print(x.most_common()) + key, value = x.most_common()[0] + + if key == 'positive': + subjectivity['positive'] += value + sentiment_tweets['positive'] += [(tweet, blob.sentiment_assessments)] + if key == 'neutral': + subjectivity['neutral'] += value + sentiment_tweets['neutral'] += [(tweet, blob.sentiment_assessments)] + if key == 'negative': + subjectivity['negative'] += value + sentiment_tweets['negative'] += [(tweet, blob.sentiment_assessments)] + + # Updating hashtags list if tweet.entities['hashtags']: for ht in tweet.entities['hashtags']: @@ -323,6 +370,42 @@ def print_charts(dataset, title, weekday=False): print(line) cprint("") +def print_simple_chart(dataset, title): + """Print an ASCII bar chart of dataset.most_common() coloured with the Yel pattern; printed only when args.json is False.""" + pattern = [Yel] + data = vcolor(dataset.most_common(), pattern) + + graph = Pyasciigraph( + separator_length=4, + multivalue=False, + human_readable='si', + ) + + if args.json is False: + for line in graph.graph(title, data): + if not color_supported: + ansi_escape = re.compile(r'\x1B\[[0-?]*[ -/]*[@-~]') + line = ansi_escape.sub('', line) + print(line) + +def print_sample_tweets(dataset): + """Print, for each sentiment bucket in dataset, a table of up to ten (tweet, assessment) samples; like the charts, tables are printed only when args.json is False.""" + for layer in dataset: + table = PrettyTable() + table.field_names = ["Tweet", "Date", "Sentiment", "Subjectivity", "Link"] + cprint("[+] Tweets that are {}".format(layer)) + if layer == 'negative': + # we want the MOST negative things said + data = sorted(dataset[layer], key=lambda ii: ii[1].polarity, reverse=False) + else: + # and the most positive/neutral by default + data = sorted(dataset[layer], key=lambda ii: ii[1].polarity, reverse=True) + + for tweet,sentiment in data[0:10]: + table.add_row([tweet.text.strip(), str(tweet.created_at), sentiment.polarity, sentiment.subjectivity, "https://twitter.com/{}/status/{}".format(tweet.user.screen_name, tweet.id)]) + if args.json is False: + print(table) + def main(): global color_supported @@ -439,6 +522,11 @@ def main(): 
print_stats(detected_domains, top=6) jsono["top_referenced_domains"] = detected_domains + cprint("[+] Sentiment") + print_simple_chart(subjectivity, title='Sentiment') + print_simple_chart(polarity, title='Subjectivity') + print_sample_tweets(sentiment_tweets) + if args.friends: max_friends = numpy.amin([user_info.friends_count, 300]) cprint("[+] Getting %d @%s's friends data..." % (max_friends, args.name)) @@ -473,5 +561,5 @@ def main(): main() except tweepy.error.TweepError as e: cprint("[\033[91m!\033[0m] Twitter error: %s" % e) - except Exception as e: - cprint("[\033[91m!\033[0m] Error: %s" % e) + # except Exception as e: + # cprint("[\033[91m!\033[0m] Error: %s" % e)