diff --git a/.gitignore b/.gitignore
new file mode 100644
index 0000000..62bf635
--- /dev/null
+++ b/.gitignore
@@ -0,0 +1,363 @@
+pulumi/Pulumi.prod.yaml
+
+
+# Created by https://www.toptal.com/developers/gitignore/api/python,azurefunctions,jetbrains+all,visualstudiocode,windows,macos,linux
+# Edit at https://www.toptal.com/developers/gitignore?templates=python,azurefunctions,jetbrains+all,visualstudiocode,windows,macos,linux
+
+### AzureFunctions ###
+# Azure Functions localsettings file
+local.settings.json
+
+### JetBrains+all ###
+# Covers JetBrains IDEs: IntelliJ, RubyMine, PhpStorm, AppCode, PyCharm, CLion, Android Studio, WebStorm and Rider
+# Reference: https://intellij-support.jetbrains.com/hc/en-us/articles/206544839
+
+# User-specific stuff
+.idea/**/workspace.xml
+.idea/**/tasks.xml
+.idea/**/usage.statistics.xml
+.idea/**/dictionaries
+.idea/**/shelf
+
+# AWS User-specific
+.idea/**/aws.xml
+
+# Generated files
+.idea/**/contentModel.xml
+
+# Sensitive or high-churn files
+.idea/**/dataSources/
+.idea/**/dataSources.ids
+.idea/**/dataSources.local.xml
+.idea/**/sqlDataSources.xml
+.idea/**/dynamic.xml
+.idea/**/uiDesigner.xml
+.idea/**/dbnavigator.xml
+
+# Gradle
+.idea/**/gradle.xml
+.idea/**/libraries
+
+# Gradle and Maven with auto-import
+# When using Gradle or Maven with auto-import, you should exclude module files,
+# since they will be recreated, and may cause churn. Uncomment if using
+# auto-import.
+# .idea/artifacts
+# .idea/compiler.xml
+# .idea/jarRepositories.xml
+# .idea/modules.xml
+# .idea/*.iml
+# .idea/modules
+# *.iml
+# *.ipr
+
+# CMake
+cmake-build-*/
+
+# Mongo Explorer plugin
+.idea/**/mongoSettings.xml
+
+# File-based project format
+*.iws
+
+# IntelliJ
+out/
+
+# mpeltonen/sbt-idea plugin
+.idea_modules/
+
+# JIRA plugin
+atlassian-ide-plugin.xml
+
+# Cursive Clojure plugin
+.idea/replstate.xml
+
+# SonarLint plugin
+.idea/sonarlint/
+
+# Crashlytics plugin (for Android Studio and IntelliJ)
+com_crashlytics_export_strings.xml
+crashlytics.properties
+crashlytics-build.properties
+fabric.properties
+
+# Editor-based Rest Client
+.idea/httpRequests
+
+# Android studio 3.1+ serialized cache file
+.idea/caches/build_file_checksums.ser
+
+### JetBrains+all Patch ###
+# Ignore everything but code style settings and run configurations
+# that are supposed to be shared within teams.
+
+.idea/*
+
+!.idea/codeStyles
+!.idea/runConfigurations
+
+### Linux ###
+*~
+
+# temporary files which can be created if a process still has a handle open of a deleted file
+.fuse_hidden*
+
+# KDE directory preferences
+.directory
+
+# Linux trash folder which might appear on any partition or disk
+.Trash-*
+
+# .nfs files are created when an open file is removed but is still being accessed
+.nfs*
+
+### macOS ###
+# General
+.DS_Store
+.AppleDouble
+.LSOverride
+
+# Icon must end with two \r
+Icon
+
+
+# Thumbnails
+._*
+
+# Files that might appear in the root of a volume
+.DocumentRevisions-V100
+.fseventsd
+.Spotlight-V100
+.TemporaryItems
+.Trashes
+.VolumeIcon.icns
+.com.apple.timemachine.donotpresent
+
+# Directories potentially created on remote AFP share
+.AppleDB
+.AppleDesktop
+Network Trash Folder
+Temporary Items
+.apdisk
+
+### macOS Patch ###
+# iCloud generated files
+*.icloud
+
+### Python ###
+# Byte-compiled / optimized / DLL files
+__pycache__/
+*.py[cod]
+*$py.class
+
+# C extensions
+*.so
+
+# Distribution / packaging
+.Python
+build/
+develop-eggs/
+dist/
+downloads/
+eggs/
+.eggs/
+lib/
+lib64/
+parts/
+sdist/
+var/
+wheels/
+share/python-wheels/
+*.egg-info/
+.installed.cfg
+*.egg
+MANIFEST
+
+# PyInstaller
+# Usually these files are written by a python script from a template
+# before PyInstaller builds the exe, so as to inject date/other infos into it.
+*.manifest
+*.spec
+
+# Installer logs
+pip-log.txt
+pip-delete-this-directory.txt
+
+# Unit test / coverage reports
+htmlcov/
+.tox/
+.nox/
+.coverage
+.coverage.*
+.cache
+nosetests.xml
+coverage.xml
+*.cover
+*.py,cover
+.hypothesis/
+.pytest_cache/
+cover/
+
+# Translations
+*.mo
+*.pot
+
+# Django stuff:
+*.log
+local_settings.py
+db.sqlite3
+db.sqlite3-journal
+
+# Flask stuff:
+instance/
+.webassets-cache
+
+# Scrapy stuff:
+.scrapy
+
+# Sphinx documentation
+docs/_build/
+
+# PyBuilder
+.pybuilder/
+target/
+
+# Jupyter Notebook
+.ipynb_checkpoints
+
+# IPython
+profile_default/
+ipython_config.py
+
+# pyenv
+# For a library or package, you might want to ignore these files since the code is
+# intended to run in multiple environments; otherwise, check them in:
+# .python-version
+
+# pipenv
+# According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
+# However, in case of collaboration, if having platform-specific dependencies or dependencies
+# having no cross-platform support, pipenv may install dependencies that don't work, or not
+# install all needed dependencies.
+#Pipfile.lock
+
+# poetry
+# Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control.
+# This is especially recommended for binary packages to ensure reproducibility, and is more
+# commonly ignored for libraries.
+# https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control
+#poetry.lock
+
+# PEP 582; used by e.g. github.com/David-OConnor/pyflow
+__pypackages__/
+
+# Celery stuff
+celerybeat-schedule
+celerybeat.pid
+
+# SageMath parsed files
+*.sage.py
+
+# Environments
+.env
+.venv
+env/
+venv/
+ENV/
+env.bak/
+venv.bak/
+
+# Spyder project settings
+.spyderproject
+.spyproject
+
+# Rope project settings
+.ropeproject
+
+# mkdocs documentation
+/site
+
+# mypy
+.mypy_cache/
+.dmypy.json
+dmypy.json
+
+# Pyre type checker
+.pyre/
+
+# pytype static type analyzer
+.pytype/
+
+# Cython debug symbols
+cython_debug/
+
+# PyCharm
+# JetBrains specific template is maintained in a separate JetBrains.gitignore that can
+# be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore
+# and can be added to the global gitignore or merged into this file. For a more nuclear
+# option (not recommended) you can uncomment the following to ignore the entire idea folder.
+#.idea/
+
+### VisualStudioCode ###
+.vscode/*
+!.vscode/settings.json
+!.vscode/tasks.json
+!.vscode/launch.json
+!.vscode/extensions.json
+!.vscode/*.code-snippets
+
+# Local History for Visual Studio Code
+.history/
+
+# Built Visual Studio Code Extensions
+*.vsix
+
+### VisualStudioCode Patch ###
+# Ignore all local history of files
+.history
+.ionide
+
+# Support for Project snippet scope
+
+### Windows ###
+# Windows thumbnail cache files
+Thumbs.db
+Thumbs.db:encryptable
+ehthumbs.db
+ehthumbs_vista.db
+
+# Dump file
+*.stackdump
+
+# Folder config file
+[Dd]esktop.ini
+
+# Recycle Bin used on file shares
+$RECYCLE.BIN/
+
+# Windows Installer files
+*.cab
+*.msi
+*.msix
+*.msm
+*.msp
+
+# Windows shortcuts
+*.lnk
+
+# End of https://www.toptal.com/developers/gitignore/api/python,azurefunctions,jetbrains+all,visualstudiocode,windows,macos,linux
+
+
+
+# From default Azure Functions extension-generated gitignore
+
+# Azure Functions artifacts
+bin
+obj
+appsettings.json
+
+# Azurite artifacts
+__blobstorage__
+__queuestorage__
+__azurite_db*__.json
+.python_packages
\ No newline at end of file
diff --git a/.img/cloud-news-summarizer.png b/.img/cloud-news-summarizer.png
new file mode 100644
index 0000000..fa7a7a6
Binary files /dev/null and b/.img/cloud-news-summarizer.png differ
diff --git a/.img/diagram.py b/.img/diagram.py
new file mode 100644
index 0000000..e561b66
--- /dev/null
+++ b/.img/diagram.py
@@ -0,0 +1,31 @@
+from diagrams import Cluster, Diagram
+from diagrams.azure.compute import FunctionApps
+from diagrams.azure.storage import StorageAccounts, QueuesStorage, TableStorage
+from diagrams.azure.ml import CognitiveServices
+from diagrams.oci.monitoring import Alarm
+from diagrams.saas.chat import Slack
+
+with Diagram("cloud-news-summarizer", show=False, outformat="png"):
+    with Cluster("{prefix}cloudnews\nStorage Account"):
+        queue = QueuesStorage("processqueue\nQueue")
+        poison_queue = QueuesStorage("processqueue-poison\nQueue")
+        table = TableStorage("rsscheckpoint\nTable")
+
+    timer = Alarm("30m schedule trigger")
+    rss = FunctionApps("AwsRssChecker\nFunction")
+    processor = FunctionApps("AwsPostHandler\nFunction")
+    failure = FunctionApps("FailureHandler\nFunction")
+
+    cog = CognitiveServices("{prefix}-cloud-news-cognitive\nCognitive Service")
+    slack = Slack("Slack notification")
+
+    timer >> rss
+    rss >> table
+    rss << table
+    rss >> queue
+    queue >> processor
+    processor >> poison_queue
+    processor >> cog
+    processor << cog
+    poison_queue >> failure
+    [processor, failure] >> slack
diff --git a/.img/requirements.txt b/.img/requirements.txt
new file mode 100644
index 0000000..30b253b
--- /dev/null
+++ b/.img/requirements.txt
@@ -0,0 +1 @@
+diagrams~=0.21.1
\ No newline at end of file
diff --git a/README.md b/README.md
index 39c58a5..15da2f8 100644
--- a/README.md
+++ b/README.md
@@ -1 +1,34 @@
-# cloud-news-summariser
\ No newline at end of file
+# cloud-news-summarizer
+An Azure Function and associated resources that take RSS feeds of cloud news (eg _AWS What's New_), generate a 3-sentence summary with Azure's Text Analytics, and send the summary to Slack (with a link to the original post).
+
+The Function polls the relevant RSS feeds every 30 minutes.
+
+It is expected that both the Function and the Language Cognitive Services usage will fall within the free tier on Azure, or have a very low cost (cents per month). Other resource usage is negligible and will likely also be free. For additional pricing information, see
+- [Functions pricing](https://azure.microsoft.com/en-us/pricing/details/functions/)
+- [Language Cognitive Services pricing](https://azure.microsoft.com/en-gb/pricing/details/cognitive-services/language-service/)
+
+## Architecture
+![Architecture diagram](./.img/cloud-news-summarizer.png)
+
+## Usage / Deployment
+Supporting resources are maintained as IaC with Pulumi.
+1. Head to the [pulumi/](./pulumi) directory and follow the instructions to deploy that stack.
+2. Deploy the Azure Functions in the [functions/](./functions) directory using either the Functions CLI or the VS Code extension, choosing the `python3.9` runtime. Ensure that you set the required Application Settings (detailed below) on the deployed Function App resource.
+    - Note: If you deployed the Functions before setting the below config in the Function App, you may need to redeploy the Functions for it to take effect.
+
+The following Application Settings are required to be present on the deployed Function App:
+- `TABLE_SA_CONNECTION`: connection string for the storage account created in Pulumi - _available in your Storage Account resource in the Portal_
+- `TABLE_NAME`: table name within the storage account - _listed as a Pulumi output_
+- `ENVIRONMENT`: table row key - _string to differentiate multiple deployments, can be anything alphanumeric, eg `prod`_
+- `QUEUE_NAME`: queue name within the storage account - _listed as a Pulumi output_
+- `COGNITIVE_ENDPOINT`: endpoint URL (including `https://`) for the cognitive services resource - _listed as a Pulumi output_
+- `COGNITIVE_KEY`: key for the cognitive services resource - _available in your Language resource in the Portal_
+- `SLACK_WEBHOOK`: webhook URL for sending to Slack - _see the [Slack docs](https://api.slack.com/messaging/webhooks) if you aren't sure_
+- `SLACK_FAILURE_WEBHOOK`: webhook URL for processing-failure alerts to Slack - _can be the same as or different to the normal Slack webhook (ie optionally send failures to a different channel)_
+
+## Current feeds supported
+Now:
+- AWS What's New (https://aws.amazon.com/about-aws/whats-new/recent/feed/)
+
+Next:
+- Azure Updates (https://azurecomcdn.azureedge.net/en-gb/updates/feed/)
diff --git a/functions/.funcignore b/functions/.funcignore
new file mode 100644
index 0000000..2048c9e
--- /dev/null
+++ b/functions/.funcignore
@@ -0,0 +1,6 @@
+.git*
+.vscode
+local.settings.json
+test
+.venv
+venv
\ No newline at end of file
diff --git a/functions/.vscode/extensions.json b/functions/.vscode/extensions.json
new file mode 100644
index 0000000..cbbad0f
--- /dev/null
+++ b/functions/.vscode/extensions.json
@@ -0,0 +1,6 @@
+{
+  "recommendations": [
+    "ms-azuretools.vscode-azurefunctions",
"ms-python.python" + ] +} diff --git a/functions/.vscode/launch.json b/functions/.vscode/launch.json new file mode 100644 index 0000000..4508b45 --- /dev/null +++ b/functions/.vscode/launch.json @@ -0,0 +1,12 @@ +{ + "version": "0.2.0", + "configurations": [ + { + "name": "Attach to Python Functions", + "type": "python", + "request": "attach", + "port": 9091, + "preLaunchTask": "func: host start" + } + ] +} \ No newline at end of file diff --git a/functions/.vscode/settings.json b/functions/.vscode/settings.json new file mode 100644 index 0000000..1562c57 --- /dev/null +++ b/functions/.vscode/settings.json @@ -0,0 +1,8 @@ +{ + "azureFunctions.deploySubpath": ".", + "azureFunctions.scmDoBuildDuringDeployment": true, + "azureFunctions.pythonVenv": ".venv", + "azureFunctions.projectLanguage": "Python", + "azureFunctions.projectRuntime": "~4", + "debug.internalConsoleOptions": "neverOpen" +} \ No newline at end of file diff --git a/functions/.vscode/tasks.json b/functions/.vscode/tasks.json new file mode 100644 index 0000000..8e2de79 --- /dev/null +++ b/functions/.vscode/tasks.json @@ -0,0 +1,26 @@ +{ + "version": "2.0.0", + "tasks": [ + { + "type": "func", + "command": "host start", + "problemMatcher": "$func-python-watch", + "isBackground": true, + "dependsOn": "pip install (functions)" + }, + { + "label": "pip install (functions)", + "type": "shell", + "osx": { + "command": "${config:azureFunctions.pythonVenv}/bin/python -m pip install -r requirements.txt" + }, + "windows": { + "command": "${config:azureFunctions.pythonVenv}\\Scripts\\python -m pip install -r requirements.txt" + }, + "linux": { + "command": "${config:azureFunctions.pythonVenv}/bin/python -m pip install -r requirements.txt" + }, + "problemMatcher": [] + } + ] +} \ No newline at end of file diff --git a/functions/AwsPostHandler/README.md b/functions/AwsPostHandler/README.md new file mode 100644 index 0000000..569609e --- /dev/null +++ b/functions/AwsPostHandler/README.md @@ -0,0 +1,2 @@ +# AwsPostHandler +Uses BeautifulSoup to parse AWS news posts picked up from the `QUEUE_NAME` stroage queue, sends the main post content to Azure's text summarization service, and posts the result to Slack (with `SLACK_WEBHOOK`). 
diff --git a/functions/AwsPostHandler/__init__.py b/functions/AwsPostHandler/__init__.py
new file mode 100644
index 0000000..e90397c
--- /dev/null
+++ b/functions/AwsPostHandler/__init__.py
@@ -0,0 +1,76 @@
+import json
+import logging
+import os
+
+import azure.functions as func
+import requests
+from bs4 import BeautifulSoup
+import azure.ai.textanalytics as textanalytics
+import azure.core.credentials as credentials
+
+# Gather required environment variables
+COGNITIVE_ENDPOINT = os.environ["COGNITIVE_ENDPOINT"]
+COGNITIVE_KEY = os.environ["COGNITIVE_KEY"]
+SLACK_WEBHOOK = os.environ["SLACK_WEBHOOK"]
+
+
+def create_client() -> textanalytics.TextAnalyticsClient:
+    creds = credentials.AzureKeyCredential(COGNITIVE_KEY)
+    client = textanalytics.TextAnalyticsClient(endpoint=COGNITIVE_ENDPOINT, credential=creds)
+    return client
+
+
+def main(msg: func.QueueMessage) -> None:
+    message = msg.get_body().decode("utf-8")
+    logging.info("Processing {}".format(message))
+
+    entry = requests.get(message)
+    if entry.status_code != 200:
+        logging.error(entry.text)
+        raise Exception("Non-200 response {} from target: {}".format(entry.status_code, message))
+
+    soup = BeautifulSoup(entry.text, "html.parser")
+    article_paragraphs = soup.find_all("div", class_="aws-text-box")
+    article_title = soup.title.text
+
+    article_text = ""
+    for paragraph in article_paragraphs:
+        article_text += (paragraph.text.replace("\n", "") + "\n")
+
+    if article_text != "":
+        summarise_client = create_client()
+        poller = summarise_client.begin_analyze_actions(
+            documents=[article_text],
+            actions=[textanalytics.ExtractSummaryAction(max_sentence_count=3)])
+
+        summarise_results = poller.result()
+        for result in summarise_results:
+            if result[0].is_error:
+                logging.error("Summarisation error: code {}, message {}".format(result[0].error.code, result[0].error.message))
+                raise Exception("Summarisation failure")
+            else:
+                logging.info("Summary:\n{}".format(" ".join([sentence.text for sentence in result[0].sentences])))
+                slack_blocks = {
+                    "blocks": [
+                        {
+                            "type": "section",
+                            "text": {
+                                "type": "mrkdwn",
+                                "text": "*<{}|{}>*".format(message, article_title)
+                            }
+                        },
+                        {
+                            "type": "section",
+                            "text": {
+                                "type": "plain_text",
+                                "text": "\n".join([sentence.text for sentence in result[0].sentences])
+                            }
+                        }
+                    ]
+                }
+                slack_response = requests.post(SLACK_WEBHOOK, json.dumps(slack_blocks))
+                if slack_response.status_code != 200:
+                    logging.warning("Non-200 from Slack: {} {}".format(slack_response.status_code, slack_response.text))
+                    raise Exception("Failed to send to Slack")
+    else:
+        raise Exception("Failed to parse article")
diff --git a/functions/AwsPostHandler/function.json b/functions/AwsPostHandler/function.json
new file mode 100644
index 0000000..f02006c
--- /dev/null
+++ b/functions/AwsPostHandler/function.json
@@ -0,0 +1,12 @@
+{
+  "scriptFile": "__init__.py",
+  "bindings": [
+    {
+      "name": "msg",
+      "type": "queueTrigger",
+      "direction": "in",
+      "queueName": "processqueue",
+      "connection": "TABLE_SA_CONNECTION"
+    }
+  ]
+}
diff --git a/functions/AwsPostHandler/sample.dat b/functions/AwsPostHandler/sample.dat
new file mode 100644
index 0000000..f05bb51
--- /dev/null
+++ b/functions/AwsPostHandler/sample.dat
@@ -0,0 +1 @@
+sample queue data
\ No newline at end of file
diff --git a/functions/AwsRssChecker/README.md b/functions/AwsRssChecker/README.md
new file mode 100644
index 0000000..fdd2a41
--- /dev/null
+++ b/functions/AwsRssChecker/README.md
@@ -0,0 +1,4 @@
+# AwsRssChecker
+Polls the _AWS What's New_ RSS feed on a regular interval (30-minute timer by default) to check for new articles. If any are found, it adds them to the `QUEUE_NAME` queue for processing by the `AwsPostHandler` function.
+
+Stores the last successful check timestamp in an Azure Storage Table `TABLE_NAME` to avoid duplication.
\ No newline at end of file
diff --git a/functions/AwsRssChecker/__init__.py b/functions/AwsRssChecker/__init__.py
new file mode 100644
index 0000000..72d3c71
--- /dev/null
+++ b/functions/AwsRssChecker/__init__.py
@@ -0,0 +1,85 @@
+import logging
+import os
+import time
+from typing import Union
+
+import azure.data.tables as tables
+import azure.functions as func
+import azure.storage.queue as queue
+import feedparser
+
+FEED_URL = "https://aws.amazon.com/about-aws/whats-new/recent/feed/"
+
+# Gather required environment variables
+CONNECTION_STRING = os.environ["TABLE_SA_CONNECTION"]
+TABLE_NAME = os.environ["TABLE_NAME"]
+QUEUE_NAME = os.environ["QUEUE_NAME"]
+ENVIRONMENT = os.environ["ENVIRONMENT"]
+
+
+def get_checkpoint(connection_string: str, table_name: str, partition_key: str, row_key: str) -> Union[int, None]:
+    try:
+        table_service = tables.TableServiceClient.from_connection_string(connection_string)
+        table_client = table_service.get_table_client(table_name)
+        checkpoint = table_client.get_entity(partition_key=partition_key, row_key=row_key)
+        return int(checkpoint["ts"])
+    except Exception as e:
+        logging.warning("Exception getting checkpoint: {}".format(e))
+        return None
+
+
+def set_checkpoint(connection_string: str, table_name: str, partition_key: str, row_key: str) -> None:
+    try:
+        table_service = tables.TableServiceClient.from_connection_string(connection_string)
+        table_client = table_service.get_table_client(table_name)
+        checkpoint_out = {
+            "PartitionKey": partition_key,
+            "RowKey": row_key,
+            "ts": str(int(time.time()))
+        }
+        table_client.upsert_entity(checkpoint_out)
+    except Exception as e:
+        logging.warning("Exception setting checkpoint: {}".format(e))
+
+
+def get_rss(url: str, last_run: time.struct_time) -> Union[feedparser.FeedParserDict, None]:
+    feed = feedparser.parse(url)
+    try:
+        if feed.feed.published_parsed > last_run:
+            return feed
+        else:
+            logging.info("Feed not updated since last check")
+            return None
+    except Exception as e:
+        logging.warning("Exception checking feed publish timestamp: {}".format(e))
+        return feed
+
+
+def process_entry(entry: feedparser.util.FeedParserDict, last_run: time.struct_time,
+                  queue_client: queue.QueueClient) -> None:
+    if entry.published_parsed > last_run:
+        logging.info("New entry: {} {}".format(entry.title, entry.link))
+        queue_client.send_message(bytes(entry.link, "utf-8"))
+        logging.info("Added {} to queue".format(entry.link))
+
+
+def main(timer: func.TimerRequest) -> None:
+    checkpoint = get_checkpoint(CONNECTION_STRING, TABLE_NAME, "aws", ENVIRONMENT)
+    if checkpoint is not None:
+        logging.info("Using {} as checkpoint".format(checkpoint))
+        feed = get_rss(FEED_URL, time.gmtime(checkpoint))
+    else:
+        logging.info("No checkpoint - using current time minus 30m")
+        feed = get_rss(FEED_URL, time.gmtime(time.time() - (30 * 60)))
+
+    if feed is not None:
+        queue_client = queue.QueueClient.from_connection_string(CONNECTION_STRING, QUEUE_NAME,
+                                                                message_encode_policy=queue.BinaryBase64EncodePolicy(),
+                                                                message_decode_policy=queue.BinaryBase64DecodePolicy())
+        for entry in feed.entries:
+            if checkpoint is not None:
+                process_entry(entry, time.gmtime(checkpoint), queue_client)
+            else:
+                process_entry(entry, time.gmtime(time.time() - (30 * 60)), queue_client)
+
+    set_checkpoint(CONNECTION_STRING, TABLE_NAME, "aws", ENVIRONMENT)
diff --git a/functions/AwsRssChecker/function.json b/functions/AwsRssChecker/function.json
new file mode 100644
index 0000000..5369fd6
--- /dev/null
+++ b/functions/AwsRssChecker/function.json
@@ -0,0 +1,11 @@
+{
+  "scriptFile": "__init__.py",
+  "bindings": [
+    {
+      "name": "timer",
+      "type": "timerTrigger",
+      "direction": "in",
+      "schedule": "0 */30 * * * *"
+    }
+  ]
+}
diff --git a/functions/AwsRssChecker/sample.dat b/functions/AwsRssChecker/sample.dat
new file mode 100644
index 0000000..e69de29
diff --git a/functions/FailureHandler/README.md b/functions/FailureHandler/README.md
new file mode 100644
index 0000000..c4ae18d
--- /dev/null
+++ b/functions/FailureHandler/README.md
@@ -0,0 +1,2 @@
+# FailureHandler
+Function to monitor the `-poison` Azure-generated dead-letter queue for processing failures, and send them to Slack (with `SLACK_FAILURE_WEBHOOK`) when found.
diff --git a/functions/FailureHandler/__init__.py b/functions/FailureHandler/__init__.py
new file mode 100644
index 0000000..66372a8
--- /dev/null
+++ b/functions/FailureHandler/__init__.py
@@ -0,0 +1,18 @@
+import json
+import logging
+import os
+
+import azure.functions as func
+import requests
+
+SLACK_FAILURE_WEBHOOK = os.environ["SLACK_FAILURE_WEBHOOK"]
+
+
+def main(msg: func.QueueMessage) -> None:
+    message = msg.get_body().decode("utf-8")
+    logging.info("Processing {}".format(message))
+    slack_payload = {"text": "Failed to process {}".format(message)}
+    slack_response = requests.post(SLACK_FAILURE_WEBHOOK, json.dumps(slack_payload))
+    if slack_response.status_code != 200:
+        logging.error("Non-200 from Slack: {} {}".format(slack_response.status_code, slack_response.text))
+        raise Exception("Failed to send to Slack")
diff --git a/functions/FailureHandler/function.json b/functions/FailureHandler/function.json
new file mode 100644
index 0000000..53dc796
--- /dev/null
+++ b/functions/FailureHandler/function.json
@@ -0,0 +1,12 @@
+{
+  "scriptFile": "__init__.py",
+  "bindings": [
+    {
+      "name": "msg",
+      "type": "queueTrigger",
+      "direction": "in",
+      "queueName": "processqueue-poison",
+      "connection": "TABLE_SA_CONNECTION"
+    }
+  ]
+}
diff --git a/functions/FailureHandler/sample.dat b/functions/FailureHandler/sample.dat
new file mode 100644
index 0000000..f05bb51
--- /dev/null
+++ b/functions/FailureHandler/sample.dat
@@ -0,0 +1 @@
+sample queue data
\ No newline at end of file
diff --git a/functions/host.json b/functions/host.json
new file mode 100644
index 0000000..3f33af1
--- /dev/null
+++ b/functions/host.json
@@ -0,0 +1,15 @@
+{
+  "version": "2.0",
+  "logging": {
+    "applicationInsights": {
+      "samplingSettings": {
+        "isEnabled": true,
+        "excludedTypes": "Request"
+      }
+    }
+  },
+  "extensionBundle": {
+    "id": "Microsoft.Azure.Functions.ExtensionBundle",
+    "version": "[2.*, 3.0.0)"
+  }
+}
diff --git a/functions/requirements.txt b/functions/requirements.txt
new file mode 100644
index 0000000..7f5e4f2
--- /dev/null
+++ b/functions/requirements.txt
@@ -0,0 +1,12 @@
+# DO NOT include azure-functions-worker in this file
+# The Python Worker is managed by Azure Functions platform
+# Manually managing azure-functions-worker may cause unexpected issues
+
+azure-functions
+
+azure-ai-textanalytics~=5.2.0b3
+azure-data-tables~=12.3.0
+azure-storage-queue~=12.2.0
+beautifulsoup4~=4.11.1
+feedparser~=6.0.8
+requests~=2.27.1
\ No newline at end of file
diff --git a/pulumi/.gitignore b/pulumi/.gitignore
new file mode 100644
index 0000000..a3807e5
--- /dev/null
+++ b/pulumi/.gitignore
@@ -0,0 +1,2 @@
+*.pyc
+venv/
diff --git a/pulumi/Pulumi.yaml b/pulumi/Pulumi.yaml
new file mode 100644
index 0000000..a787a9d
--- /dev/null
+++ b/pulumi/Pulumi.yaml
@@ -0,0 +1,6 @@
+name: cloud-news-summarizer
+runtime:
+  name: python
+  options:
+    virtualenv: venv
+description: Azure resources to take and summarize cloud news articles
diff --git a/pulumi/README.md b/pulumi/README.md
new file mode 100644
index 0000000..021543b
--- /dev/null
+++ b/pulumi/README.md
@@ -0,0 +1,12 @@
+# Supporting resources
+## Pulumi stack
+This stack contains the supporting resources that make up the cloud-news-summarizer.
+
+### Deploy
+1. Set up and configure Pulumi - see [their docs](https://www.pulumi.com/docs/get-started/azure/) if you're not sure
+2. Create a new stack within this project and set the required config properties - again see [their docs](https://www.pulumi.com/docs/intro/concepts/stack/) if you're unsure
+    - Use `pulumi config set <key> <value>` to set the following config items:
+        - `azure-native:location` - the region to deploy to; it must support all the resources in the stack (Cognitive Services has the most limited regional availability - I used `westeurope`)
+        - `prefix` - a prefix to add to the stack's resource names in Azure, must be alphanumeric and between 1 and 15 characters
+3. Deploy the stack (`pulumi up`)
+4. Note the output values from the stack - they'll be used in the Function app deployment
diff --git a/pulumi/__main__.py b/pulumi/__main__.py
new file mode 100644
index 0000000..320e0b2
--- /dev/null
+++ b/pulumi/__main__.py
@@ -0,0 +1,45 @@
+"""An Azure RM Python Pulumi program"""
+
+import pulumi
+import pulumi_azure_native as azure
+
+config = pulumi.Config()
+prefix = config.require("prefix")
+
+
+resource_group = azure.resources.ResourceGroup("rg",
+                                               resource_group_name="{}-cloud-news-summarizer".format(prefix))
+
+account = azure.storage.StorageAccount("table-sa",
+                                       resource_group_name=resource_group.name,
+                                       sku=azure.storage.SkuArgs(
+                                           name=azure.storage.SkuName.STANDARD_ZRS,
+                                       ),
+                                       kind=azure.storage.Kind.STORAGE_V2,
+                                       account_name="{}cloudnews".format(prefix))
+
+checkpoint_table = azure.storage.Table("checkpoint-table",
+                                       account_name=account.name,
+                                       resource_group_name=resource_group.name,
+                                       table_name="rsscheckpoint")
+
+process_queue = azure.storage.Queue("process-queue",
+                                    account_name=account.name,
+                                    resource_group_name=resource_group.name,
+                                    queue_name="processqueue")
+
+cognitive_account = azure.cognitiveservices.Account("cognitive-account",
+                                                    account_name="{}-cloud-news-cognitive".format(prefix),
+                                                    kind="TextAnalytics",
+                                                    resource_group_name=resource_group.name,
+                                                    sku=azure.cognitiveservices.SkuArgs(name="S"))
+
+pulumi.export("rg-name", resource_group.name)
+pulumi.export("sa-name", account.name)
+pulumi.export("table-name", checkpoint_table.name)
+pulumi.export("queue-name", process_queue.name)
+pulumi.export("cognitive-name", cognitive_account.name)
+cognitive_endpoint = pulumi.Output.all(resource_group.name, cognitive_account.name)\
+    .apply(lambda args: azure.cognitiveservices.get_account(resource_group_name=args[0], account_name=args[1]))\
+    .apply(lambda properties: properties.properties.endpoint)
+pulumi.export("cognitive-endpoint", cognitive_endpoint)
diff --git a/pulumi/requirements.txt b/pulumi/requirements.txt
new file mode 100644
index 0000000..d77b5a3
--- /dev/null
+++ b/pulumi/requirements.txt
@@ -0,0 +1,2 @@
+pulumi>=3.0.0,<4.0.0
+pulumi-azure-native>=1.0.0,<2.0.0
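Once the stack is up, the Cognitive Services resource can be smoke-tested before the Functions are deployed. A minimal sketch, assuming the `COGNITIVE_ENDPOINT`/`COGNITIVE_KEY` values described in the top-level README and the `azure-ai-textanalytics~=5.2.0b3` pin from functions/requirements.txt (the sample document text is arbitrary):

```python
# Hypothetical smoke test: run a 2-sentence extractive summary against the new resource.
import os

import azure.ai.textanalytics as textanalytics
import azure.core.credentials as credentials

client = textanalytics.TextAnalyticsClient(
    endpoint=os.environ["COGNITIVE_ENDPOINT"],
    credential=credentials.AzureKeyCredential(os.environ["COGNITIVE_KEY"]))

poller = client.begin_analyze_actions(
    documents=["Azure Functions runs event-driven code without dedicated servers. "
               "Consumption plans bill per execution and scale automatically. "
               "That makes them a good fit for low-volume periodic jobs like this one."],
    actions=[textanalytics.ExtractSummaryAction(max_sentence_count=2)])

# Each document yields one result per action; print the summary or fail loudly.
for result in poller.result():
    if result[0].is_error:
        raise RuntimeError("Summarisation error: {} {}".format(result[0].error.code, result[0].error.message))
    print(" ".join(sentence.text for sentence in result[0].sentences))
```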