From d335c56f354b71864e925eb5c6c03fc98174a24c Mon Sep 17 00:00:00 2001 From: Travis Semple Date: Tue, 10 Dec 2024 11:32:26 -0800 Subject: [PATCH] Fix notebook so it's more consistent, also make it so the queries work on any day of the month. Rename some of the columns so they are more concise. --- jobs/notebook-report/config.py | 6 +- .../monthly/reconciliation_summary.ipynb | 66 ++++++++++++++----- jobs/notebook-report/notebookreport.py | 9 +-- 3 files changed, 56 insertions(+), 25 deletions(-) diff --git a/jobs/notebook-report/config.py b/jobs/notebook-report/config.py index b2c20019e..497361f3a 100644 --- a/jobs/notebook-report/config.py +++ b/jobs/notebook-report/config.py @@ -7,6 +7,7 @@ class Config(object): + """Class configuring our environment variables in one defined place.""" PROJECT_ROOT = os.getcwd() APP_FILE = os.getenv("APP_FILE", "") SENDER_EMAIL = os.getenv("SENDER_EMAIL", "") @@ -23,8 +24,11 @@ class Config(object): WEEKLY_REPORT_DATES = os.getenv("WEEKLY_REPORT_DATES", "[1]") MONTHLY_REPORT_DATES = os.getenv("MONTHLY_REPORT_DATES", "[1]") PARTNER_CODES = os.getenv("PARTNER_CODES", "CSO,VS,RPT,ESRA") + REPORT_API_URL = os.getenv("REPORT_API_URL", "") + os.getenv("REPORT_API_VERSION", "/api/v1") + NOTEBOOK_SERVICE_ACCOUNT_ID = os.getenv("NOTEBOOK_SERVICE_ACCOUNT_ID", "") + NOTEBOOK_SERVICE_ACCOUNT_SECRET = os.getenv("NOTEBOOK_SERVICE_ACCOUNT_SECRET", "") + JWT_OIDC_ISSUER = os.getenv("JWT_OIDC_ISSUER", "") - # POSTGRESQL PAY_USER = os.getenv("PAY_USER", "") PAY_PASSWORD = os.getenv("PAY_PASSWORD", "") PAY_DB_NAME = os.getenv("PAY_DB_NAME", "") diff --git a/jobs/notebook-report/monthly/reconciliation_summary.ipynb b/jobs/notebook-report/monthly/reconciliation_summary.ipynb index cd489ebab..9f6798a6a 100644 --- a/jobs/notebook-report/monthly/reconciliation_summary.ipynb +++ b/jobs/notebook-report/monthly/reconciliation_summary.ipynb @@ -51,8 +51,9 @@ "source": [ "import base64\n", "from config import Config\n", - "from datetime import datetime, 
timezone, timedelta\n", + "\n", "import os\n", + "\n", "import requests\n", "from IPython import get_ipython\n", "from IPython.display import display, Markdown\n", @@ -145,6 +146,35 @@ "set time zone 'UTC';" ] }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "from datetime import datetime, timezone, timedelta\n", + "from dateutil.relativedelta import relativedelta\n", + "import pytz\n", + "\n", + "def get_utc_timezone_adjusted_date(target_date) -> str:\n", + " target_datetime = datetime.combine(target_date, datetime.min.time())\n", + " hours = target_datetime.astimezone(pytz.timezone(\"America/Vancouver\")).utcoffset().total_seconds() / 60 / 60\n", + " target_date = target_datetime.replace(tzinfo=timezone.utc) + relativedelta(hours=-hours)\n", + " return target_date.replace(tzinfo=None).strftime('%Y-%m-%d %H:%M:%S')\n", + "\n", + "def get_first_last_month_dates_in_utc() -> tuple[str, str]:\n", + " current_time = datetime.now(pytz.timezone(\"America/Vancouver\"))\n", + " last_month = current_time - relativedelta(months=1)\n", + " from_date = last_month.replace(day=1)\n", + " from_date = get_utc_timezone_adjusted_date(from_date)\n", + " to_date = (last_month.replace(day=1) + relativedelta(months=1)) - timedelta(days=1)\n", + " to_date = get_utc_timezone_adjusted_date(to_date)\n", + " return from_date, to_date\n", + "\n", + "from_date, to_date = get_first_last_month_dates_in_utc()\n", + "print(f'Using from_date: {from_date} and to_date: {to_date}')" + ] + }, { "cell_type": "markdown", "metadata": {}, @@ -167,7 +197,7 @@ "%%sql monthly_reconciliation_summary <<\n", "SELECT\n", " id,\n", - " (created_on AT TIME ZONE 'UTC' AT TIME ZONE 'America/Vancouver')::date AS created_date,\n", + " (created_on AT TIME ZONE 'UTC' AT TIME ZONE 'America/Vancouver')::date AS created_date_pacific,\n", " total,\n", " service_fees,\n", " payment_method_code,\n", @@ -181,8 +211,8 @@ " AND total > 0\n", " AND invoice_status_code = 
'PAID'\n", " AND payment_method_code in ('PAD','EJV')\n", - " AND created_on AT TIME ZONE 'UTC' AT TIME ZONE 'America/Vancouver' > (current_date - 1 - interval '1 months')::date\n", - " AND created_on AT TIME ZONE 'UTC' AT TIME ZONE 'America/Vancouver' <= (current_date - 1)::date\n", + " AND created_on >= :from_date\n", + " AND created_on <= :to_date\n", "ORDER BY\n", " 1;" ] @@ -241,17 +271,22 @@ "print(f\"Processing for partner_code: {partner_code}\")\n", "print(f\"Partners for disbursement summary: {partners_for_disbursement_summary}\")\n", "\n", + "# disbursement_date stores only the date part (no time component). It is believed to be Pacific time,\n", + "# because it comes straight from the feedback files.\n", + "from_date = from_date.split(' ')[0] \n", + "to_date = to_date.split(' ')[0]\n", + "print(f'Using from_date: {from_date} and to_date: {to_date}')\n", "if partner_code in partners_for_disbursement_summary:\n", " print(f\"Partner code {partner_code} found in the list, executing SQL query.\")\n", " query = f\"\"\"\n", - " SELECT id, (disbursement_date AT TIME ZONE 'UTC' AT TIME ZONE 'America/Vancouver')::date, total, service_fees, payment_method_code, corp_type_code,created_by\n", + " SELECT id as transaction_id, disbursement_date::date disbursement_date_pacific, total, service_fees, payment_method_code, corp_type_code,created_by\n", " FROM invoices\n", " WHERE corp_type_code = :partner_code\n", " AND invoice_status_code = 'PAID'\n", " AND payment_method_code in ('PAD','EJV')\n", " AND disbursement_status_code = 'COMPLETED'\n", - " AND disbursement_date AT TIME ZONE 'UTC' AT TIME ZONE 'America/Vancouver' > (current_date - 1 - interval '1 months'- interval '5 days')::date\n", - " AND disbursement_date AT TIME ZONE 'UTC' AT TIME ZONE 'America/Vancouver' <= (current_date - 1)::date\n", + " AND disbursement_date >= {from_date}\n", + " AND disbursement_date <= {to_date}\n", " order by 1;\n", " \"\"\"\n", "\n", @@ -309,13 +344,13 @@ "outputs": [], "source": [ "payload = 
\"grant_type=client_credentials\"\n", - "basic_hash = base64.b64encode(f\"{os.getenv('NOTEBOOK_SERVICE_ACCOUNT_ID')}:{os.getenv('NOTEBOOK_SERVICE_ACCOUNT_SECRET')}\".encode())\n", + "basic_hash = base64.b64encode(f\"{Config.NOTEBOOK_SERVICE_ACCOUNT_ID}:{Config.NOTEBOOK_SERVICE_ACCOUNT_SECRET}\".encode())\n", "\n", "headers = {\n", " 'Content-Type': 'application/x-www-form-urlencoded',\n", " 'Authorization': f'Basic {basic_hash.decode()}'\n", "}\n", - "response = requests.request(\"POST\", f\"{os.getenv('JWT_OIDC_ISSUER')}/protocol/openid-connect/token\", headers=headers, data=payload)\n", + "response = requests.request(\"POST\", f\"{Config.JWT_OIDC_ISSUER}/protocol/openid-connect/token\", headers=headers, data=payload)\n", "\n", "assert response.status_code == 200\n", "notebook_service_account_token = response.json().get('access_token')" @@ -365,8 +400,8 @@ "}\n", "\n", "def generate_report(partner_code):\n", - " API_BASE_URL = os.getenv('REPORT_API_URL', '')\n", - " if not API_BASE_URL:\n", + " API_BASE_URL = Config.REPORT_API_URL + '/reports'\n", + " if not Config.REPORT_API_URL:\n", " raise ValueError(\"The REPORT_API_URL environment variable is not set or is empty\")\n", "\n", " url = API_BASE_URL\n", @@ -461,7 +496,7 @@ "metadata": { "celltoolbar": "Tags", "kernelspec": { - "display_name": "Python 3 (ipykernel)", + "display_name": "notebook-report-cQwRR_n3-py3.12", "language": "python", "name": "python3" }, @@ -475,7 +510,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.11.4" + "version": "3.12.2" }, "pycharm": { "stem_cell": { @@ -485,11 +520,6 @@ }, "source": [] } - }, - "vscode": { - "interpreter": { - "hash": "fcb35bce15c55b4cacb5112e543368f86c7f98ed17acd45e6841ee83ed1df6e3" - } } }, "nbformat": 4, diff --git a/jobs/notebook-report/notebookreport.py b/jobs/notebook-report/notebookreport.py index 2416dc625..acc21a10f 100644 --- a/jobs/notebook-report/notebookreport.py +++ 
b/jobs/notebook-report/notebookreport.py @@ -7,7 +7,7 @@ import smtplib import sys import traceback -from datetime import date, datetime, timedelta +from datetime import date, datetime, timedelta, timezone from email import encoders from email.mime.base import MIMEBase from email.mime.multipart import MIMEMultipart @@ -31,7 +31,6 @@ def create_app(config=Config): """Create app.""" app = Flask(__name__) app.config.from_object(config) - # db.init_app(app) app.app_context().push() current_app.logger.debug("created the Flask App and pushed the App Context") @@ -185,14 +184,12 @@ def get_partner_recipients(file_processing: str, partner_code: str) -> str: if __name__ == "__main__": - start_time = datetime.utcnow() + start_time = datetime.now(tz=timezone.utc) temp_dir = os.path.join(os.getcwd(), r"data/") - # Check if the subfolders for notebooks exist, and create them if they don't if not os.path.exists(temp_dir): os.makedirs(temp_dir) - # Current partner codes to execute notebooks on partner_codes = Config.PARTNER_CODES.split(",") # Process notebooks for each partner @@ -205,6 +202,6 @@ def get_partner_recipients(file_processing: str, partner_code: str) -> str: # process weekly pay notebook separate from partner notebooks process_notebooks("weekly", temp_dir) - end_time = datetime.utcnow() + end_time = datetime.now(tz=timezone.utc) logging.info("job - jupyter notebook report completed in: %s", end_time - start_time) sys.exit()