From 770d9786f0c0c5b374c7a333ad016674fd94b8cb Mon Sep 17 00:00:00 2001 From: Zach Waterfield Date: Wed, 29 Jan 2025 17:24:59 -0500 Subject: [PATCH] improve the logging for quote limiting --- ee/billing/quota_limiting.py | 64 ++++++++++++++++++-------- ee/billing/test/test_quota_limiting.py | 6 ++- posthog/tasks/tasks.py | 3 ++ 3 files changed, 53 insertions(+), 20 deletions(-) diff --git a/ee/billing/quota_limiting.py b/ee/billing/quota_limiting.py index 349ce368b5465..ac04f5e72785c 100644 --- a/ee/billing/quota_limiting.py +++ b/ee/billing/quota_limiting.py @@ -164,8 +164,9 @@ def org_quota_limited_until( update_organization_usage_field(organization, resource, "quota_limiting_suspended_until", None) report_organization_action( organization, - "quota limiting suspension removed", + "org_quota_limited_until", properties={ + "event": "suspension removed", "current_usage": usage + todays_usage, "resource": resource.value, "quota_limiting_suspended_until": quota_limiting_suspended_until, @@ -177,8 +178,9 @@ def org_quota_limited_until( if organization.never_drop_data or trust_score == 15: report_organization_action( organization, - "quota limiting ignored", + "org_quota_limited_until", properties={ + "event": "ignored", "current_usage": usage + todays_usage, "resource": resource.value, "never_drop_data": organization.never_drop_data, @@ -195,8 +197,9 @@ def org_quota_limited_until( # They are already being limited, do not update their status. report_organization_action( organization, - "quota limiting already limited", + "org_quota_limited_until", properties={ + "event": "already limited", "current_usage": usage + todays_usage, "resource": resource.value, }, @@ -217,8 +220,9 @@ def org_quota_limited_until( # Don't drop data for this org but record that they would have been limited. report_organization_action( organization, - "quota limiting suspended", + "org_quota_limited_until", properties={ + "event": "ignored", "current_usage": usage + todays_usage, "resource": resource.value, "feature_flag": QUOTA_LIMIT_DATA_RETENTION_FLAG, @@ -240,8 +244,9 @@ def org_quota_limited_until( organization.save(update_fields=["customer_trust_scores"]) report_organization_action( organization, - "quota limiting suspended", + "org_quota_limited_until", properties={ + "event": "suspended", "current_usage": usage + todays_usage, "resource": resource.value, "trust_score": trust_score, @@ -257,8 +262,9 @@ def org_quota_limited_until( # Low trust, immediately limit report_organization_action( organization, - "quota limiting suspended", + "org_quota_limited_until", properties={ + "event": "suspended", "current_usage": usage + todays_usage, "resource": resource.value, "trust_score": trust_score, @@ -282,8 +288,9 @@ def org_quota_limited_until( ): report_organization_action( organization, - "quota limiting suspended", + "org_quota_limited_until", properties={ + "event": "suspended", "current_usage": usage + todays_usage, "resource": resource.value, "grace_period_days": grace_period_days, @@ -302,8 +309,9 @@ def org_quota_limited_until( # If the suspension is still active (after today's end), we want to return the existing suspension date report_organization_action( organization, - "quota limiting suspension not expired", + "org_quota_limited_until", properties={ + "event": "suspension not expired", "current_usage": usage + todays_usage, "resource": resource.value, "quota_limiting_suspended_until": quota_limiting_suspended_until, @@ -317,8 +325,9 @@ def org_quota_limited_until( # If the suspension is expired, we want to limit the org report_organization_action( organization, - "quota limiting suspended expired", + "org_quota_limited_until", properties={ + "event": "suspended expired", "current_usage": usage + todays_usage, "resource": resource.value, }, @@ -331,8 +340,9 @@ def org_quota_limited_until( else: # Should never reach here - return the default behavior just to be safe report_quota_limiting_event( - "org_quota_limited_until unexpected trust score", + "org_quota_limited_until", { + "event": "unexpected trust score", "current_usage": usage + todays_usage, "resource": resource.value, "trust_score": trust_score, @@ -447,7 +457,12 @@ def update_all_orgs_billing_quotas( period = get_current_day() period_start, period_end = period report_quota_limiting_event( - "update_all_orgs_billing_quotas started", {"period_start": period_start, "period_end": period_end} + "update_all_orgs_billing_quotas", + { + "event": "started", + "period_start": period_start, + "period_end": period_end, + }, ) # Clickhouse is good at counting things so we count across all teams rather than doing it one by one @@ -476,8 +491,9 @@ def update_all_orgs_billing_quotas( ) ) report_quota_limiting_event( - "update_all_orgs_billing_quotas teams fetched", + "update_all_orgs_billing_quotas", { + "event": "teams fetched", "team_count": len(teams), }, ) @@ -507,8 +523,12 @@ def update_all_orgs_billing_quotas( # orgs_by_id is a dict of orgs by id (e.g. {"018e9acf-b488-0000-259c-534bcef40359": }) # todays_usage_report is a dict of orgs by id with their usage for the current day (e.g. {"018e9acf-b488-0000-259c-534bcef40359": {"events": 100, "recordings": 100, "rows_synced": 100}}) report_quota_limiting_event( - "update_all_orgs_billing_quotas reports built", - {"orgs_by_id_count": len(orgs_by_id), "todays_usage_report_count": len(todays_usage_report)}, + "update_all_orgs_billing_quotas", + { + "event": "team reports built", + "orgs_by_id_count": len(orgs_by_id), + "todays_usage_report_count": len(todays_usage_report), + }, ) quota_limited_orgs: dict[str, dict[str, int]] = {x.value: {} for x in QuotaResource} @@ -526,8 +546,9 @@ def update_all_orgs_billing_quotas( # We have the teams that are currently under quota limits # previously_quota_limited_team_tokens is a dict of resources to team tokens from redis (e.g. {"events": ["phc_123", "phc_456"], "recordings": ["phc_123", "phc_456"], "rows_synced": ["phc_123", "phc_456"]}) report_quota_limiting_event( - "update_all_orgs_billing_quotas previously quota limited teams fetched", + "update_all_orgs_billing_quotas", { + "event": "previously quota limited teams fetched", "events_count": len(previously_quota_limited_team_tokens["events"]), "recordings_count": len(previously_quota_limited_team_tokens["recordings"]), "rows_synced_count": len(previously_quota_limited_team_tokens["rows_synced"]), @@ -558,8 +579,9 @@ def update_all_orgs_billing_quotas( # quota_limited_orgs is a dict of resources to org ids (e.g. {"events": {"018e9acf-b488-0000-259c-534bcef40359": 1737867600}, "recordings": {"018e9acf-b488-0000-259c-534bcef40359": 1737867600}, "rows_synced": {"018e9acf-b488-0000-259c-534bcef40359": 1737867600}}) # quota_limiting_suspended_orgs is a dict of resources to org ids (e.g. {"events": {"018e9acf-b488-0000-259c-534bcef40359": 1737867600}, "recordings": {"018e9acf-b488-0000-259c-534bcef40359": 1737867600}, "rows_synced": {"018e9acf-b488-0000-259c-534bcef40359": 1737867600}}) report_quota_limiting_event( - "update_all_orgs_billing_quotas orgs fetched", + "update_all_orgs_billing_quotas", { + "event": "orgs summaries built", "quota_limited_orgs_count": len(quota_limited_orgs), "quota_limiting_suspended_orgs_count": len(quota_limiting_suspended_orgs), }, @@ -589,8 +611,9 @@ def update_all_orgs_billing_quotas( # quota_limited_teams is a dict of resources to team tokens (e.g. {"events": {"phc_123": 1737867600}, "recordings": {"phc_123": 1737867600}, "rows_synced": {"phc_123": 1737867600}}) # quota_limiting_suspended_teams is a dict of resources to team tokens (e.g. {"events": {"phc_123": 1737867600}, "recordings": {"phc_123": 1737867600}, "rows_synced": {"phc_123": 1737867600}}) report_quota_limiting_event( - "update_all_orgs_billing_quotas teams fetched", + "update_all_orgs_billing_quotas", { + "event": "teams summaries built", "quota_limited_teams_count": len(quota_limited_teams), "quota_limiting_suspended_teams_count": len(quota_limiting_suspended_teams), "orgs_with_changes_count": len(orgs_with_changes), @@ -622,7 +645,12 @@ def update_all_orgs_billing_quotas( quota_limiting_suspended_teams[field], QuotaLimitingCaches.QUOTA_LIMITING_SUSPENDED_KEY, ) - report_quota_limiting_event("update_all_orgs_billing_quotas finished", {}) + report_quota_limiting_event( + "update_all_orgs_billing_quotas", + { + "event": "finished", + }, + ) return quota_limited_orgs, quota_limiting_suspended_orgs diff --git a/ee/billing/test/test_quota_limiting.py b/ee/billing/test/test_quota_limiting.py index 0998f54d90bb4..800644db697f5 100644 --- a/ee/billing/test/test_quota_limiting.py +++ b/ee/billing/test/test_quota_limiting.py @@ -78,9 +78,10 @@ def test_quota_limiting_feature_flag_enabled(self, patch_feature_enabled, patch_ assert patch_capture.call_count == 8 # 7 logs + 1 org # Find the org action call org_action_call = next( - call for call in patch_capture.call_args_list if call.args[1] == "quota limiting suspended" + call for call in patch_capture.call_args_list if call.args[1] == "org_quota_limited_until" ) assert org_action_call.kwargs.get("properties") == { + "event": "ignored", "current_usage": 109, "resource": "events", "feature_flag": QUOTA_LIMIT_DATA_RETENTION_FLAG, @@ -119,9 +120,10 @@ def test_quota_limiting_feature_flag_enabled(self, patch_feature_enabled, patch_ assert patch_capture.call_count == 8 # 7 logs + 1 org # Find the org action call org_action_call = next( - call for call in patch_capture.call_args_list if call.args[1] == "quota limiting already limited" + call for call in patch_capture.call_args_list if call.args[1] == "org_quota_limited_until" ) assert org_action_call.kwargs.get("properties") == { + "event": "already limited", "current_usage": 109, "resource": "events", } diff --git a/posthog/tasks/tasks.py b/posthog/tasks/tasks.py index 917f4878cd6d7..162367bfba22e 100644 --- a/posthog/tasks/tasks.py +++ b/posthog/tasks/tasks.py @@ -1,6 +1,7 @@ import time from typing import Optional from uuid import UUID +from sentry_sdk import capture_exception import requests from celery import shared_task @@ -855,6 +856,8 @@ def update_quota_limiting() -> None: update_all_orgs_billing_quotas() except ImportError: pass + except Exception as e: + capture_exception(e) @shared_task(ignore_result=True)