Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

chore: improve the logging for quote limiting #28063

Merged
merged 3 commits into from
Feb 3, 2025
Merged
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
64 changes: 46 additions & 18 deletions ee/billing/quota_limiting.py
Original file line number Diff line number Diff line change
Expand Up @@ -164,8 +164,9 @@ def org_quota_limited_until(
update_organization_usage_field(organization, resource, "quota_limiting_suspended_until", None)
report_organization_action(
organization,
"quota limiting suspension removed",
"org_quota_limited_until",
properties={
"event": "suspension removed",
"current_usage": usage + todays_usage,
"resource": resource.value,
"quota_limiting_suspended_until": quota_limiting_suspended_until,
Expand All @@ -177,8 +178,9 @@ def org_quota_limited_until(
if organization.never_drop_data or trust_score == 15:
report_organization_action(
organization,
"quota limiting ignored",
"org_quota_limited_until",
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

nit: we might want to rename to something generic since in some of these cases we are not rate limiting?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I didn't want to touch report_organization_action because those have been around for a while but yeah once I fix the underlying issue I'm going to revisit the core events here.

properties={
"event": "ignored",
"current_usage": usage + todays_usage,
"resource": resource.value,
"never_drop_data": organization.never_drop_data,
Expand All @@ -195,8 +197,9 @@ def org_quota_limited_until(
# They are already being limited, do not update their status.
report_organization_action(
organization,
"quota limiting already limited",
"org_quota_limited_until",
properties={
"event": "already limited",
"current_usage": usage + todays_usage,
"resource": resource.value,
},
Expand All @@ -217,8 +220,9 @@ def org_quota_limited_until(
# Don't drop data for this org but record that they would have been limited.
report_organization_action(
organization,
"quota limiting suspended",
"org_quota_limited_until",
properties={
"event": "ignored",
"current_usage": usage + todays_usage,
"resource": resource.value,
"feature_flag": QUOTA_LIMIT_DATA_RETENTION_FLAG,
Expand All @@ -240,8 +244,9 @@ def org_quota_limited_until(
organization.save(update_fields=["customer_trust_scores"])
report_organization_action(
organization,
"quota limiting suspended",
"org_quota_limited_until",
properties={
"event": "suspended",
"current_usage": usage + todays_usage,
"resource": resource.value,
"trust_score": trust_score,
Expand All @@ -257,8 +262,9 @@ def org_quota_limited_until(
# Low trust, immediately limit
report_organization_action(
organization,
"quota limiting suspended",
"org_quota_limited_until",
properties={
"event": "suspended",
"current_usage": usage + todays_usage,
"resource": resource.value,
"trust_score": trust_score,
Expand All @@ -282,8 +288,9 @@ def org_quota_limited_until(
):
report_organization_action(
organization,
"quota limiting suspended",
"org_quota_limited_until",
properties={
"event": "suspended",
"current_usage": usage + todays_usage,
"resource": resource.value,
"grace_period_days": grace_period_days,
Expand All @@ -302,8 +309,9 @@ def org_quota_limited_until(
# If the suspension is still active (after today's end), we want to return the existing suspension date
report_organization_action(
organization,
"quota limiting suspension not expired",
"org_quota_limited_until",
properties={
"event": "suspension not expired",
"current_usage": usage + todays_usage,
"resource": resource.value,
"quota_limiting_suspended_until": quota_limiting_suspended_until,
Expand All @@ -317,8 +325,9 @@ def org_quota_limited_until(
# If the suspension is expired, we want to limit the org
report_organization_action(
organization,
"quota limiting suspended expired",
"org_quota_limited_until",
properties={
"event": "suspended expired",
"current_usage": usage + todays_usage,
"resource": resource.value,
},
Expand All @@ -331,8 +340,9 @@ def org_quota_limited_until(
else:
# Should never reach here - return the default behavior just to be safe
report_quota_limiting_event(
"org_quota_limited_until unexpected trust score",
"org_quota_limited_until",
{
"event": "unexpected trust score",
"current_usage": usage + todays_usage,
"resource": resource.value,
"trust_score": trust_score,
Expand Down Expand Up @@ -447,7 +457,12 @@ def update_all_orgs_billing_quotas(
period = get_current_day()
period_start, period_end = period
report_quota_limiting_event(
"update_all_orgs_billing_quotas started", {"period_start": period_start, "period_end": period_end}
"update_all_orgs_billing_quotas",
{
"event": "started",
"period_start": period_start,
"period_end": period_end,
},
)

# Clickhouse is good at counting things so we count across all teams rather than doing it one by one
Expand Down Expand Up @@ -476,8 +491,9 @@ def update_all_orgs_billing_quotas(
)
)
report_quota_limiting_event(
"update_all_orgs_billing_quotas teams fetched",
"update_all_orgs_billing_quotas",
{
"event": "teams fetched",
"team_count": len(teams),
},
)
Expand Down Expand Up @@ -507,8 +523,12 @@ def update_all_orgs_billing_quotas(
# orgs_by_id is a dict of orgs by id (e.g. {"018e9acf-b488-0000-259c-534bcef40359": <Organization: 018e9acf-b488-0000-259c-534bcef40359>})
# todays_usage_report is a dict of orgs by id with their usage for the current day (e.g. {"018e9acf-b488-0000-259c-534bcef40359": {"events": 100, "recordings": 100, "rows_synced": 100}})
report_quota_limiting_event(
"update_all_orgs_billing_quotas reports built",
{"orgs_by_id_count": len(orgs_by_id), "todays_usage_report_count": len(todays_usage_report)},
"update_all_orgs_billing_quotas",
{
"event": "team reports built",
"orgs_by_id_count": len(orgs_by_id),
"todays_usage_report_count": len(todays_usage_report),
},
)

quota_limited_orgs: dict[str, dict[str, int]] = {x.value: {} for x in QuotaResource}
Expand All @@ -526,8 +546,9 @@ def update_all_orgs_billing_quotas(
# We have the teams that are currently under quota limits
# previously_quota_limited_team_tokens is a dict of resources to team tokens from redis (e.g. {"events": ["phc_123", "phc_456"], "recordings": ["phc_123", "phc_456"], "rows_synced": ["phc_123", "phc_456"]})
report_quota_limiting_event(
"update_all_orgs_billing_quotas previously quota limited teams fetched",
"update_all_orgs_billing_quotas",
{
"event": "previously quota limited teams fetched",
"events_count": len(previously_quota_limited_team_tokens["events"]),
"recordings_count": len(previously_quota_limited_team_tokens["recordings"]),
"rows_synced_count": len(previously_quota_limited_team_tokens["rows_synced"]),
Expand Down Expand Up @@ -558,8 +579,9 @@ def update_all_orgs_billing_quotas(
# quota_limited_orgs is a dict of resources to org ids (e.g. {"events": {"018e9acf-b488-0000-259c-534bcef40359": 1737867600}, "recordings": {"018e9acf-b488-0000-259c-534bcef40359": 1737867600}, "rows_synced": {"018e9acf-b488-0000-259c-534bcef40359": 1737867600}})
# quota_limiting_suspended_orgs is a dict of resources to org ids (e.g. {"events": {"018e9acf-b488-0000-259c-534bcef40359": 1737867600}, "recordings": {"018e9acf-b488-0000-259c-534bcef40359": 1737867600}, "rows_synced": {"018e9acf-b488-0000-259c-534bcef40359": 1737867600}})
report_quota_limiting_event(
"update_all_orgs_billing_quotas orgs fetched",
"update_all_orgs_billing_quotas",
{
"event": "orgs summaries built",
"quota_limited_orgs_count": len(quota_limited_orgs),
"quota_limiting_suspended_orgs_count": len(quota_limiting_suspended_orgs),
},
Expand Down Expand Up @@ -589,8 +611,9 @@ def update_all_orgs_billing_quotas(
# quota_limited_teams is a dict of resources to team tokens (e.g. {"events": {"phc_123": 1737867600}, "recordings": {"phc_123": 1737867600}, "rows_synced": {"phc_123": 1737867600}})
# quota_limiting_suspended_teams is a dict of resources to team tokens (e.g. {"events": {"phc_123": 1737867600}, "recordings": {"phc_123": 1737867600}, "rows_synced": {"phc_123": 1737867600}})
report_quota_limiting_event(
"update_all_orgs_billing_quotas teams fetched",
"update_all_orgs_billing_quotas",
{
"event": "teams summaries built",
"quota_limited_teams_count": len(quota_limited_teams),
"quota_limiting_suspended_teams_count": len(quota_limiting_suspended_teams),
"orgs_with_changes_count": len(orgs_with_changes),
Expand Down Expand Up @@ -622,7 +645,12 @@ def update_all_orgs_billing_quotas(
quota_limiting_suspended_teams[field],
QuotaLimitingCaches.QUOTA_LIMITING_SUSPENDED_KEY,
)
report_quota_limiting_event("update_all_orgs_billing_quotas finished", {})
report_quota_limiting_event(
"update_all_orgs_billing_quotas",
{
"event": "finished",
},
)

return quota_limited_orgs, quota_limiting_suspended_orgs

Expand Down
6 changes: 4 additions & 2 deletions ee/billing/test/test_quota_limiting.py
Original file line number Diff line number Diff line change
Expand Up @@ -78,9 +78,10 @@ def test_quota_limiting_feature_flag_enabled(self, patch_feature_enabled, patch_
assert patch_capture.call_count == 8 # 7 logs + 1 org
# Find the org action call
org_action_call = next(
call for call in patch_capture.call_args_list if call.args[1] == "quota limiting suspended"
call for call in patch_capture.call_args_list if call.args[1] == "org_quota_limited_until"
)
assert org_action_call.kwargs.get("properties") == {
"event": "ignored",
"current_usage": 109,
"resource": "events",
"feature_flag": QUOTA_LIMIT_DATA_RETENTION_FLAG,
Expand Down Expand Up @@ -119,9 +120,10 @@ def test_quota_limiting_feature_flag_enabled(self, patch_feature_enabled, patch_
assert patch_capture.call_count == 8 # 7 logs + 1 org
# Find the org action call
org_action_call = next(
call for call in patch_capture.call_args_list if call.args[1] == "quota limiting already limited"
call for call in patch_capture.call_args_list if call.args[1] == "org_quota_limited_until"
)
assert org_action_call.kwargs.get("properties") == {
"event": "already limited",
"current_usage": 109,
"resource": "events",
}
Expand Down
3 changes: 3 additions & 0 deletions posthog/tasks/tasks.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
import time
from typing import Optional
from uuid import UUID
from sentry_sdk import capture_exception

import requests
from celery import shared_task
Expand Down Expand Up @@ -855,6 +856,8 @@ def update_quota_limiting() -> None:
update_all_orgs_billing_quotas()
except ImportError:
pass
except Exception as e:
capture_exception(e)


@shared_task(ignore_result=True)
Expand Down
Loading