Truncate large msgs in create_feedback and skip spam detection. +some…

… refactoring
getsentry · Oct 17, 2024 · 6429272 · 6429272
1 parent 59d9da9
commit 6429272
Show file tree

Hide file tree

Showing 3 changed files with 84 additions and 53 deletions.
diff --git a/src/sentry/feedback/usecases/create_feedback.py b/src/sentry/feedback/usecases/create_feedback.py
@@ -8,19 +8,20 @@
 
 import jsonschema
 
-from sentry import features, options
+from sentry import options
 from sentry.constants import DataCategory
 from sentry.eventstore.models import Event, GroupEvent
-from sentry.feedback.usecases.spam_detection import is_spam
+from sentry.feedback.usecases.spam_detection import (
+    auto_ignore_spam_feedbacks,
+    is_spam,
+    spam_detection_enabled,
+)
 from sentry.issues.grouptype import FeedbackGroup
 from sentry.issues.issue_occurrence import IssueEvidence, IssueOccurrence
 from sentry.issues.json_schemas import EVENT_PAYLOAD_SCHEMA, LEGACY_EVENT_PAYLOAD_SCHEMA
 from sentry.issues.producer import PayloadType, produce_occurrence_to_kafka
-from sentry.issues.status_change_message import StatusChangeMessage
-from sentry.models.group import GroupStatus
 from sentry.models.project import Project
 from sentry.signals import first_feedback_received, first_new_feedback_received
-from sentry.types.group import GroupSubStatus
 from sentry.utils import metrics
 from sentry.utils.outcomes import Outcome, track_outcome
 from sentry.utils.safe import get_path
@@ -141,7 +142,6 @@ def fix_for_issue_platform(event_data):
     # If no user email was provided specify the contact-email as the user-email.
     feedback_obj = event_data.get("contexts", {}).get("feedback", {})
     contact_email = feedback_obj.get("contact_email")
-
     if not ret_event["user"].get("email", ""):
         ret_event["user"]["email"] = contact_email
 
@@ -168,6 +168,21 @@ def fix_for_issue_platform(event_data):
     return ret_event
 
 
+def validate_issue_platform_event_schema(event_data):
+    """
+    The issue platform schema validation does not run in dev atm so we have to do the validation
+    ourselves, or else our tests are not representative of what happens in prod.
+    """
+    try:
+        jsonschema.validate(event_data, EVENT_PAYLOAD_SCHEMA)
+    except jsonschema.exceptions.ValidationError:
+        try:
+            jsonschema.validate(event_data, LEGACY_EVENT_PAYLOAD_SCHEMA)
+        except jsonschema.exceptions.ValidationError:
+            metrics.incr("feedback.create_feedback_issue.invalid_schema")
+            raise
+
+
 def should_filter_feedback(event, project_id, source: FeedbackCreationSource):
     # Right now all unreal error events without a feedback
     # actually get a sent a feedback with this message
@@ -222,26 +237,35 @@ def create_feedback_issue(event, project_id: int, source: FeedbackCreationSource
     if should_filter_feedback(event, project_id, source):
         return
 
+    feedback_message = event["contexts"]["feedback"]["message"]
+    max_msg_size = options.get("feedback.message.max-size")  # Note options are cached.
     project = Project.objects.get_from_cache(id=project_id)
 
+    # Spam detection.
     is_message_spam = None
-    if features.has(
-        "organizations:user-feedback-spam-filter-ingest", project.organization
-    ) and project.get_option("sentry:feedback_ai_spam_detection"):
-        try:
-            is_message_spam = is_spam(event["contexts"]["feedback"]["message"])
-        except Exception:
-            # until we have LLM error types ironed out, just catch all exceptions
-            logger.exception("Error checking if message is spam", extra={"project_id": project_id})
-        metrics.incr(
-            "feedback.create_feedback_issue.spam_detection",
-            tags={
-                "is_spam": is_message_spam,
-                "referrer": source.value,
-                "client_source": event["contexts"]["feedback"].get("source"),
-            },
-            sample_rate=1.0,
-        )
+    if spam_detection_enabled(project):
+        if len(feedback_message) <= max_msg_size:
+            try:
+                is_message_spam = is_spam(feedback_message)
+            except Exception:
+                # until we have LLM error types ironed out, just catch all exceptions
+                logger.exception(
+                    "Error checking if message is spam", extra={"project_id": project_id}
+                )
+            metrics.incr(
+                "feedback.create_feedback_issue.spam_detection",
+                tags={
+                    "is_spam": is_message_spam,
+                    "referrer": source.value,
+                    "client_source": event["contexts"]["feedback"].get("source"),
+                },
+                sample_rate=1.0,
+            )
+        else:
+            is_message_spam = True
+
+    if len(feedback_message) > max_msg_size:
+        feedback_message = feedback_message[:max_msg_size]
 
     # Note that some of the fields below like title and subtitle
     # are not used by the feedback UI, but are required.
@@ -257,7 +281,7 @@ def create_feedback_issue(event, project_id: int, source: FeedbackCreationSource
         project_id=project_id,
         fingerprint=issue_fingerprint,  # random UUID for fingerprint so feedbacks are grouped individually
         issue_title="User Feedback",
-        subtitle=event["contexts"]["feedback"]["message"],
+        subtitle=feedback_message,
         resource_id=None,
         evidence_data=evidence_data,
         evidence_display=evidence_display,
@@ -279,6 +303,7 @@ def create_feedback_issue(event, project_id: int, source: FeedbackCreationSource
     # make sure event data is valid for issue platform
     validate_issue_platform_event_schema(event_fixed)
 
+    # Analytics
     if not project.flags.has_feedbacks:
         first_feedback_received.send_robust(project=project, sender=Project)
 
@@ -291,9 +316,11 @@ def create_feedback_issue(event, project_id: int, source: FeedbackCreationSource
     ):
         first_new_feedback_received.send_robust(project=project, sender=Project)
 
+    # Send to issue platform for processing.
     produce_occurrence_to_kafka(
         payload_type=PayloadType.OCCURRENCE, occurrence=occurrence, event_data=event_fixed
     )
+    # Mark as spam with a STATUS_CHANGE kafka message.
     if is_message_spam:
         auto_ignore_spam_feedbacks(project, issue_fingerprint)
     metrics.incr(
@@ -304,6 +331,7 @@ def create_feedback_issue(event, project_id: int, source: FeedbackCreationSource
         },
         sample_rate=1.0,
     )
+
     track_outcome(
         org_id=project.organization_id,
         project_id=project_id,
@@ -317,21 +345,6 @@ def create_feedback_issue(event, project_id: int, source: FeedbackCreationSource
     )
 
 
-def validate_issue_platform_event_schema(event_data):
-    """
-    The issue platform schema validation does not run in dev atm so we have to do the validation
-    ourselves, or else our tests are not representative of what happens in prod.
-    """
-    try:
-        jsonschema.validate(event_data, EVENT_PAYLOAD_SCHEMA)
-    except jsonschema.exceptions.ValidationError:
-        try:
-            jsonschema.validate(event_data, LEGACY_EVENT_PAYLOAD_SCHEMA)
-        except jsonschema.exceptions.ValidationError:
-            metrics.incr("feedback.create_feedback_issue.invalid_schema")
-            raise
-
-
 class UserReportShimDict(TypedDict):
     name: str
     email: str
@@ -390,19 +403,5 @@ def shim_to_feedback(
         metrics.incr("feedback.shim_to_feedback.failed", tags={"referrer": source.value})
 
 
-def auto_ignore_spam_feedbacks(project, issue_fingerprint):
-    if features.has("organizations:user-feedback-spam-filter-actions", project.organization):
-        metrics.incr("feedback.spam-detection-actions.set-ignored")
-        produce_occurrence_to_kafka(
-            payload_type=PayloadType.STATUS_CHANGE,
-            status_change=StatusChangeMessage(
-                fingerprint=issue_fingerprint,
-                project_id=project.id,
-                new_status=GroupStatus.IGNORED,  # we use ignored in the UI for the spam tab
-                new_substatus=GroupSubStatus.FOREVER,
-            ),
-        )
-
-
 def is_in_feedback_denylist(organization):
     return organization.slug in options.get("feedback.organizations.slug-denylist")
diff --git a/src/sentry/feedback/usecases/spam_detection.py b/src/sentry/feedback/usecases/spam_detection.py
@@ -1,6 +1,12 @@
 import logging
 
+from sentry import features
+from sentry.issues.producer import PayloadType, produce_occurrence_to_kafka
+from sentry.issues.status_change_message import StatusChangeMessage
 from sentry.llm.usecases import LLMUseCase, complete_prompt
+from sentry.models.group import GroupStatus
+from sentry.models.project import Project
+from sentry.types.group import GroupSubStatus
 from sentry.utils import metrics
 
 logger = logging.getLogger(__name__)
@@ -68,3 +74,23 @@ def trim_response(text):
         return True, trimmed_text
     else:
         return False, trimmed_text
+
+
+def spam_detection_enabled(project: Project) -> bool:
+    return features.has(
+        "organizations:user-feedback-spam-filter-ingest", project.organization
+    ) and project.get_option("sentry:feedback_ai_spam_detection")
+
+
+def auto_ignore_spam_feedbacks(project, issue_fingerprint):
+    if features.has("organizations:user-feedback-spam-filter-actions", project.organization):
+        metrics.incr("feedback.spam-detection-actions.set-ignored")
+        produce_occurrence_to_kafka(
+            payload_type=PayloadType.STATUS_CHANGE,
+            status_change=StatusChangeMessage(
+                fingerprint=issue_fingerprint,
+                project_id=project.id,
+                new_status=GroupStatus.IGNORED,  # we use ignored in the UI for the spam tab
+                new_substatus=GroupSubStatus.FOREVER,
+            ),
+        )
diff --git a/src/sentry/options/defaults.py b/src/sentry/options/defaults.py
@@ -474,6 +474,12 @@
     default=[],
     flags=FLAG_ALLOW_EMPTY | FLAG_AUTOMATOR_MODIFIABLE,
 )
+register(
+    "feedback.message.max-size",
+    type=Int,
+    default=4096,
+    flags=FLAG_ALLOW_EMPTY | FLAG_AUTOMATOR_MODIFIABLE,
+)
 
 # Dev Toolbar Options
 register(