Skip to content

Commit

Permalink
Truncate large msgs in create_feedback and skip spam detection. +some…
Browse files Browse the repository at this point in the history
… refactoring
  • Loading branch information
aliu39 committed Oct 17, 2024
1 parent 59d9da9 commit 6429272
Show file tree
Hide file tree
Showing 3 changed files with 84 additions and 53 deletions.
105 changes: 52 additions & 53 deletions src/sentry/feedback/usecases/create_feedback.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,19 +8,20 @@

import jsonschema

from sentry import features, options
from sentry import options
from sentry.constants import DataCategory
from sentry.eventstore.models import Event, GroupEvent
from sentry.feedback.usecases.spam_detection import is_spam
from sentry.feedback.usecases.spam_detection import (
auto_ignore_spam_feedbacks,
is_spam,
spam_detection_enabled,
)
from sentry.issues.grouptype import FeedbackGroup
from sentry.issues.issue_occurrence import IssueEvidence, IssueOccurrence
from sentry.issues.json_schemas import EVENT_PAYLOAD_SCHEMA, LEGACY_EVENT_PAYLOAD_SCHEMA
from sentry.issues.producer import PayloadType, produce_occurrence_to_kafka
from sentry.issues.status_change_message import StatusChangeMessage
from sentry.models.group import GroupStatus
from sentry.models.project import Project
from sentry.signals import first_feedback_received, first_new_feedback_received
from sentry.types.group import GroupSubStatus
from sentry.utils import metrics
from sentry.utils.outcomes import Outcome, track_outcome
from sentry.utils.safe import get_path
Expand Down Expand Up @@ -141,7 +142,6 @@ def fix_for_issue_platform(event_data):
# If no user email was provided specify the contact-email as the user-email.
feedback_obj = event_data.get("contexts", {}).get("feedback", {})
contact_email = feedback_obj.get("contact_email")

if not ret_event["user"].get("email", ""):
ret_event["user"]["email"] = contact_email

Expand All @@ -168,6 +168,21 @@ def fix_for_issue_platform(event_data):
return ret_event


def validate_issue_platform_event_schema(event_data):
    """
    Check ``event_data`` against the issue platform event payload schemas.

    The issue platform does not run its own schema validation in dev at the moment, so the
    check is repeated here; otherwise tests would not be representative of what happens in prod.

    The current schema is tried first and the legacy schema second. When both reject the
    payload, the ``feedback.create_feedback_issue.invalid_schema`` metric is incremented and
    the ``ValidationError`` from the legacy schema is re-raised.
    """
    validation_error = jsonschema.exceptions.ValidationError
    try:
        jsonschema.validate(instance=event_data, schema=EVENT_PAYLOAD_SCHEMA)
    except validation_error:
        # The fallback runs inside the handler so that a second failure stays
        # chained to the first one.
        try:
            jsonschema.validate(instance=event_data, schema=LEGACY_EVENT_PAYLOAD_SCHEMA)
        except validation_error:
            metrics.incr("feedback.create_feedback_issue.invalid_schema")
            raise


def should_filter_feedback(event, project_id, source: FeedbackCreationSource):
# Right now all unreal error events without a feedback
# actually get sent a feedback with this message
Expand Down Expand Up @@ -222,26 +237,35 @@ def create_feedback_issue(event, project_id: int, source: FeedbackCreationSource
if should_filter_feedback(event, project_id, source):
return

feedback_message = event["contexts"]["feedback"]["message"]
max_msg_size = options.get("feedback.message.max-size") # Note options are cached.
project = Project.objects.get_from_cache(id=project_id)

# Spam detection.
is_message_spam = None
if features.has(
"organizations:user-feedback-spam-filter-ingest", project.organization
) and project.get_option("sentry:feedback_ai_spam_detection"):
try:
is_message_spam = is_spam(event["contexts"]["feedback"]["message"])
except Exception:
# until we have LLM error types ironed out, just catch all exceptions
logger.exception("Error checking if message is spam", extra={"project_id": project_id})
metrics.incr(
"feedback.create_feedback_issue.spam_detection",
tags={
"is_spam": is_message_spam,
"referrer": source.value,
"client_source": event["contexts"]["feedback"].get("source"),
},
sample_rate=1.0,
)
if spam_detection_enabled(project):
if len(feedback_message) <= max_msg_size:
try:
is_message_spam = is_spam(feedback_message)
except Exception:
# until we have LLM error types ironed out, just catch all exceptions
logger.exception(
"Error checking if message is spam", extra={"project_id": project_id}
)
metrics.incr(
"feedback.create_feedback_issue.spam_detection",
tags={
"is_spam": is_message_spam,
"referrer": source.value,
"client_source": event["contexts"]["feedback"].get("source"),
},
sample_rate=1.0,
)
else:
is_message_spam = True

if len(feedback_message) > max_msg_size:
feedback_message = feedback_message[:max_msg_size]

# Note that some of the fields below like title and subtitle
# are not used by the feedback UI, but are required.
Expand All @@ -257,7 +281,7 @@ def create_feedback_issue(event, project_id: int, source: FeedbackCreationSource
project_id=project_id,
fingerprint=issue_fingerprint, # random UUID for fingerprint so feedbacks are grouped individually
issue_title="User Feedback",
subtitle=event["contexts"]["feedback"]["message"],
subtitle=feedback_message,
resource_id=None,
evidence_data=evidence_data,
evidence_display=evidence_display,
Expand All @@ -279,6 +303,7 @@ def create_feedback_issue(event, project_id: int, source: FeedbackCreationSource
# make sure event data is valid for issue platform
validate_issue_platform_event_schema(event_fixed)

# Analytics
if not project.flags.has_feedbacks:
first_feedback_received.send_robust(project=project, sender=Project)

Expand All @@ -291,9 +316,11 @@ def create_feedback_issue(event, project_id: int, source: FeedbackCreationSource
):
first_new_feedback_received.send_robust(project=project, sender=Project)

# Send to issue platform for processing.
produce_occurrence_to_kafka(
payload_type=PayloadType.OCCURRENCE, occurrence=occurrence, event_data=event_fixed
)
# Mark as spam with a STATUS_CHANGE kafka message.
if is_message_spam:
auto_ignore_spam_feedbacks(project, issue_fingerprint)
metrics.incr(
Expand All @@ -304,6 +331,7 @@ def create_feedback_issue(event, project_id: int, source: FeedbackCreationSource
},
sample_rate=1.0,
)

track_outcome(
org_id=project.organization_id,
project_id=project_id,
Expand All @@ -317,21 +345,6 @@ def create_feedback_issue(event, project_id: int, source: FeedbackCreationSource
)


def validate_issue_platform_event_schema(event_data):
    """
    Check ``event_data`` against the issue platform event payload schemas.

    The issue platform does not run its own schema validation in dev at the moment, so the
    check is repeated here; otherwise tests would not be representative of what happens in prod.

    The current schema is tried first and the legacy schema second. When both reject the
    payload, the ``feedback.create_feedback_issue.invalid_schema`` metric is incremented and
    the ``ValidationError`` from the legacy schema is re-raised.
    """
    validation_error = jsonschema.exceptions.ValidationError
    try:
        jsonschema.validate(instance=event_data, schema=EVENT_PAYLOAD_SCHEMA)
    except validation_error:
        # The fallback runs inside the handler so that a second failure stays
        # chained to the first one.
        try:
            jsonschema.validate(instance=event_data, schema=LEGACY_EVENT_PAYLOAD_SCHEMA)
        except validation_error:
            metrics.incr("feedback.create_feedback_issue.invalid_schema")
            raise


class UserReportShimDict(TypedDict):
name: str
email: str
Expand Down Expand Up @@ -390,19 +403,5 @@ def shim_to_feedback(
metrics.incr("feedback.shim_to_feedback.failed", tags={"referrer": source.value})


def auto_ignore_spam_feedbacks(project, issue_fingerprint):
    """
    Emit a STATUS_CHANGE message that moves the feedback issue to IGNORED / FOREVER.

    Does nothing unless the ``organizations:user-feedback-spam-filter-actions`` feature is
    enabled for the project's organization.
    """
    if not features.has("organizations:user-feedback-spam-filter-actions", project.organization):
        return

    metrics.incr("feedback.spam-detection-actions.set-ignored")
    ignore_message = StatusChangeMessage(
        fingerprint=issue_fingerprint,
        project_id=project.id,
        new_status=GroupStatus.IGNORED,  # we use ignored in the UI for the spam tab
        new_substatus=GroupSubStatus.FOREVER,
    )
    produce_occurrence_to_kafka(
        payload_type=PayloadType.STATUS_CHANGE,
        status_change=ignore_message,
    )


def is_in_feedback_denylist(organization):
    """Return whether the organization's slug appears in the feedback slug-denylist option."""
    org_slug = organization.slug
    denied_slugs = options.get("feedback.organizations.slug-denylist")
    return org_slug in denied_slugs
26 changes: 26 additions & 0 deletions src/sentry/feedback/usecases/spam_detection.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,12 @@
import logging

from sentry import features
from sentry.issues.producer import PayloadType, produce_occurrence_to_kafka
from sentry.issues.status_change_message import StatusChangeMessage
from sentry.llm.usecases import LLMUseCase, complete_prompt
from sentry.models.group import GroupStatus
from sentry.models.project import Project
from sentry.types.group import GroupSubStatus
from sentry.utils import metrics

logger = logging.getLogger(__name__)
Expand Down Expand Up @@ -68,3 +74,23 @@ def trim_response(text):
return True, trimmed_text
else:
return False, trimmed_text


def spam_detection_enabled(project: Project) -> bool:
    """
    Return True when spam detection should run for feedback in ``project``.

    Both conditions must hold: the ``organizations:user-feedback-spam-filter-ingest`` feature
    is enabled for the project's organization, and the project option
    ``sentry:feedback_ai_spam_detection`` is truthy.
    """
    if not features.has("organizations:user-feedback-spam-filter-ingest", project.organization):
        return False
    # ``a and b`` evaluates to one of its operands, so the raw option value (possibly None or
    # another non-bool) used to leak out despite the ``-> bool`` annotation; coerce explicitly.
    return bool(project.get_option("sentry:feedback_ai_spam_detection"))


def auto_ignore_spam_feedbacks(project, issue_fingerprint):
    """
    Emit a STATUS_CHANGE message that moves the feedback issue to IGNORED / FOREVER.

    Does nothing unless the ``organizations:user-feedback-spam-filter-actions`` feature is
    enabled for the project's organization.
    """
    if not features.has("organizations:user-feedback-spam-filter-actions", project.organization):
        return

    metrics.incr("feedback.spam-detection-actions.set-ignored")
    ignore_message = StatusChangeMessage(
        fingerprint=issue_fingerprint,
        project_id=project.id,
        new_status=GroupStatus.IGNORED,  # we use ignored in the UI for the spam tab
        new_substatus=GroupSubStatus.FOREVER,
    )
    produce_occurrence_to_kafka(
        payload_type=PayloadType.STATUS_CHANGE,
        status_change=ignore_message,
    )
6 changes: 6 additions & 0 deletions src/sentry/options/defaults.py
Original file line number Diff line number Diff line change
Expand Up @@ -474,6 +474,12 @@
default=[],
flags=FLAG_ALLOW_EMPTY | FLAG_AUTOMATOR_MODIFIABLE,
)
# Maximum length of a user feedback message, compared against len(message) in
# create_feedback_issue. Longer messages are truncated to this length, and when spam
# detection is enabled for the project they are marked as spam without calling is_spam.
register(
"feedback.message.max-size",
type=Int,
default=4096,
flags=FLAG_ALLOW_EMPTY | FLAG_AUTOMATOR_MODIFIABLE,
)

# Dev Toolbar Options
register(
Expand Down

0 comments on commit 6429272

Please sign in to comment.