Skip to content

Commit

Permalink
ref(pr-comments): move queuing task into CommitContextIntegration ABC (
Browse files Browse the repository at this point in the history
  • Loading branch information
cathteng authored Oct 17, 2024
1 parent 889c589 commit f286385
Show file tree
Hide file tree
Showing 4 changed files with 191 additions and 130 deletions.
4 changes: 4 additions & 0 deletions src/sentry/integrations/gitlab/client.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@
from requests import PreparedRequest

from sentry.identity.services.identity.model import RpcIdentity
from sentry.integrations.base import IntegrationFeatureNotImplementedError
from sentry.integrations.gitlab.blame import fetch_file_blames
from sentry.integrations.gitlab.utils import GitLabApiClientPath
from sentry.integrations.source_code_management.commit_context import (
Expand Down Expand Up @@ -309,6 +310,9 @@ def get_commit(self, project_id, sha):
"""
return self.get_cached(GitLabApiClientPath.commit.format(project=project_id, sha=sha))

def get_merge_commit_sha_from_commit(self, repo: str, sha: str) -> str | None:
raise IntegrationFeatureNotImplementedError

def compare_commits(self, project_id, start_sha, end_sha):
"""Compare commits between two SHAs
Expand Down
159 changes: 154 additions & 5 deletions src/sentry/integrations/source_code_management/commit_context.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,22 +4,51 @@
from abc import ABC, abstractmethod
from collections.abc import Mapping, Sequence
from dataclasses import dataclass
from datetime import datetime
from datetime import datetime, timedelta, timezone
from typing import Any

from django.utils import timezone
import sentry_sdk
from django.utils import timezone as django_timezone

from sentry import analytics
from sentry.auth.exceptions import IdentityNotValid
from sentry.integrations.models.repository_project_path_config import RepositoryProjectPathConfig
from sentry.models.pullrequest import CommentType, PullRequestComment
from sentry.locks import locks
from sentry.models.commit import Commit
from sentry.models.group import Group
from sentry.models.groupowner import GroupOwner
from sentry.models.options.organization_option import OrganizationOption
from sentry.models.project import Project
from sentry.models.pullrequest import (
CommentType,
PullRequest,
PullRequestComment,
PullRequestCommit,
)
from sentry.models.repository import Repository
from sentry.users.models.identity import Identity
from sentry.utils import metrics
from sentry.utils.cache import cache

logger = logging.getLogger(__name__)


def _debounce_pr_comment_cache_key(pullrequest_id: int) -> str:
return f"pr-comment-{pullrequest_id}"


def _debounce_pr_comment_lock_key(pullrequest_id: int) -> str:
return f"queue_comment_task:{pullrequest_id}"


def _pr_comment_log(integration_name: str, suffix: str) -> str:
return f"{integration_name}.pr_comment.{suffix}"


PR_COMMENT_TASK_TTL = timedelta(minutes=5).total_seconds()
PR_COMMENT_WINDOW = 14 # days


@dataclass
class SourceLineInfo:
lineno: int
Expand Down Expand Up @@ -84,6 +113,122 @@ def get_commit_context_all_frames(
"""
return self.get_blame_for_files(files, extra)

def queue_comment_task_if_needed(
self,
project: Project,
commit: Commit,
group_owner: GroupOwner,
group_id: int,
) -> None:
if not OrganizationOption.objects.get_value(
organization=project.organization,
key="sentry:github_pr_bot",
default=True,
):
logger.info(
_pr_comment_log(integration_name=self.integration_name, suffix="disabled"),
extra={"organization_id": project.organization_id},
)
return

repo_query = Repository.objects.filter(id=commit.repository_id).order_by("-date_added")
group = Group.objects.get_from_cache(id=group_id)
if not (
group.level is not logging.INFO and repo_query.exists()
): # Don't comment on info level issues
logger.info(
_pr_comment_log(
integration_name=self.integration_name, suffix="incorrect_repo_config"
),
extra={"organization_id": project.organization_id},
)
return

repo: Repository = repo_query.get()

logger.info(
_pr_comment_log(integration_name=self.integration_name, suffix="queue_comment_check"),
extra={"organization_id": commit.organization_id, "merge_commit_sha": commit.key},
)
from sentry.integrations.github.tasks.pr_comment import github_comment_workflow

# client will raise an Exception if the request is not successful
try:
client = self.get_client()
merge_commit_sha = client.get_merge_commit_sha_from_commit(
repo=repo.name, sha=commit.key
)
except Exception as e:
sentry_sdk.capture_exception(e)
return

if merge_commit_sha is None:
logger.info(
_pr_comment_log(
integration_name=self.integration_name,
suffix="queue_comment_workflow.commit_not_in_default_branch",
),
extra={
"organization_id": commit.organization_id,
"repository_id": repo.id,
"commit_sha": commit.key,
},
)
return

pr_query = PullRequest.objects.filter(
organization_id=commit.organization_id,
repository_id=commit.repository_id,
merge_commit_sha=merge_commit_sha,
)
if not pr_query.exists():
logger.info(
_pr_comment_log(
integration_name=self.integration_name,
suffix="queue_comment_workflow.missing_pr",
),
extra={
"organization_id": commit.organization_id,
"repository_id": repo.id,
"commit_sha": commit.key,
},
)
return

pr = pr_query.first()
assert pr is not None
# need to query explicitly for merged PR comments since we can have multiple comments per PR
merged_pr_comment_query = PullRequestComment.objects.filter(
pull_request_id=pr.id, comment_type=CommentType.MERGED_PR
)
if pr.date_added >= datetime.now(tz=timezone.utc) - timedelta(days=PR_COMMENT_WINDOW) and (
not merged_pr_comment_query.exists()
or group_owner.group_id not in merged_pr_comment_query[0].group_ids
):
lock = locks.get(
_debounce_pr_comment_lock_key(pr.id), duration=10, name="queue_comment_task"
)
with lock.acquire():
cache_key = _debounce_pr_comment_cache_key(pullrequest_id=pr.id)
if cache.get(cache_key) is not None:
return

# create PR commit row for suspect commit and PR
PullRequestCommit.objects.get_or_create(commit=commit, pull_request=pr)

logger.info(
_pr_comment_log(
integration_name=self.integration_name, suffix="queue_comment_workflow"
),
extra={"pullrequest_id": pr.id, "project_id": group_owner.project_id},
)

cache.set(cache_key, True, PR_COMMENT_TASK_TTL)

github_comment_workflow.delay(
pullrequest_id=pr.id, project_id=group_owner.project_id
)

def create_or_update_comment(
self,
repo: Repository,
Expand Down Expand Up @@ -118,7 +263,7 @@ def create_or_update_comment(
),
)

current_time = timezone.now()
current_time = django_timezone.now()
comment = PullRequestComment.objects.create(
external_id=resp.body["id"],
pull_request_id=pullrequest_id,
Expand Down Expand Up @@ -156,7 +301,7 @@ def create_or_update_comment(
metrics.incr(
metrics_base.format(integration=self.integration_name, key="comment_updated")
)
pr_comment.updated_at = timezone.now()
pr_comment.updated_at = django_timezone.now()
pr_comment.group_ids = issue_list
pr_comment.save()

Expand Down Expand Up @@ -186,3 +331,7 @@ def update_comment(
self, repo: str, issue_id: str, comment_id: str, data: Mapping[str, Any]
) -> Any:
raise NotImplementedError

@abstractmethod
def get_merge_commit_sha_from_commit(self, repo: str, sha: str) -> str | None:
raise NotImplementedError
121 changes: 9 additions & 112 deletions src/sentry/tasks/commit_context.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,18 +2,17 @@

import logging
from collections.abc import Mapping, Sequence
from datetime import datetime, timedelta, timezone
from datetime import timedelta
from typing import Any

import sentry_sdk
from celery import Task
from celery.exceptions import MaxRetriesExceededError
from django.utils import timezone as django_timezone
from sentry_sdk import set_tag

from sentry import analytics
from sentry.api.serializers.models.release import get_users_for_authors
from sentry.integrations.base import IntegrationInstallation
from sentry.integrations.source_code_management.commit_context import CommitContextIntegration
from sentry.integrations.utils.code_mapping import get_sorted_code_mapping_configs
from sentry.integrations.utils.commit_context import (
find_commit_context_for_event_all_frames,
Expand All @@ -22,24 +21,14 @@
from sentry.locks import locks
from sentry.models.commit import Commit
from sentry.models.commitauthor import CommitAuthor
from sentry.models.group import Group
from sentry.models.groupowner import GroupOwner, GroupOwnerType
from sentry.models.options.organization_option import OrganizationOption
from sentry.models.project import Project
from sentry.models.projectownership import ProjectOwnership
from sentry.models.pullrequest import (
CommentType,
PullRequest,
PullRequestComment,
PullRequestCommit,
)
from sentry.models.repository import Repository
from sentry.shared_integrations.exceptions import ApiError
from sentry.silo.base import SiloMode
from sentry.tasks.base import instrumented_task
from sentry.tasks.groupowner import process_suspect_commits
from sentry.utils import metrics
from sentry.utils.cache import cache
from sentry.utils.locking import UnableToAcquireLock
from sentry.utils.sdk import set_current_event_project

Expand All @@ -49,87 +38,11 @@
PR_COMMENT_WINDOW = 14 # days

# TODO: replace this with isinstance(installation, CommitContextIntegration)
PR_COMMENT_SUPPORTED_PROVIDERS = {"integrations:github"}
PR_COMMENT_SUPPORTED_PROVIDERS = {"github"}

logger = logging.getLogger(__name__)


def queue_comment_task_if_needed(
commit: Commit, group_owner: GroupOwner, repo: Repository, installation: IntegrationInstallation
) -> None:
from sentry.integrations.github.tasks.pr_comment import github_comment_workflow

logger.info(
"github.pr_comment.queue_comment_check",
extra={"organization_id": commit.organization_id, "merge_commit_sha": commit.key},
)

# client will raise an Exception if the request is not successful
try:
client = installation.get_client()
merge_commit_sha = client.get_merge_commit_sha_from_commit(repo=repo.name, sha=commit.key)
except Exception as e:
sentry_sdk.capture_exception(e)
return

if merge_commit_sha is None:
logger.info(
"github.pr_comment.queue_comment_check.commit_not_in_default_branch",
extra={
"organization_id": commit.organization_id,
"repository_id": repo.id,
"commit_sha": commit.key,
},
)
return

pr_query = PullRequest.objects.filter(
organization_id=commit.organization_id,
repository_id=commit.repository_id,
merge_commit_sha=merge_commit_sha,
)
if not pr_query.exists():
logger.info(
"github.pr_comment.queue_comment_check.missing_pr",
extra={
"organization_id": commit.organization_id,
"repository_id": repo.id,
"commit_sha": commit.key,
},
)
return

pr = pr_query.first()
assert pr is not None
# need to query explicitly for merged PR comments since we can have multiple comments per PR
merged_pr_comment_query = PullRequestComment.objects.filter(
pull_request_id=pr.id, comment_type=CommentType.MERGED_PR
)
if pr.date_added >= datetime.now(tz=timezone.utc) - timedelta(days=PR_COMMENT_WINDOW) and (
not merged_pr_comment_query.exists()
or group_owner.group_id not in merged_pr_comment_query[0].group_ids
):
lock = locks.get(
DEBOUNCE_PR_COMMENT_LOCK_KEY(pr.id), duration=10, name="queue_comment_task"
)
with lock.acquire():
cache_key = DEBOUNCE_PR_COMMENT_CACHE_KEY(pullrequest_id=pr.id)
if cache.get(cache_key) is not None:
return

# create PR commit row for suspect commit and PR
PullRequestCommit.objects.get_or_create(commit=commit, pull_request=pr)

logger.info(
"github.pr_comment.queue_comment_workflow",
extra={"pullrequest_id": pr.id, "project_id": group_owner.project_id},
)

cache.set(cache_key, True, PR_COMMENT_TASK_TTL)

github_comment_workflow.delay(pullrequest_id=pr.id, project_id=group_owner.project_id)


@instrumented_task(
name="sentry.tasks.process_commit_context",
queue="group_owners.process_commit_context",
Expand Down Expand Up @@ -270,29 +183,13 @@ def process_commit_context(
}, # Updates date of an existing owner, since we just matched them with this new event
)

if OrganizationOption.objects.get_value(
organization=project.organization,
key="sentry:github_pr_bot",
default=True,
if (
installation
and isinstance(installation, CommitContextIntegration)
and installation.integration_name
in PR_COMMENT_SUPPORTED_PROVIDERS # TODO: remove this check
):
logger.info(
"github.pr_comment",
extra={"organization_id": project.organization_id},
)
repo = Repository.objects.filter(id=commit.repository_id).order_by("-date_added")
group = Group.objects.get_from_cache(id=group_id)
if (
group.level is not logging.INFO # Don't comment on info level issues
and installation is not None
and repo.exists()
and repo.get().provider in PR_COMMENT_SUPPORTED_PROVIDERS
):
queue_comment_task_if_needed(commit, group_owner, repo.get(), installation)
else:
logger.info(
"github.pr_comment.incorrect_repo_config",
extra={"organization_id": project.organization_id},
)
installation.queue_comment_task_if_needed(project, commit, group_owner, group_id)

ProjectOwnership.handle_auto_assignment(
project_id=project.id,
Expand Down
Loading

0 comments on commit f286385

Please sign in to comment.