-
Notifications
You must be signed in to change notification settings - Fork 2
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
landing_worker: add and use metadata from Revision rather than HgPatchHelper (bug 1936171) #200
Changes from 4 commits
95643ea
8ff9b8f
e9e807f
11f298f
eab0666
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -8,12 +8,16 @@ | |
from __future__ import annotations | ||
|
||
import logging | ||
from typing import Any | ||
import re | ||
from io import StringIO | ||
from typing import Any, Optional | ||
|
||
from django.db import models | ||
from django.utils.translation import gettext_lazy | ||
|
||
from lando.api.legacy.hgexports import HgPatchHelper | ||
from lando.main.models.base import BaseModel | ||
from lando.main.scm.exceptions import NoDiffStartLine | ||
from lando.utils import build_patch_for_revision | ||
|
||
logger = logging.getLogger(__name__) | ||
|
@@ -33,20 +37,22 @@ class Revision(BaseModel): | |
Includes a reference to the related Phabricator revision and diff ID if one exists. | ||
""" | ||
|
||
def __str__(self): | ||
return f"Revision {self.revision_id} Diff {self.diff_id}" | ||
|
||
# revision_id and diff_id map to Phabricator IDs (integers). | ||
revision_id = models.IntegerField(blank=True, null=True, unique=True) | ||
|
||
# diff_id is that of the latest diff on the revision at landing request time. It | ||
# does not track all diffs. | ||
diff_id = models.IntegerField(blank=True, null=True) | ||
|
||
# The actual patch. | ||
# The actual patch with Mercurial metadata format. | ||
patch = models.TextField(blank=True, default="") | ||
|
||
# Patch metadata, such as author, timestamp, etc... | ||
# Patch metadata, such as | ||
# - author_name | ||
# - author_email | ||
# - commit_message | ||
# - timestamp | ||
# - ... | ||
patch_data = models.JSONField(blank=True, default=dict) | ||
|
||
# A general purpose data field to store arbitrary information about this revision. | ||
|
@@ -55,6 +61,11 @@ def __str__(self): | |
# The commit ID generated by the landing worker, before pushing to remote repo. | ||
commit_id = models.CharField(max_length=40, null=True, blank=True) | ||
|
||
_patch_helper: Optional[HgPatchHelper] = None | ||
|
||
def __str__(self): | ||
return f"Revision {self.revision_id} Diff {self.diff_id}" | ||
|
||
def __repr__(self) -> str: | ||
"""Return a human-readable representation of the instance.""" | ||
# Add an identifier for the Phabricator revision if it exists. | ||
|
@@ -67,13 +78,6 @@ def __repr__(self) -> str: | |
def patch_bytes(self) -> bytes: | ||
return self.patch.encode("utf-8") | ||
|
||
@property | ||
def patch_string(self) -> str: | ||
"""Return the patch as a UTF-8 encoded string.""" | ||
# Here for compatiblity, as an alias. | ||
# TODO: remove this in the near future. | ||
return self.patch | ||
|
||
@classmethod | ||
def get_from_revision_id(cls, revision_id: int) -> "Revision" | None: | ||
"""Return a Revision object from a given ID.""" | ||
|
@@ -82,7 +86,14 @@ def get_from_revision_id(cls, revision_id: int) -> "Revision" | None: | |
|
||
@classmethod | ||
def new_from_patch(cls, raw_diff: str, patch_data: dict[str, str]) -> Revision: | ||
"""Construct a new Revision from patch data.""" | ||
"""Construct a new Revision from patch data. | ||
|
||
`patch_data` is expected to contain the following keys: | ||
- author_name | ||
- author_email | ||
- commit_message | ||
- timestamp (unix timestamp as a string) | ||
""" | ||
rev = Revision() | ||
rev.set_patch(raw_diff, patch_data) | ||
rev.save() | ||
|
@@ -104,6 +115,89 @@ def serialize(self) -> dict[str, Any]: | |
"updated_at": self.updated_at, | ||
} | ||
|
||
@property | ||
def author(self): | ||
"""Get the full author string in "Name <Email>" format.""" | ||
parts = [] | ||
if self.author_name: | ||
parts.append(self.author_name) | ||
if self.author_email: | ||
parts.append(f"<{self.author_email}>") | ||
|
||
return " ".join(parts) | ||
|
||
@property | ||
def author_name(self) -> Optional[str]: | ||
metadata = self._parse_metadata_from_patch() | ||
return metadata.get("author_name") | ||
|
||
@property | ||
def author_email(self) -> Optional[str]: | ||
metadata = self._parse_metadata_from_patch() | ||
return metadata.get("author_email") | ||
|
||
@property | ||
def commit_message(self) -> Optional[str]: | ||
metadata = self._parse_metadata_from_patch() | ||
return metadata.get("commit_message") | ||
|
||
@property | ||
def timestamp(self) -> Optional[str]: | ||
metadata = self._parse_metadata_from_patch() | ||
if ts := metadata.get("timestamp"): | ||
# Some codepaths (via Phabricator) have the timestamp set as an int. | ||
# We make sure it's always a string. | ||
return str(ts) | ||
return None | ||
|
||
def _parse_metadata_from_patch(self) -> dict[str, str]: | ||
"""Parse Hg metadata out of the raw patch, and update the patch_data if empty.""" | ||
if not self.patch_data: | ||
commit_message = self.patch_helper.get_commit_description() | ||
author_name, author_email = self._parse_author_string( | ||
self.patch_helper.get_header("User") | ||
) | ||
timestamp = self.patch_helper.get_timestamp() | ||
|
||
self.patch_data = {"commit_message": commit_message, "timestamp": timestamp} | ||
if author_name: | ||
self.patch_data["author_name"] = author_name | ||
if author_email: | ||
self.patch_data["author_email"] = author_email | ||
|
||
return self.patch_data | ||
|
||
@staticmethod | ||
def _parse_author_string(author: str) -> tuple[str, str]: | ||
"""Parse a Git author string into author name and email. | ||
|
||
The returned tuple will have the empty string "" for unmatched parts. | ||
""" | ||
r = re.compile( | ||
r"^(?P<author_name>.*?)? *<?(?P<author_email>[^ \t\n\r\f\v<]+@[^ \t\n\r\f\v>]+)>?" | ||
) | ||
m = r.match(author) | ||
if not m: | ||
return (author, "") | ||
return m.groups() | ||
|
||
@property | ||
def diff(self) -> str: | ||
"""Return the unified diff text without any metadata""" | ||
# The HgPatchHelper currently returns leading newline, which we don't want to | ||
# return here, so we strip it. | ||
return self.patch_helper.get_diff().lstrip() | ||
|
||
@property | ||
def patch_helper(self) -> HgPatchHelper: | ||
"""Create and cache an HgPatchHelper to parse the raw patch with Hg metadata.""" | ||
if not self._patch_helper: | ||
self._patch_helper = HgPatchHelper(StringIO(self.patch)) | ||
if not self._patch_helper.diff_start_line: | ||
raise NoDiffStartLine | ||
|
||
return self._patch_helper | ||
Comment on lines
+181
to
+196
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. These two methods are hg-specific, wonder if we should be making them more scm-agnostic? There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Yes, the purpose of this change is a stop gap to start hiding the HgPatchHelper. Historically, we transformed Phab revision into a Hg Patch, which we no longer need to do. In this first cut, we hide the patch helper from everything else, and let the interface of the Revision be sufficient, and we can change the implementation later on. |
||
|
||
|
||
class DiffWarningStatus(models.TextChoices): | ||
ACTIVE = "ACTIVE", gettext_lazy("Active") | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,15 +1,26 @@ | ||
from datetime import datetime | ||
from unittest.mock import MagicMock, patch | ||
|
||
import pytest | ||
from django.conf import settings | ||
from django.core.exceptions import ValidationError | ||
|
||
from lando.main.models import Repo | ||
from lando.main.models.revision import Revision | ||
from lando.main.scm import ( | ||
SCM_TYPE_GIT, | ||
SCM_TYPE_HG, | ||
) | ||
|
||
DIFF_ONLY = """ | ||
diff --git a/test.txt b/test.txt | ||
--- a/test.txt | ||
+++ b/test.txt | ||
@@ -1,1 +1,2 @@ | ||
TEST | ||
+adding another line | ||
""".lstrip() | ||
|
||
|
||
@pytest.mark.parametrize( | ||
"git_returncode,hg_returncode,scm_type", | ||
|
@@ -84,3 +95,64 @@ def test__models__Repo__system_path_validator(path, expected_exception): | |
repo.clean_fields() | ||
else: | ||
repo.clean_fields() # Should not raise any exception | ||
|
||
|
||
@pytest.mark.parametrize( | ||
"author, expected", | ||
[ | ||
( | ||
"A. Uthor <[email protected]>", | ||
("A. Uthor", "[email protected]"), | ||
), | ||
( | ||
"[email protected]", | ||
("", "[email protected]"), | ||
), | ||
( | ||
"<[email protected]>", | ||
("", "[email protected]"), | ||
), | ||
( | ||
"A. Uthor", | ||
("A. Uthor", ""), | ||
), | ||
( | ||
"@ Uthor", | ||
("@ Uthor", ""), | ||
), | ||
( | ||
"<@ Uthor>", | ||
("<@ Uthor>", ""), | ||
), | ||
], | ||
) | ||
def test__models__Revision___parse_author_string(author, expected): | ||
assert Revision._parse_author_string(author) == expected | ||
|
||
|
||
@pytest.mark.django_db() | ||
def test__models__Revision__metadata(): | ||
author = "A. Uthor" | ||
email = "[email protected]" | ||
commit_message = """Multiline Commit Message | ||
|
||
More lines | ||
""" | ||
timestamp = datetime.now().strftime("%s") | ||
|
||
r = Revision.new_from_patch( | ||
raw_diff=DIFF_ONLY, | ||
patch_data={ | ||
"author_name": author, | ||
"author_email": email, | ||
"commit_message": commit_message, | ||
"timestamp": timestamp, | ||
}, | ||
) | ||
|
||
assert r.author_name == author | ||
assert r.author_email == email | ||
assert r.author == f"{author} <{email}>" | ||
assert r.commit_message == commit_message | ||
assert r.timestamp == timestamp | ||
assert r.diff == DIFF_ONLY |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Feels a bit repetitive to call
_parse_metadata_from_patch
in each method. Maybe all these values should be determined at the same time, wheneverself.patch_data
is set.There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
The metadata is only parsed one, and cached, as part of the
_parse_metadata_from_patch()
method https://github.com/mozilla-conduit/lando/pull/200/files#diff-6f00d8fe4977345e9360a87afcf4861324a3b6ff012d6a6a5716640b4b44c04dR153-R168But calling it in every method makes sure we call it the first time we ever need any metadata.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
The repetitiveness aspect (i.e., calling
_parse_metadata_from_patch
in every method) was more so what I meant, not so much efficiency. I think changing_parse_metadata_from_patch
to a property (e.g.,_parsed_metadata_from_patch
) and then using it directly in these methods would alleviate this issue.This would become:
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Ah! Yeah, that's better.