Skip to content

Commit

Permalink
landing_worker: metadata from Revision rather than HgPatchHelper (bug…
Browse files Browse the repository at this point in the history
… 1936171) (#200)

The ultimate goal of this change is to deprecate the back-and-forth use
of the HgPatchHelper. This first change allows to confine the parsing of
an Hg patch to the Revision object, which allows exposing cleaned-up
attribute to the rest of the system.

---

* revision: deprecate patch_string in favour of patch
* revision: add individual properties for patch metadata (bug 1936171)
* landing_worker: use metadata from Revision rather than HgPatchHelper …
* test_transplants: add missing job status assertion
  • Loading branch information
shtrom authored Feb 3, 2025
1 parent 1ce04bb commit e9407cf
Show file tree
Hide file tree
Showing 6 changed files with 194 additions and 36 deletions.
29 changes: 10 additions & 19 deletions src/lando/api/legacy/workers/landing_worker.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,6 @@
import subprocess
from contextlib import contextmanager
from datetime import datetime
from io import StringIO
from pathlib import Path
from typing import (
Optional,
Expand All @@ -15,7 +14,6 @@
from django.db import transaction

from lando.api.legacy.commit_message import bug_list_to_commit_string, parse_bugs
from lando.api.legacy.hgexports import HgPatchHelper
from lando.api.legacy.notifications import (
notify_user_of_bug_update_failure,
notify_user_of_landing_failure,
Expand All @@ -30,6 +28,7 @@
from lando.main.scm.abstract_scm import AbstractSCM
from lando.main.scm.exceptions import (
AutoformattingException,
NoDiffStartLine,
PatchConflict,
SCMException,
SCMInternalServerError,
Expand Down Expand Up @@ -262,13 +261,15 @@ def run_job(self, job: LandingJob) -> bool:

# Run through the patches one by one and try to apply them.
for revision in job.revisions.all():
# TODO: Rather than parsing the patch details from the full HG patch
# stored in the job, we should read the revision's metadata (and
# move to only store the diff in the patch_string, rather than an
# export).
# https://bugzilla.mozilla.org/show_bug.cgi?id=1936171
patch_helper = HgPatchHelper(StringIO(revision.patch_string))
if not patch_helper.diff_start_line:

try:
scm.apply_patch(
revision.diff,
revision.commit_message,
revision.author,
revision.timestamp,
)
except NoDiffStartLine:
message = (
"Lando encountered a malformed patch, please try again. "
"If this error persists please file a bug: "
Expand All @@ -282,16 +283,6 @@ def run_job(self, job: LandingJob) -> bool:
self.notify_user_of_landing_failure(job)
return True

date = patch_helper.get_header("Date")
user = patch_helper.get_header("User")

try:
scm.apply_patch(
patch_helper.get_diff(),
patch_helper.get_commit_description(),
user,
date,
)
except PatchConflict as exc:
breakdown = scm.process_merge_conflict(
repo.pull_path, revision.revision_id, str(exc)
Expand Down
3 changes: 1 addition & 2 deletions src/lando/api/tests/test_landings.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,9 +17,8 @@
add_job_with_revisions,
)
from lando.main.models.revision import Revision
from lando.main.scm import SCM_TYPE_HG
from lando.main.scm import SCM_TYPE_GIT, SCM_TYPE_HG
from lando.main.scm.abstract_scm import AbstractSCM
from lando.main.scm.consts import SCM_TYPE_GIT
from lando.main.scm.git import GitSCM
from lando.main.scm.hg import HgSCM, LostPushRace
from lando.utils import HgPatchHelper
Expand Down
3 changes: 2 additions & 1 deletion src/lando/api/tests/test_transplants.py
Original file line number Diff line number Diff line change
Expand Up @@ -776,6 +776,7 @@ def test_integrated_transplant_records_approvers_peers_and_owners(

worker = LandingWorker(repos=Repo.objects.all(), sleep_seconds=0.01)
assert worker.run_job(job)
assert job.status == LandingJobStatus.LANDED
for revision in job.revisions.all():
if revision.revision_id == 1:
assert revision.data["peers_and_owners"] == [101]
Expand Down Expand Up @@ -1180,7 +1181,7 @@ def test_integrated_transplant_sec_approval_group_is_excluded_from_reviewers_lis
# Check the transplanted patch for our alternate commit message.
transplanted_patch = Revision.get_from_revision_id(revision["id"])
assert transplanted_patch is not None, "Transplanted patch should be retrievable."
assert sec_approval_project["name"] not in transplanted_patch.patch_string
assert sec_approval_project["name"] not in transplanted_patch.patch


def test_warning_wip_commit_message(phabdouble):
Expand Down
119 changes: 105 additions & 14 deletions src/lando/main/models/revision.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,12 +8,16 @@
from __future__ import annotations

import logging
from typing import Any
import re
from io import StringIO
from typing import Any, Optional

from django.db import models
from django.utils.translation import gettext_lazy

from lando.api.legacy.hgexports import HgPatchHelper
from lando.main.models.base import BaseModel
from lando.main.scm.exceptions import NoDiffStartLine
from lando.utils import build_patch_for_revision

logger = logging.getLogger(__name__)
Expand All @@ -33,20 +37,22 @@ class Revision(BaseModel):
Includes a reference to the related Phabricator revision and diff ID if one exists.
"""

def __str__(self):
return f"Revision {self.revision_id} Diff {self.diff_id}"

# revision_id and diff_id map to Phabricator IDs (integers).
revision_id = models.IntegerField(blank=True, null=True, unique=True)

# diff_id is that of the latest diff on the revision at landing request time. It
# does not track all diffs.
diff_id = models.IntegerField(blank=True, null=True)

# The actual patch.
# The actual patch with Mercurial metadata format.
patch = models.TextField(blank=True, default="")

# Patch metadata, such as author, timestamp, etc...
# Patch metadata, such as
# - author_name
# - author_email
# - commit_message
# - timestamp
# - ...
patch_data = models.JSONField(blank=True, default=dict)

# A general purpose data field to store arbitrary information about this revision.
Expand All @@ -55,6 +61,11 @@ def __str__(self):
# The commit ID generated by the landing worker, before pushing to remote repo.
commit_id = models.CharField(max_length=40, null=True, blank=True)

_patch_helper: Optional[HgPatchHelper] = None

def __str__(self):
return f"Revision {self.revision_id} Diff {self.diff_id}"

def __repr__(self) -> str:
"""Return a human-readable representation of the instance."""
# Add an identifier for the Phabricator revision if it exists.
Expand All @@ -67,13 +78,6 @@ def __repr__(self) -> str:
def patch_bytes(self) -> bytes:
return self.patch.encode("utf-8")

@property
def patch_string(self) -> str:
"""Return the patch as a UTF-8 encoded string."""
# Here for compatiblity, as an alias.
# TODO: remove this in the near future.
return self.patch

@classmethod
def get_from_revision_id(cls, revision_id: int) -> "Revision" | None:
"""Return a Revision object from a given ID."""
Expand All @@ -82,7 +86,14 @@ def get_from_revision_id(cls, revision_id: int) -> "Revision" | None:

@classmethod
def new_from_patch(cls, raw_diff: str, patch_data: dict[str, str]) -> Revision:
"""Construct a new Revision from patch data."""
"""Construct a new Revision from patch data.
`patch_data` is expected to contain the following keys:
- author_name
- author_email
- commit_message
- timestamp (unix timestamp as a string)
"""
rev = Revision()
rev.set_patch(raw_diff, patch_data)
rev.save()
Expand All @@ -104,6 +115,86 @@ def serialize(self) -> dict[str, Any]:
"updated_at": self.updated_at,
}

@property
def author(self):
"""Get the full author string in "Name <Email>" format."""
parts = []
if self.author_name:
parts.append(self.author_name)
if self.author_email:
parts.append(f"<{self.author_email}>")

return " ".join(parts)

@property
def author_name(self) -> Optional[str]:
return self.metadata.get("author_name")

@property
def author_email(self) -> Optional[str]:
return self.metadata.get("author_email")

@property
def commit_message(self) -> Optional[str]:
return self.metadata.get("commit_message")

@property
def timestamp(self) -> Optional[str]:
if ts := self.metadata.get("timestamp"):
# Some codepaths (via Phabricator) have the timestamp set as an int.
# We make sure it's always a string.
return str(ts)
return None

@property
def metadata(self) -> dict[str, str]:
"""Parse Hg metadata out of the raw patch, and update the patch_data if empty."""
if not self.patch_data:
commit_message = self.patch_helper.get_commit_description()
author_name, author_email = self._parse_author_string(
self.patch_helper.get_header("User")
)
timestamp = self.patch_helper.get_timestamp()

self.patch_data = {"commit_message": commit_message, "timestamp": timestamp}
if author_name:
self.patch_data["author_name"] = author_name
if author_email:
self.patch_data["author_email"] = author_email

return self.patch_data

@staticmethod
def _parse_author_string(author: str) -> tuple[str, str]:
"""Parse a Git author string into author name and email.
The returned tuple will have the empty string "" for unmatched parts.
"""
r = re.compile(
r"^(?P<author_name>.*?)? *<?(?P<author_email>[^ \t\n\r\f\v<]+@[^ \t\n\r\f\v>]+)>?"
)
m = r.match(author)
if not m:
return (author, "")
return m.groups()

@property
def diff(self) -> str:
"""Return the unified diff text without any metadata"""
# The HgPatchHelper currently returns leading newline, which we don't want to
# return here, so we strip it.
return self.patch_helper.get_diff().lstrip()

@property
def patch_helper(self) -> HgPatchHelper:
"""Create and cache an HgPatchHelper to parse the raw patch with Hg metadata."""
if not self._patch_helper:
self._patch_helper = HgPatchHelper(StringIO(self.patch))
if not self._patch_helper.diff_start_line:
raise NoDiffStartLine

return self._patch_helper


class DiffWarningStatus(models.TextChoices):
ACTIVE = "ACTIVE", gettext_lazy("Active")
Expand Down
4 changes: 4 additions & 0 deletions src/lando/main/scm/hg.py
Original file line number Diff line number Diff line change
Expand Up @@ -268,6 +268,10 @@ def apply_patch(
# patcher since both attempts failed.
raise exc

if re.match("^[0-9]+$", commit_date):
# If the commit_date is a unix timestamp, convert to Hg internal format.
commit_date = f"{commit_date} 0"

self.run_hg(
["commit"]
+ ["--date", commit_date]
Expand Down
72 changes: 72 additions & 0 deletions src/lando/main/tests/test_models.py
Original file line number Diff line number Diff line change
@@ -1,15 +1,26 @@
from datetime import datetime
from unittest.mock import MagicMock, patch

import pytest
from django.conf import settings
from django.core.exceptions import ValidationError

from lando.main.models import Repo
from lando.main.models.revision import Revision
from lando.main.scm import (
SCM_TYPE_GIT,
SCM_TYPE_HG,
)

DIFF_ONLY = """
diff --git a/test.txt b/test.txt
--- a/test.txt
+++ b/test.txt
@@ -1,1 +1,2 @@
TEST
+adding another line
""".lstrip()


@pytest.mark.parametrize(
"git_returncode,hg_returncode,scm_type",
Expand Down Expand Up @@ -84,3 +95,64 @@ def test__models__Repo__system_path_validator(path, expected_exception):
repo.clean_fields()
else:
repo.clean_fields() # Should not raise any exception


@pytest.mark.parametrize(
"author, expected",
[
(
"A. Uthor <[email protected]>",
("A. Uthor", "[email protected]"),
),
(
"[email protected]",
("", "[email protected]"),
),
(
"<[email protected]>",
("", "[email protected]"),
),
(
"A. Uthor",
("A. Uthor", ""),
),
(
"@ Uthor",
("@ Uthor", ""),
),
(
"<@ Uthor>",
("<@ Uthor>", ""),
),
],
)
def test__models__Revision___parse_author_string(author, expected):
assert Revision._parse_author_string(author) == expected


@pytest.mark.django_db()
def test__models__Revision__metadata():
author = "A. Uthor"
email = "[email protected]"
commit_message = """Multiline Commit Message
More lines
"""
timestamp = datetime.now().strftime("%s")

r = Revision.new_from_patch(
raw_diff=DIFF_ONLY,
patch_data={
"author_name": author,
"author_email": email,
"commit_message": commit_message,
"timestamp": timestamp,
},
)

assert r.author_name == author
assert r.author_email == email
assert r.author == f"{author} <{email}>"
assert r.commit_message == commit_message
assert r.timestamp == timestamp
assert r.diff == DIFF_ONLY

0 comments on commit e9407cf

Please sign in to comment.