From 95643ea3650718eedd748d5470d9e3b6733fe328 Mon Sep 17 00:00:00 2001 From: Olivier Mehani Date: Thu, 23 Jan 2025 10:49:19 +1100 Subject: [PATCH 1/5] revision: deprecate patch_string in favour of patch --- src/lando/api/legacy/workers/landing_worker.py | 4 ++-- src/lando/api/tests/test_landings.py | 3 +-- src/lando/api/tests/test_transplants.py | 2 +- src/lando/main/models/revision.py | 7 ------- 4 files changed, 4 insertions(+), 12 deletions(-) diff --git a/src/lando/api/legacy/workers/landing_worker.py b/src/lando/api/legacy/workers/landing_worker.py index e8ea6679..ba3bcd78 100644 --- a/src/lando/api/legacy/workers/landing_worker.py +++ b/src/lando/api/legacy/workers/landing_worker.py @@ -264,10 +264,10 @@ def run_job(self, job: LandingJob) -> bool: for revision in job.revisions.all(): # TODO: Rather than parsing the patch details from the full HG patch # stored in the job, we should read the revision's metadata (and - # move to only store the diff in the patch_string, rather than an + # move to only store the diff in the patch, rather than an # export). # https://bugzilla.mozilla.org/show_bug.cgi?id=1936171 - patch_helper = HgPatchHelper(StringIO(revision.patch_string)) + patch_helper = HgPatchHelper(StringIO(revision.patch)) if not patch_helper.diff_start_line: message = ( "Lando encountered a malformed patch, please try again. " diff --git a/src/lando/api/tests/test_landings.py b/src/lando/api/tests/test_landings.py index e7940c7f..3b36ba1b 100644 --- a/src/lando/api/tests/test_landings.py +++ b/src/lando/api/tests/test_landings.py @@ -17,9 +17,8 @@ add_job_with_revisions, ) from lando.main.models.revision import Revision -from lando.main.scm import SCM_TYPE_HG +from lando.main.scm import SCM_TYPE_GIT, SCM_TYPE_HG from lando.main.scm.abstract_scm import AbstractSCM -from lando.main.scm.consts import SCM_TYPE_GIT from lando.main.scm.git import GitSCM from lando.main.scm.hg import HgSCM, LostPushRace from lando.utils import HgPatchHelper diff --git a/src/lando/api/tests/test_transplants.py b/src/lando/api/tests/test_transplants.py index dea1a8b1..968e2bd2 100644 --- a/src/lando/api/tests/test_transplants.py +++ b/src/lando/api/tests/test_transplants.py @@ -1184,7 +1184,7 @@ def test_integrated_transplant_sec_approval_group_is_excluded_from_reviewers_lis # Check the transplanted patch for our alternate commit message. transplanted_patch = Revision.get_from_revision_id(revision["id"]) assert transplanted_patch is not None, "Transplanted patch should be retrievable." - assert sec_approval_project["name"] not in transplanted_patch.patch_string + assert sec_approval_project["name"] not in transplanted_patch.patch def test_warning_wip_commit_message(phabdouble): diff --git a/src/lando/main/models/revision.py b/src/lando/main/models/revision.py index b0adafc9..09d257dc 100644 --- a/src/lando/main/models/revision.py +++ b/src/lando/main/models/revision.py @@ -67,13 +67,6 @@ def __repr__(self) -> str: def patch_bytes(self) -> bytes: return self.patch.encode("utf-8") - @property - def patch_string(self) -> str: - """Return the patch as a UTF-8 encoded string.""" - # Here for compatiblity, as an alias. - # TODO: remove this in the near future. - return self.patch - @classmethod def get_from_revision_id(cls, revision_id: int) -> "Revision" | None: """Return a Revision object from a given ID.""" From 8ff9b8f13eeb37863da2dbdc193615e580365764 Mon Sep 17 00:00:00 2001 From: Olivier Mehani Date: Thu, 23 Jan 2025 12:01:20 +1100 Subject: [PATCH 2/5] revision: add individual properties for patch metadata (bug 1936171) The ultimate goal of this change is to deprecate the back-and-forth use of the HgPatchHelper. This first change allows to confine the parsing of an Hg patch to the Revision object, which allows exposing cleaned-up attribute to the rest of the system. --- src/lando/main/models/revision.py | 116 ++++++++++++++++++++++++++-- src/lando/main/scm/hg.py | 4 + src/lando/main/tests/test_models.py | 72 +++++++++++++++++ 3 files changed, 185 insertions(+), 7 deletions(-) diff --git a/src/lando/main/models/revision.py b/src/lando/main/models/revision.py index 09d257dc..964b288b 100644 --- a/src/lando/main/models/revision.py +++ b/src/lando/main/models/revision.py @@ -8,12 +8,17 @@ from __future__ import annotations import logging -from typing import Any +import re +import time +from io import StringIO +from typing import Any, Optional from django.db import models from django.utils.translation import gettext_lazy +from lando.api.legacy.hgexports import HgPatchHelper from lando.main.models.base import BaseModel +from lando.main.scm.exceptions import NoDiffStartLine from lando.utils import build_patch_for_revision logger = logging.getLogger(__name__) @@ -33,9 +38,6 @@ class Revision(BaseModel): Includes a reference to the related Phabricator revision and diff ID if one exists. """ - def __str__(self): - return f"Revision {self.revision_id} Diff {self.diff_id}" - # revision_id and diff_id map to Phabricator IDs (integers). revision_id = models.IntegerField(blank=True, null=True, unique=True) @@ -43,10 +45,15 @@ def __str__(self): # does not track all diffs. diff_id = models.IntegerField(blank=True, null=True) - # The actual patch. + # The actual patch with Mercurial metadata format. patch = models.TextField(blank=True, default="") - # Patch metadata, such as author, timestamp, etc... + # Patch metadata, such as + # - author_name + # - author_email + # - commit_message + # - timestamp + # - ... patch_data = models.JSONField(blank=True, default=dict) # A general purpose data field to store arbitrary information about this revision. @@ -55,6 +62,11 @@ def __str__(self): # The commit ID generated by the landing worker, before pushing to remote repo. commit_id = models.CharField(max_length=40, null=True, blank=True) + _patch_helper: Optional[HgPatchHelper] = None + + def __str__(self): + return f"Revision {self.revision_id} Diff {self.diff_id}" + def __repr__(self) -> str: """Return a human-readable representation of the instance.""" # Add an identifier for the Phabricator revision if it exists. @@ -75,7 +87,14 @@ def get_from_revision_id(cls, revision_id: int) -> "Revision" | None: @classmethod def new_from_patch(cls, raw_diff: str, patch_data: dict[str, str]) -> Revision: - """Construct a new Revision from patch data.""" + """Construct a new Revision from patch data. + + `patch_data` is expected to contain the following keys: + - author_name + - author_email + - commit_message + - timestamp (unix timestamp as a string) + """ rev = Revision() rev.set_patch(raw_diff, patch_data) rev.save() @@ -97,6 +116,89 @@ def serialize(self) -> dict[str, Any]: "updated_at": self.updated_at, } + @property + def author(self): + """Get the full author string in "Name " format.""" + parts = [] + if self.author_name: + parts.append(self.author_name) + if self.author_email: + parts.append(f"<{self.author_email}>") + + return " ".join(parts) + + @property + def author_name(self) -> Optional[str]: + metadata = self._parse_metadata_from_patch() + return metadata.get("author_name") + + @property + def author_email(self) -> Optional[str]: + metadata = self._parse_metadata_from_patch() + return metadata.get("author_email") + + @property + def commit_message(self) -> Optional[str]: + metadata = self._parse_metadata_from_patch() + return metadata.get("commit_message") + + @property + def timestamp(self) -> Optional[str]: + metadata = self._parse_metadata_from_patch() + if ts := metadata.get("timestamp"): + # Some codepaths (via Phabricator) have the timestamp set as an int. + # We make sure it's always a string. + return str(ts) + return None + + def _parse_metadata_from_patch(self) -> dict[str, str]: + """Parse Hg metadata out of the raw patch, and update the patch_data if empty.""" + if not self.patch_data: + commit_message = self.patch_helper.get_commit_description() + author_name, author_email = self._parse_author_string( + self.patch_helper.get_header("User") + ) + timestamp = self.patch_helper.get_timestamp() + + self.patch_data = {"commit_message": commit_message, "timestamp": timestamp} + if author_name: + self.patch_data["author_name"] = author_name + if author_email: + self.patch_data["author_email"] = author_email + + return self.patch_data + + @staticmethod + def _parse_author_string(author: str) -> tuple[str, str]: + """Parse a Git author string into author name and email. + + The returned tuple will have the empty string "" for unmatched parts. + """ + r = re.compile( + r"^(?P.*?)? *[^ \t\n\r\f\v<]+@[^ \t\n\r\f\v>]+)>?" + ) + m = r.match(author) + if not m: + return (author, "") + return m.groups() + + @property + def diff(self) -> str: + """Return the unified diff text without any metadata""" + # The HgPatchHelper currently returns leading newline, which we don't want to + # return here, so we strip it. + return self.patch_helper.get_diff().lstrip() + + @property + def patch_helper(self) -> HgPatchHelper: + """Create and cache an HgPatchHelper to parse the raw patch with Hg metadata.""" + if not self._patch_helper: + self._patch_helper = HgPatchHelper(StringIO(self.patch)) + if not self._patch_helper.diff_start_line: + raise NoDiffStartLine + + return self._patch_helper + class DiffWarningStatus(models.TextChoices): ACTIVE = "ACTIVE", gettext_lazy("Active") diff --git a/src/lando/main/scm/hg.py b/src/lando/main/scm/hg.py index 454be76c..7abeb9d5 100644 --- a/src/lando/main/scm/hg.py +++ b/src/lando/main/scm/hg.py @@ -268,6 +268,10 @@ def apply_patch( # patcher since both attempts failed. raise exc + if re.match("^[0-9]+$", commit_date): + # If the commit_date is a unix timestamp, convert to Hg internal format. + commit_date = f"{commit_date} 0" + self.run_hg( ["commit"] + ["--date", commit_date] diff --git a/src/lando/main/tests/test_models.py b/src/lando/main/tests/test_models.py index 00434193..81a67035 100644 --- a/src/lando/main/tests/test_models.py +++ b/src/lando/main/tests/test_models.py @@ -1,3 +1,4 @@ +from datetime import datetime from unittest.mock import MagicMock, patch import pytest @@ -5,11 +6,21 @@ from django.core.exceptions import ValidationError from lando.main.models import Repo +from lando.main.models.revision import Revision from lando.main.scm import ( SCM_TYPE_GIT, SCM_TYPE_HG, ) +DIFF_ONLY = """ +diff --git a/test.txt b/test.txt +--- a/test.txt ++++ b/test.txt +@@ -1,1 +1,2 @@ + TEST ++adding another line +""".lstrip() + @pytest.mark.parametrize( "git_returncode,hg_returncode,scm_type", @@ -84,3 +95,64 @@ def test__models__Repo__system_path_validator(path, expected_exception): repo.clean_fields() else: repo.clean_fields() # Should not raise any exception + + +@pytest.mark.parametrize( + "author, expected", + [ + ( + "A. Uthor ", + ("A. Uthor", "author@moz.test"), + ), + ( + "author@moz.test", + ("", "author@moz.test"), + ), + ( + "", + ("", "author@moz.test"), + ), + ( + "A. Uthor", + ("A. Uthor", ""), + ), + ( + "@ Uthor", + ("@ Uthor", ""), + ), + ( + "<@ Uthor>", + ("<@ Uthor>", ""), + ), + ], +) +def test__models__Revision___parse_author_string(author, expected): + assert Revision._parse_author_string(author) == expected + + +@pytest.mark.django_db() +def test__models__Revision__metadata(): + author = "A. Uthor" + email = "author@moz.test" + commit_message = """Multiline Commit Message + + More lines + """ + timestamp = datetime.now().strftime("%s") + + r = Revision.new_from_patch( + raw_diff=DIFF_ONLY, + patch_data={ + "author_name": author, + "author_email": email, + "commit_message": commit_message, + "timestamp": timestamp, + }, + ) + + assert r.author_name == author + assert r.author_email == email + assert r.author == f"{author} <{email}>" + assert r.commit_message == commit_message + assert r.timestamp == timestamp + assert r.diff == DIFF_ONLY From e9e807fdf76d52e2df8f64643b5a240f7130e4eb Mon Sep 17 00:00:00 2001 From: Olivier Mehani Date: Thu, 23 Jan 2025 14:29:11 +1100 Subject: [PATCH 3/5] landing_worker: use metadata from Revision rather than HgPatchHelper (bug 1936171) --- .../api/legacy/workers/landing_worker.py | 29 +++++++------------ src/lando/main/models/revision.py | 1 - 2 files changed, 10 insertions(+), 20 deletions(-) diff --git a/src/lando/api/legacy/workers/landing_worker.py b/src/lando/api/legacy/workers/landing_worker.py index ba3bcd78..748b738f 100644 --- a/src/lando/api/legacy/workers/landing_worker.py +++ b/src/lando/api/legacy/workers/landing_worker.py @@ -5,7 +5,6 @@ import subprocess from contextlib import contextmanager from datetime import datetime -from io import StringIO from pathlib import Path from typing import ( Optional, @@ -15,7 +14,6 @@ from django.db import transaction from lando.api.legacy.commit_message import bug_list_to_commit_string, parse_bugs -from lando.api.legacy.hgexports import HgPatchHelper from lando.api.legacy.notifications import ( notify_user_of_bug_update_failure, notify_user_of_landing_failure, @@ -30,6 +28,7 @@ from lando.main.scm.abstract_scm import AbstractSCM from lando.main.scm.exceptions import ( AutoformattingException, + NoDiffStartLine, PatchConflict, SCMException, SCMInternalServerError, @@ -262,13 +261,15 @@ def run_job(self, job: LandingJob) -> bool: # Run through the patches one by one and try to apply them. for revision in job.revisions.all(): - # TODO: Rather than parsing the patch details from the full HG patch - # stored in the job, we should read the revision's metadata (and - # move to only store the diff in the patch, rather than an - # export). - # https://bugzilla.mozilla.org/show_bug.cgi?id=1936171 - patch_helper = HgPatchHelper(StringIO(revision.patch)) - if not patch_helper.diff_start_line: + + try: + scm.apply_patch( + revision.diff, + revision.commit_message, + revision.author, + revision.timestamp, + ) + except NoDiffStartLine: message = ( "Lando encountered a malformed patch, please try again. " "If this error persists please file a bug: " @@ -282,16 +283,6 @@ def run_job(self, job: LandingJob) -> bool: self.notify_user_of_landing_failure(job) return True - date = patch_helper.get_header("Date") - user = patch_helper.get_header("User") - - try: - scm.apply_patch( - patch_helper.get_diff(), - patch_helper.get_commit_description(), - user, - date, - ) except PatchConflict as exc: breakdown = scm.process_merge_conflict( repo.pull_path, revision.revision_id, str(exc) diff --git a/src/lando/main/models/revision.py b/src/lando/main/models/revision.py index 964b288b..01867f83 100644 --- a/src/lando/main/models/revision.py +++ b/src/lando/main/models/revision.py @@ -9,7 +9,6 @@ import logging import re -import time from io import StringIO from typing import Any, Optional From 11f298f036af18c85998c25b1f7e55539509b0bc Mon Sep 17 00:00:00 2001 From: Olivier Mehani Date: Thu, 23 Jan 2025 17:15:05 +1100 Subject: [PATCH 4/5] test_transplants: add missing job status assertion --- src/lando/api/tests/test_transplants.py | 1 + 1 file changed, 1 insertion(+) diff --git a/src/lando/api/tests/test_transplants.py b/src/lando/api/tests/test_transplants.py index 968e2bd2..28dc2af3 100644 --- a/src/lando/api/tests/test_transplants.py +++ b/src/lando/api/tests/test_transplants.py @@ -780,6 +780,7 @@ def test_integrated_transplant_records_approvers_peers_and_owners( worker = LandingWorker(repos=Repo.objects.all(), sleep_seconds=0.01) assert worker.run_job(job) + assert job.status == LandingJobStatus.LANDED for revision in job.revisions.all(): if revision.revision_id == 1: assert revision.data["peers_and_owners"] == [101] From eab066665801213bc1049e8b76e6c3c51b6292e3 Mon Sep 17 00:00:00 2001 From: Olivier Mehani Date: Mon, 3 Feb 2025 10:16:58 +1100 Subject: [PATCH 5/5] fixup! revision: add individual properties for patch metadata (bug 1936171) --- src/lando/main/models/revision.py | 15 ++++++--------- 1 file changed, 6 insertions(+), 9 deletions(-) diff --git a/src/lando/main/models/revision.py b/src/lando/main/models/revision.py index 01867f83..583f9d6f 100644 --- a/src/lando/main/models/revision.py +++ b/src/lando/main/models/revision.py @@ -128,29 +128,26 @@ def author(self): @property def author_name(self) -> Optional[str]: - metadata = self._parse_metadata_from_patch() - return metadata.get("author_name") + return self.metadata.get("author_name") @property def author_email(self) -> Optional[str]: - metadata = self._parse_metadata_from_patch() - return metadata.get("author_email") + return self.metadata.get("author_email") @property def commit_message(self) -> Optional[str]: - metadata = self._parse_metadata_from_patch() - return metadata.get("commit_message") + return self.metadata.get("commit_message") @property def timestamp(self) -> Optional[str]: - metadata = self._parse_metadata_from_patch() - if ts := metadata.get("timestamp"): + if ts := self.metadata.get("timestamp"): # Some codepaths (via Phabricator) have the timestamp set as an int. # We make sure it's always a string. return str(ts) return None - def _parse_metadata_from_patch(self) -> dict[str, str]: + @property + def metadata(self) -> dict[str, str]: """Parse Hg metadata out of the raw patch, and update the patch_data if empty.""" if not self.patch_data: commit_message = self.patch_helper.get_commit_description()