diff --git a/src/lando/api/legacy/workers/landing_worker.py b/src/lando/api/legacy/workers/landing_worker.py index e8ea6679..748b738f 100644 --- a/src/lando/api/legacy/workers/landing_worker.py +++ b/src/lando/api/legacy/workers/landing_worker.py @@ -5,7 +5,6 @@ import subprocess from contextlib import contextmanager from datetime import datetime -from io import StringIO from pathlib import Path from typing import ( Optional, @@ -15,7 +14,6 @@ from django.db import transaction from lando.api.legacy.commit_message import bug_list_to_commit_string, parse_bugs -from lando.api.legacy.hgexports import HgPatchHelper from lando.api.legacy.notifications import ( notify_user_of_bug_update_failure, notify_user_of_landing_failure, @@ -30,6 +28,7 @@ from lando.main.scm.abstract_scm import AbstractSCM from lando.main.scm.exceptions import ( AutoformattingException, + NoDiffStartLine, PatchConflict, SCMException, SCMInternalServerError, @@ -262,13 +261,15 @@ def run_job(self, job: LandingJob) -> bool: # Run through the patches one by one and try to apply them. for revision in job.revisions.all(): - # TODO: Rather than parsing the patch details from the full HG patch - # stored in the job, we should read the revision's metadata (and - # move to only store the diff in the patch_string, rather than an - # export). - # https://bugzilla.mozilla.org/show_bug.cgi?id=1936171 - patch_helper = HgPatchHelper(StringIO(revision.patch_string)) - if not patch_helper.diff_start_line: + + try: + scm.apply_patch( + revision.diff, + revision.commit_message, + revision.author, + revision.timestamp, + ) + except NoDiffStartLine: message = ( "Lando encountered a malformed patch, please try again. " "If this error persists please file a bug: " @@ -282,16 +283,6 @@ def run_job(self, job: LandingJob) -> bool: self.notify_user_of_landing_failure(job) return True - date = patch_helper.get_header("Date") - user = patch_helper.get_header("User") - - try: - scm.apply_patch( - patch_helper.get_diff(), - patch_helper.get_commit_description(), - user, - date, - ) except PatchConflict as exc: breakdown = scm.process_merge_conflict( repo.pull_path, revision.revision_id, str(exc) diff --git a/src/lando/api/tests/test_landings.py b/src/lando/api/tests/test_landings.py index b66b75d5..ba5a2e8a 100644 --- a/src/lando/api/tests/test_landings.py +++ b/src/lando/api/tests/test_landings.py @@ -17,9 +17,8 @@ add_job_with_revisions, ) from lando.main.models.revision import Revision -from lando.main.scm import SCM_TYPE_HG +from lando.main.scm import SCM_TYPE_GIT, SCM_TYPE_HG from lando.main.scm.abstract_scm import AbstractSCM -from lando.main.scm.consts import SCM_TYPE_GIT from lando.main.scm.git import GitSCM from lando.main.scm.hg import HgSCM, LostPushRace from lando.utils import HgPatchHelper diff --git a/src/lando/api/tests/test_transplants.py b/src/lando/api/tests/test_transplants.py index 237df085..4041ef52 100644 --- a/src/lando/api/tests/test_transplants.py +++ b/src/lando/api/tests/test_transplants.py @@ -776,6 +776,7 @@ def test_integrated_transplant_records_approvers_peers_and_owners( worker = LandingWorker(repos=Repo.objects.all(), sleep_seconds=0.01) assert worker.run_job(job) + assert job.status == LandingJobStatus.LANDED for revision in job.revisions.all(): if revision.revision_id == 1: assert revision.data["peers_and_owners"] == [101] @@ -1180,7 +1181,7 @@ def test_integrated_transplant_sec_approval_group_is_excluded_from_reviewers_lis # Check the transplanted patch for our alternate commit message. transplanted_patch = Revision.get_from_revision_id(revision["id"]) assert transplanted_patch is not None, "Transplanted patch should be retrievable." - assert sec_approval_project["name"] not in transplanted_patch.patch_string + assert sec_approval_project["name"] not in transplanted_patch.patch def test_warning_wip_commit_message(phabdouble): diff --git a/src/lando/main/models/revision.py b/src/lando/main/models/revision.py index 677d36fb..52adfb02 100644 --- a/src/lando/main/models/revision.py +++ b/src/lando/main/models/revision.py @@ -8,12 +8,16 @@ from __future__ import annotations import logging -from typing import Any +import re +from io import StringIO +from typing import Any, Optional from django.db import models from django.utils.translation import gettext_lazy +from lando.api.legacy.hgexports import HgPatchHelper from lando.main.models.base import BaseModel +from lando.main.scm.exceptions import NoDiffStartLine from lando.utils import build_patch_for_revision logger = logging.getLogger(__name__) @@ -33,9 +37,6 @@ class Revision(BaseModel): Includes a reference to the related Phabricator revision and diff ID if one exists. """ - def __str__(self): - return f"Revision {self.revision_id} Diff {self.diff_id}" - # revision_id and diff_id map to Phabricator IDs (integers). revision_id = models.IntegerField(blank=True, null=True, unique=True) @@ -43,10 +44,15 @@ def __str__(self): # does not track all diffs. diff_id = models.IntegerField(blank=True, null=True) - # The actual patch. + # The actual patch with Mercurial metadata format. patch = models.TextField(blank=True, default="") - # Patch metadata, such as author, timestamp, etc... + # Patch metadata, such as + # - author_name + # - author_email + # - commit_message + # - timestamp + # - ... patch_data = models.JSONField(blank=True, default=dict) # A general purpose data field to store arbitrary information about this revision. @@ -55,6 +61,11 @@ def __str__(self): # The commit ID generated by the landing worker, before pushing to remote repo. commit_id = models.CharField(max_length=40, null=True, blank=True) + _patch_helper: Optional[HgPatchHelper] = None + + def __str__(self): + return f"Revision {self.revision_id} Diff {self.diff_id}" + def __repr__(self) -> str: """Return a human-readable representation of the instance.""" # Add an identifier for the Phabricator revision if it exists. @@ -67,13 +78,6 @@ def __repr__(self) -> str: def patch_bytes(self) -> bytes: return self.patch.encode("utf-8") - @property - def patch_string(self) -> str: - """Return the patch as a UTF-8 encoded string.""" - # Here for compatiblity, as an alias. - # TODO: remove this in the near future. - return self.patch - @classmethod def get_from_revision_id(cls, revision_id: int) -> "Revision" | None: """Return a Revision object from a given ID.""" @@ -82,7 +86,14 @@ def get_from_revision_id(cls, revision_id: int) -> "Revision" | None: @classmethod def new_from_patch(cls, raw_diff: str, patch_data: dict[str, str]) -> Revision: - """Construct a new Revision from patch data.""" + """Construct a new Revision from patch data. + + `patch_data` is expected to contain the following keys: + - author_name + - author_email + - commit_message + - timestamp (unix timestamp as a string) + """ rev = Revision() rev.set_patch(raw_diff, patch_data) rev.save() @@ -104,6 +115,86 @@ def serialize(self) -> dict[str, Any]: "updated_at": self.updated_at, } + @property + def author(self): + """Get the full author string in "Name " format.""" + parts = [] + if self.author_name: + parts.append(self.author_name) + if self.author_email: + parts.append(f"<{self.author_email}>") + + return " ".join(parts) + + @property + def author_name(self) -> Optional[str]: + return self.metadata.get("author_name") + + @property + def author_email(self) -> Optional[str]: + return self.metadata.get("author_email") + + @property + def commit_message(self) -> Optional[str]: + return self.metadata.get("commit_message") + + @property + def timestamp(self) -> Optional[str]: + if ts := self.metadata.get("timestamp"): + # Some codepaths (via Phabricator) have the timestamp set as an int. + # We make sure it's always a string. + return str(ts) + return None + + @property + def metadata(self) -> dict[str, str]: + """Parse Hg metadata out of the raw patch, and update the patch_data if empty.""" + if not self.patch_data: + commit_message = self.patch_helper.get_commit_description() + author_name, author_email = self._parse_author_string( + self.patch_helper.get_header("User") + ) + timestamp = self.patch_helper.get_timestamp() + + self.patch_data = {"commit_message": commit_message, "timestamp": timestamp} + if author_name: + self.patch_data["author_name"] = author_name + if author_email: + self.patch_data["author_email"] = author_email + + return self.patch_data + + @staticmethod + def _parse_author_string(author: str) -> tuple[str, str]: + """Parse a Git author string into author name and email. + + The returned tuple will have the empty string "" for unmatched parts. + """ + r = re.compile( + r"^(?P.*?)? *[^ \t\n\r\f\v<]+@[^ \t\n\r\f\v>]+)>?" + ) + m = r.match(author) + if not m: + return (author, "") + return m.groups() + + @property + def diff(self) -> str: + """Return the unified diff text without any metadata""" + # The HgPatchHelper currently returns leading newline, which we don't want to + # return here, so we strip it. + return self.patch_helper.get_diff().lstrip() + + @property + def patch_helper(self) -> HgPatchHelper: + """Create and cache an HgPatchHelper to parse the raw patch with Hg metadata.""" + if not self._patch_helper: + self._patch_helper = HgPatchHelper(StringIO(self.patch)) + if not self._patch_helper.diff_start_line: + raise NoDiffStartLine + + return self._patch_helper + class DiffWarningStatus(models.TextChoices): ACTIVE = "ACTIVE", gettext_lazy("Active") diff --git a/src/lando/main/scm/hg.py b/src/lando/main/scm/hg.py index 454be76c..7abeb9d5 100644 --- a/src/lando/main/scm/hg.py +++ b/src/lando/main/scm/hg.py @@ -268,6 +268,10 @@ def apply_patch( # patcher since both attempts failed. raise exc + if re.match("^[0-9]+$", commit_date): + # If the commit_date is a unix timestamp, convert to Hg internal format. + commit_date = f"{commit_date} 0" + self.run_hg( ["commit"] + ["--date", commit_date] diff --git a/src/lando/main/tests/test_models.py b/src/lando/main/tests/test_models.py index 00434193..81a67035 100644 --- a/src/lando/main/tests/test_models.py +++ b/src/lando/main/tests/test_models.py @@ -1,3 +1,4 @@ +from datetime import datetime from unittest.mock import MagicMock, patch import pytest @@ -5,11 +6,21 @@ from django.core.exceptions import ValidationError from lando.main.models import Repo +from lando.main.models.revision import Revision from lando.main.scm import ( SCM_TYPE_GIT, SCM_TYPE_HG, ) +DIFF_ONLY = """ +diff --git a/test.txt b/test.txt +--- a/test.txt ++++ b/test.txt +@@ -1,1 +1,2 @@ + TEST ++adding another line +""".lstrip() + @pytest.mark.parametrize( "git_returncode,hg_returncode,scm_type", @@ -84,3 +95,64 @@ def test__models__Repo__system_path_validator(path, expected_exception): repo.clean_fields() else: repo.clean_fields() # Should not raise any exception + + +@pytest.mark.parametrize( + "author, expected", + [ + ( + "A. Uthor ", + ("A. Uthor", "author@moz.test"), + ), + ( + "author@moz.test", + ("", "author@moz.test"), + ), + ( + "", + ("", "author@moz.test"), + ), + ( + "A. Uthor", + ("A. Uthor", ""), + ), + ( + "@ Uthor", + ("@ Uthor", ""), + ), + ( + "<@ Uthor>", + ("<@ Uthor>", ""), + ), + ], +) +def test__models__Revision___parse_author_string(author, expected): + assert Revision._parse_author_string(author) == expected + + +@pytest.mark.django_db() +def test__models__Revision__metadata(): + author = "A. Uthor" + email = "author@moz.test" + commit_message = """Multiline Commit Message + + More lines + """ + timestamp = datetime.now().strftime("%s") + + r = Revision.new_from_patch( + raw_diff=DIFF_ONLY, + patch_data={ + "author_name": author, + "author_email": email, + "commit_message": commit_message, + "timestamp": timestamp, + }, + ) + + assert r.author_name == author + assert r.author_email == email + assert r.author == f"{author} <{email}>" + assert r.commit_message == commit_message + assert r.timestamp == timestamp + assert r.diff == DIFF_ONLY