AbstractSCM: move process_merge_conflict to SCM (bug 1940876) (#191)

* test_landings: update repo_mc factory to allow parameters * test_landings: reenable git apply_patch tests (bug 1940876) * GitSCM: implement process_merge_conflicts (and generate rejects) * AbstractSCM: move process_merge_conflict to SCM (bug 1940876) * GitSCM.update_repo: use git fetch rather than pull * GitSCM.apply_patch: raise specific exceptions * GitSCM: set Git user on local clone * test: ignore denyCurrentBranch in git repo fixture * test_landings: allow create_patch_revision to fetch normal patches by index * *SCM.apply_patch: add suffix to temporary files * tests: don't rstrip diff fixtures (git requires a trailing newline) * test_git: initialise test_repo with seed patch * tests: add git_repo to landings tests * test_landings: add repo_mc parametrable factory * test-landings: add hg_repo_mc fixture * conftest: dos2unix conversion Co-authored-by: Connor Sheehan <[email protected]> Co-authored-by: Zeid <[email protected]>
mozilla-conduit · Jan 17, 2025 · 6d0ff8b · 6d0ff8b
1 parent d780a9e
commit 6d0ff8b
Show file tree

Hide file tree

Showing 6 changed files with 225 additions and 160 deletions.
diff --git a/src/lando/api/legacy/workers/landing_worker.py b/src/lando/api/legacy/workers/landing_worker.py
@@ -2,14 +2,12 @@
 
 import configparser
 import logging
-import re
 import subprocess
 from contextlib import contextmanager
 from datetime import datetime
 from io import StringIO
 from pathlib import Path
 from typing import (
-    Any,
     Optional,
 )
 
@@ -129,47 +127,6 @@ def notify_user_of_landing_failure(job: LandingJob):
             job.requester_email, job.landing_job_identifier, job.error, job.id
         )
 
-    def process_merge_conflict(
-        self,
-        exception: PatchConflict,
-        repo: Repo,
-        scm: AbstractSCM,
-        revision_id: int,
-    ) -> dict[str, Any]:
-        """Extract and parse merge conflict data from exception into a usable format."""
-        failed_paths, rejects_paths = self.extract_error_data(str(exception))
-
-        # Find last commits to touch each failed path.
-        failed_path_changesets = [
-            (path, scm.last_commit_for_path(path)) for path in failed_paths
-        ]
-
-        breakdown = {
-            "revision_id": revision_id,
-            "rejects_paths": None,
-        }
-
-        breakdown["failed_paths"] = [
-            {
-                "path": path,
-                "url": f"{repo.pull_path}/file/{revision}/{path}",
-                "changeset_id": revision,
-            }
-            for (path, revision) in failed_path_changesets
-        ]
-        breakdown["rejects_paths"] = {}
-        for path in rejects_paths:
-            reject = {"path": path}
-            try:
-                with open(scm.get_rejects_path() / repo.path[1:] / path, "r") as f:
-                    reject["content"] = f.read()
-            except Exception as e:
-                logger.exception(e)
-            # Use actual path of file to store reject data, by removing
-            # `.rej` extension.
-            breakdown["rejects_paths"][path[:-4]] = reject
-        return breakdown
-
     @staticmethod
     def notify_user_of_bug_update_failure(job: LandingJob, exception: Exception):
         """Wrapper around notify_user_of_bug_update_failure for convenience.
@@ -202,24 +159,6 @@ def phab_trigger_repo_update(phab_identifier: str):
             logger.exception("Failed sending repo update task to Celery.")
             logger.exception(e)
 
-    @staticmethod
-    def extract_error_data(exception: str) -> tuple[list[str], list[str]]:
-        """Extract rejected hunks and file paths from exception message."""
-        # RE to capture .rej file paths.
-        rejs_re = re.compile(
-            r"^\d+ out of \d+ hunks FAILED -- saving rejects to file (.+)$",
-            re.MULTILINE,
-        )
-
-        # TODO: capture reason for patch failure, e.g. deleting non-existing file, or
-        # adding a pre-existing file, etc...
-        rejects_paths = rejs_re.findall(exception)
-
-        # Collect all failed paths by removing `.rej` extension.
-        failed_paths = [path[:-4] for path in rejects_paths]
-
-        return failed_paths, rejects_paths
-
     def autoformat(
         self,
         job: LandingJob,
@@ -354,8 +293,8 @@ def run_job(self, job: LandingJob) -> bool:
                         date,
                     )
                 except PatchConflict as exc:
-                    breakdown = self.process_merge_conflict(
-                        exc, repo, scm, revision.revision_id
+                    breakdown = scm.process_merge_conflict(
+                        repo.pull_path, revision.revision_id, str(exc)
                     )
                     job.error_breakdown = breakdown
 

diff --git a/src/lando/api/tests/test_hg.py b/src/lando/api/tests/test_hg.py
@@ -1,5 +1,6 @@
 import io
 import os
+import textwrap
 from unittest import mock
 
 import pytest
@@ -288,3 +289,59 @@ def test_repo_is_supported(repo_path: str, expected: bool, hg_clone):
     assert (
         scm.repo_is_supported(repo_path) == expected
     ), f"{scm} did not correctly report support for {repo_path}"
+
+
+def test_HgSCM__extract_error_data():
+    exception_message = textwrap.dedent(
+        """\
+    patching file toolkit/moz.configure
+    Hunk #1 FAILED at 2075
+    Hunk #2 FAILED at 2325
+    Hunk #3 FAILED at 2340
+    3 out of 3 hunks FAILED -- saving rejects to file toolkit/moz.configure.rej
+    patching file moz.configure
+    Hunk #1 FAILED at 239
+    Hunk #2 FAILED at 250
+    2 out of 2 hunks FAILED -- saving rejects to file moz.configure.rej
+    patching file a/b/c.d
+    Hunk #1 FAILED at 656
+    1 out of 1 hunks FAILED -- saving rejects to file a/b/c.d.rej
+    patching file d/e/f.g
+    Hunk #1 FAILED at 6
+    1 out of 1 hunks FAILED -- saving rejects to file d/e/f.g.rej
+    patching file h/i/j.k
+    Hunk #1 FAILED at 4
+    1 out of 1 hunks FAILED -- saving rejects to file h/i/j.k.rej
+    file G0fvb1RuMQxXNjs already exists
+    1 out of 1 hunks FAILED -- saving rejects to file G0fvb1RuMQxXNjs.rej
+    unable to find 'abc/def' for patching
+    (use '--prefix' to apply patch relative to the current directory)
+    1 out of 1 hunks FAILED -- saving rejects to file abc/def.rej
+    patching file browser/locales/en-US/browser/browserContext.ftl
+    Hunk #1 succeeded at 300 with fuzz 2 (offset -4 lines).
+    abort: patch failed to apply"""
+    )
+
+    expected_failed_paths = [
+        "toolkit/moz.configure",
+        "moz.configure",
+        "a/b/c.d",
+        "d/e/f.g",
+        "h/i/j.k",
+        "G0fvb1RuMQxXNjs",
+        "abc/def",
+    ]
+
+    expected_rejects_paths = [
+        "toolkit/moz.configure.rej",
+        "moz.configure.rej",
+        "a/b/c.d.rej",
+        "d/e/f.g.rej",
+        "h/i/j.k.rej",
+        "G0fvb1RuMQxXNjs.rej",
+        "abc/def.rej",
+    ]
+
+    failed_paths, rejects_paths = HgSCM._extract_error_data(exception_message)
+    assert failed_paths == expected_failed_paths
+    assert rejects_paths == expected_rejects_paths
diff --git a/src/lando/api/tests/test_landings.py b/src/lando/api/tests/test_landings.py
@@ -1,6 +1,5 @@
 import io
 import pathlib
-import textwrap
 import unittest.mock as mock
 from collections.abc import Callable
 
@@ -575,11 +574,10 @@ def test_lose_push_race(
 
 
 @pytest.mark.parametrize(
-    "repo_type",
+    "repo_type, expected_error_log",
     [
-        # Bug 1940876
-        # SCM_TYPE_GIT,
-        SCM_TYPE_HG,
+        (SCM_TYPE_GIT, "Rejected hunk"),
+        (SCM_TYPE_HG, "hunks FAILED"),
     ],
 )
 @pytest.mark.django_db
@@ -590,6 +588,7 @@ def test_merge_conflict(
     create_patch_revision,
     caplog,
     repo_type: str,
+    expected_error_log: str,
 ):
     repo = repo_mc(repo_type)
     treestatusdouble.open_tree(repo.name)
@@ -619,7 +618,9 @@ def test_merge_conflict(
 
     assert worker.run_job(job)
     assert job.status == LandingJobStatus.FAILED
-    assert "hunks FAILED" in caplog.text
+
+    assert expected_error_log in caplog.text
+
     assert job.error_breakdown, "No error breakdown added to job"
     assert job.error_breakdown.get(
         "rejects_paths"
@@ -640,8 +641,7 @@ def test_merge_conflict(
 @pytest.mark.parametrize(
     "repo_type",
     [
-        # Bug 1940876
-        # SCM_TYPE_GIT,
+        SCM_TYPE_GIT,
         SCM_TYPE_HG,
     ],
 )
@@ -689,62 +689,6 @@ def test_failed_landing_job_notification(
     assert mock_notify.call_count == 1
 
 
-def test_landing_worker__extract_error_data():
-    exception_message = textwrap.dedent(
-        """\
-    patching file toolkit/moz.configure
-    Hunk #1 FAILED at 2075
-    Hunk #2 FAILED at 2325
-    Hunk #3 FAILED at 2340
-    3 out of 3 hunks FAILED -- saving rejects to file toolkit/moz.configure.rej
-    patching file moz.configure
-    Hunk #1 FAILED at 239
-    Hunk #2 FAILED at 250
-    2 out of 2 hunks FAILED -- saving rejects to file moz.configure.rej
-    patching file a/b/c.d
-    Hunk #1 FAILED at 656
-    1 out of 1 hunks FAILED -- saving rejects to file a/b/c.d.rej
-    patching file d/e/f.g
-    Hunk #1 FAILED at 6
-    1 out of 1 hunks FAILED -- saving rejects to file d/e/f.g.rej
-    patching file h/i/j.k
-    Hunk #1 FAILED at 4
-    1 out of 1 hunks FAILED -- saving rejects to file h/i/j.k.rej
-    file G0fvb1RuMQxXNjs already exists
-    1 out of 1 hunks FAILED -- saving rejects to file G0fvb1RuMQxXNjs.rej
-    unable to find 'abc/def' for patching
-    (use '--prefix' to apply patch relative to the current directory)
-    1 out of 1 hunks FAILED -- saving rejects to file abc/def.rej
-    patching file browser/locales/en-US/browser/browserContext.ftl
-    Hunk #1 succeeded at 300 with fuzz 2 (offset -4 lines).
-    abort: patch failed to apply"""
-    )
-
-    expected_failed_paths = [
-        "toolkit/moz.configure",
-        "moz.configure",
-        "a/b/c.d",
-        "d/e/f.g",
-        "h/i/j.k",
-        "G0fvb1RuMQxXNjs",
-        "abc/def",
-    ]
-
-    expected_rejects_paths = [
-        "toolkit/moz.configure.rej",
-        "moz.configure.rej",
-        "a/b/c.d.rej",
-        "d/e/f.g.rej",
-        "h/i/j.k.rej",
-        "G0fvb1RuMQxXNjs.rej",
-        "abc/def.rej",
-    ]
-
-    failed_paths, rejects_paths = LandingWorker.extract_error_data(exception_message)
-    assert failed_paths == expected_failed_paths
-    assert rejects_paths == expected_rejects_paths
-
-
 @pytest.mark.parametrize(
     "repo_type",
     [
@@ -974,8 +918,7 @@ def _scm_get_last_commit_message(scm: AbstractSCM) -> str:
 @pytest.mark.parametrize(
     "repo_type",
     [
-        # Bug 1940876
-        # SCM_TYPE_GIT,
+        SCM_TYPE_GIT,
         SCM_TYPE_HG,
     ],
 )
@@ -985,6 +928,7 @@ def test_format_patch_fail(
     treestatusdouble,
     monkeypatch,
     create_patch_revision,
+    normal_patch,
     repo_type: str,
 ):
     """Tests automated formatting failures before landing."""
@@ -993,8 +937,8 @@ def test_format_patch_fail(
 
     revisions = [
         create_patch_revision(1, patch=PATCH_FORMATTING_PATTERN_FAIL),
-        create_patch_revision(2),
-        create_patch_revision(3),
+        create_patch_revision(2, patch=normal_patch(0)),
+        create_patch_revision(3, patch=normal_patch(1)),
     ]
     job_params = {
         "status": LandingJobStatus.IN_PROGRESS,
@@ -1030,8 +974,7 @@ def test_format_patch_fail(
 @pytest.mark.parametrize(
     "repo_type",
     [
-        # Bug 1940876
-        # SCM_TYPE_GIT,
+        SCM_TYPE_GIT,
         SCM_TYPE_HG,
     ],
 )
@@ -1048,8 +991,10 @@ def test_format_patch_no_landoini(
     treestatusdouble.open_tree(repo.name)
 
     revisions = [
-        create_patch_revision(1),
-        create_patch_revision(2),
+        # Patch=None lets create_patch_revision determine the patch to use based on the
+        # revision number.
+        create_patch_revision(1, patch=None),
+        create_patch_revision(2, patch=None),
     ]
     job_params = {
         "status": LandingJobStatus.IN_PROGRESS,

diff --git a/src/lando/main/scm/abstract_scm.py b/src/lando/main/scm/abstract_scm.py
@@ -1,7 +1,7 @@
 import logging
 from abc import abstractmethod
 from pathlib import Path
-from typing import ContextManager, Optional
+from typing import Any, ContextManager, Optional
 
 logger = logging.getLogger(__name__)
 
@@ -30,16 +30,6 @@ def scm_name(cls) -> str:
         """Return a _human-friendly_ string identifying the supported SCM (e.g.,
         `Mercurial`)."""
 
-    @classmethod
-    def get_rejects_path(cls) -> Path:
-        """Return the path where the SCM stores reject files.
-
-        We assume most SCMs just use the local directory, and provide a default
-        implementation. This however allows SCMs whose behaviour differs, such as
-        Mercurial, to provide a path to where the rejects are stored.
-        """
-        return Path(".")
-
     @abstractmethod
     def clone(self, source: str):
         """Clone a repository from a source.
@@ -102,6 +92,26 @@ def apply_patch(
             None
         """
 
+    @abstractmethod
+    def process_merge_conflict(
+        self, pull_path: str, revision_id: int, error_message: str
+    ) -> dict[str, Any]:
+        """Process merge conflict information captured in a PatchConflict, and return a
+        parsed structure.
+
+        The structure is a nested dict as follows:
+
+            revision_id: revision_id
+            failed paths: list[dict]
+                path: str
+                url: str
+                changeset_id: str
+            rejects_paths: dict[str, dict]
+                <str>: dict[str, str] (conflicted file path)
+                    path: str (reject file path)
+                    content: str
+        """
+
     @abstractmethod
     def for_pull(self) -> ContextManager:
         """Context manager to prepare the repo with the correct environment variables set for pulling."""