From 71f975458e5229df093f132a6cce3f397eb61e36 Mon Sep 17 00:00:00 2001 From: David Szervanszky Date: Mon, 30 Mar 2020 16:45:26 +0100 Subject: [PATCH] Optimistic batching for batch merging Fix for issue #164. Previously we added trailers to each MR and merged the constituent MRs of a batch MR to the target branch. Now we will now optimistically add the trailers to each MR and rebase on the batch branch. And when the batch branch passes CI we merge it which will automatically merge each constituent branch. --- README.md | 27 ++++++++-------- marge/batch_job.py | 68 +++++++++++++++++------------------------ marge/git.py | 11 +++++-- marge/job.py | 2 ++ pylintrc | 1 + tests/git_repo_mock.py | 4 ++- tests/test_batch_job.py | 25 +-------------- 7 files changed, 57 insertions(+), 81 deletions(-) diff --git a/README.md b/README.md index bbcb532e..4d733a0a 100644 --- a/README.md +++ b/README.md @@ -381,16 +381,21 @@ extremely slow CI). ### How it works If marge-bot finds multiple merge requests to deal with, she attempts to create -a batch job. She filters the merge requests such that they have all have a +a batch job. This is initially a batch branch based on the latest master. +She then filters the merge requests such that they have all have a common target branch, and eliminates those that have not yet passed CI (a heuristic to help guarantee the batch will pass CI later). -Once the merge requests have been gathered, a batch branch is created using the -commits from each merge request in sequence. Any merge request that cannot be +Once the merge requests have been gathered, trailers are added to their branches one +by one, these changes are pushed to their remotes, they are rebased on the batch branch +and the rebased version is also pushed to the remote of the individual MR. This +is to ensure that the commit SHAs of the changes in the individual MRs match +those in the batch branch. This is very important later. Any merge request that cannot be merged to this branch (e.g. due to a rebase conflict) is filtered out. A new merge request is then created for this branch, and tested in CI. -If CI passes, the original merge requests will be merged one by one. +If CI passes, the batch branch is merged which merges all individual constituent +MRs (this is why the SHAs must match). If the batch job fails for any reason, we fall back to merging the first merge request, before attempting a new batch job. @@ -406,16 +411,10 @@ request, before attempting a new batch job. each source branch, because we know the final linearization of all commits passes in that all MRs passed individually on their branches. -* As trailers are added to the original merge requests only, their branches - would need to be pushed to in order to reflect this change. This would trigger - CI in each of the branches again that would have to be passed before merging, - which effectively defeats the point of batching. To workaround this, the - current implementation merges to the target branch through git, instead of the - GitLab API. GitLab will detect the merge request as having been merged, and - update the merge request status accordingly, regardless of whether it has - passed CI. This does still mean the triggered CI jobs will be running even - though the merge requests are merged. marge-bot will attempt to cancel these - pipelines, although this doesn't work too effectively if external CI is used. +* Because trailers are added during this process and pushed, this would trigger + CI pipelines for each branch to be merged. We use the `ci.skip` option to prevent + unnecessary CI runs which leaves skipped pipelines appearing on the Pipelines + page for the repo. However this is just a visual issue. * There is what can be considered to be a flaw in this implementation that could potentially result in a non-green master; consider the following situation: diff --git a/marge/batch_job.py b/marge/batch_job.py index b99fd875..c00d078c 100644 --- a/marge/batch_job.py +++ b/marge/batch_job.py @@ -112,7 +112,6 @@ def accept_mr( self, merge_request, expected_remote_target_branch_sha, - source_repo_url=None, ): log.info('Fusing MR !%s', merge_request.iid) approvals = merge_request.fetch_approvals() @@ -122,21 +121,6 @@ def accept_mr( if new_target_sha != expected_remote_target_branch_sha: raise CannotBatch('Someone was naughty and by-passed marge') - # FIXME: we should only add tested-by for the last MR in the batch - _, _, actual_sha = self.update_from_target_branch_and_push( - merge_request, - source_repo_url=source_repo_url, - ) - - sha_now = Commit.last_on_branch( - merge_request.source_project_id, merge_request.source_branch, self._api, - ).id - # Make sure no-one managed to race and push to the branch in the - # meantime, because we're about to impersonate the approvers, and - # we don't want to approve unreviewed commits - if sha_now != actual_sha: - raise CannotMerge('Someone pushed to branch while we were trying to merge') - # As we're not using the API to merge the MR, we don't strictly need to reapprove it. However, # it's a little weird to look at the merged MR to find it has no approvals, so let's do it anyway. self.maybe_reapprove(merge_request, approvals) @@ -198,6 +182,9 @@ def execute(self): merge_request.source_branch, '%s/%s' % (merge_request_remote, merge_request.source_branch), ) + # Apply the trailers before running the batch MR + self.add_trailers(merge_request) + self.push_force_to_mr(merge_request, True, source_repo_url, skip_ci=True) # Update on latest branch so it contains previous MRs self.fuse( @@ -207,6 +194,9 @@ def execute(self): local=True, ) + # Ensure that individual branch commit SHA matches matches that of its equivalent in batch MR + self.push_force_to_mr(merge_request, True, source_repo_url, skip_ci=True) + # Update branch with MR changes self._repo.fast_forward( BatchMergeJob.BATCH_BRANCH_NAME, @@ -225,36 +215,34 @@ def execute(self): working_merge_requests.append(merge_request) if len(working_merge_requests) <= 1: raise CannotBatch('not enough ready merge requests') - if self._project.only_allow_merge_if_pipeline_succeeds: - # This switches git to branch - self.push_batch() - batch_mr = self.create_batch_mr( - target_branch=target_branch, - ) + # This switches git to branch + self.push_batch() + batch_mr = self.create_batch_mr( + target_branch=target_branch, + ) + for merge_request in working_merge_requests: + merge_request.comment('I will attempt to batch this MR (!{})...'.format(batch_mr.iid)) + try: + self.wait_for_ci_to_pass(batch_mr) + except CannotMerge as err: for merge_request in working_merge_requests: - merge_request.comment('I will attempt to batch this MR (!{})...'.format(batch_mr.iid)) - try: - self.wait_for_ci_to_pass(batch_mr) - except CannotMerge as err: - for merge_request in working_merge_requests: - merge_request.comment( - 'Batch MR !{batch_mr_iid} failed: {error} I will retry later...'.format( - batch_mr_iid=batch_mr.iid, - error=err.reason, - ), - ) - raise CannotBatch(err.reason) from err + merge_request.comment( + 'Batch MR !{batch_mr_iid} failed: {error} I will retry later...'.format( + batch_mr_iid=batch_mr.iid, + error=err.reason, + ), + ) + raise CannotBatch(err.reason) from err for merge_request in working_merge_requests: try: # FIXME: this should probably be part of the merge request _, source_repo_url, merge_request_remote = self.fetch_source_project(merge_request) - self.ensure_mr_not_changed(merge_request) self.ensure_mergeable_mr(merge_request) - remote_target_branch_sha = self.accept_mr( - merge_request, - remote_target_branch_sha, - source_repo_url=source_repo_url, - ) + if merge_request == working_merge_requests[-1]: + self.accept_mr( + batch_mr, + remote_target_branch_sha, + ) except CannotBatch as err: merge_request.comment( "I couldn't merge this branch: {error} I will retry later...".format( diff --git a/marge/git.py b/marge/git.py index 3b424b5e..153c49ad 100644 --- a/marge/git.py +++ b/marge/git.py @@ -131,7 +131,7 @@ def remove_branch(self, branch, *, new_current_branch='master'): def checkout_branch(self, branch, start_point=''): self.git('checkout', '-B', branch, start_point, '--') - def push(self, branch, *, source_repo_url=None, force=False): + def push(self, branch, *, source_repo_url=None, force=False, skip_ci=False): self.git('checkout', branch, '--') self.git('diff-index', '--quiet', 'HEAD') # check it is not dirty @@ -146,7 +146,14 @@ def push(self, branch, *, source_repo_url=None, force=False): else: source = 'origin' force_flag = '--force' if force else '' - self.git('push', force_flag, source, '%s:%s' % (branch, branch)) + # The following needs to be split into 2 variables as any whitespace in the string + # causes it to be quoted which makes the string ignored by git + if skip_ci: + skip_1 = '-o' + skip_2 = 'ci.skip' + else: + skip_1, skip_2 = '', '' + self.git('push', force_flag, skip_1, skip_2, source, '%s:%s' % (branch, branch)) def get_commit_hash(self, rev='HEAD'): """Return commit hash for `rev` (default "HEAD").""" diff --git a/marge/job.py b/marge/job.py index 2abbce10..39eb8d00 100644 --- a/marge/job.py +++ b/marge/job.py @@ -336,12 +336,14 @@ def push_force_to_mr( merge_request, branch_was_modified, source_repo_url=None, + skip_ci=False, ): try: self._repo.push( merge_request.source_branch, source_repo_url=source_repo_url, force=True, + skip_ci=skip_ci ) except git.GitError: def fetch_remote_branch(): diff --git a/pylintrc b/pylintrc index de05e3d5..3a237775 100644 --- a/pylintrc +++ b/pylintrc @@ -30,6 +30,7 @@ max-line-length=110 [DESIGN] max-args=10 max-attributes=15 +max-locals=16 max-public-methods=35 [REPORTS] diff --git a/tests/git_repo_mock.py b/tests/git_repo_mock.py index 6aeca3f0..13787537 100644 --- a/tests/git_repo_mock.py +++ b/tests/git_repo_mock.py @@ -178,9 +178,11 @@ def merge(self, arg): self._local_repo.set_ref(self._branch, new_sha) def push(self, *args): - force_flag, remote_name, refspec = args + force_flag, skip_1, skip_2, remote_name, refspec = args assert force_flag in ('', '--force') + assert skip_1 in ('', '-o') + assert skip_2 in ('', 'ci.skip') branch, remote_branch = refspec.split(':') remote_url = self._remotes[remote_name] diff --git a/tests/test_batch_job.py b/tests/test_batch_job.py index 109b9056..99ba0fce 100644 --- a/tests/test_batch_job.py +++ b/tests/test_batch_job.py @@ -7,10 +7,9 @@ import marge.project import marge.user from marge.batch_job import BatchMergeJob, CannotBatch -from marge.gitlab import GET from marge.job import CannotMerge, MergeJobOptions from marge.merge_request import MergeRequest -from tests.gitlab_api_mock import MockLab, Ok, commit +from tests.gitlab_api_mock import MockLab class TestBatchJob: @@ -160,25 +159,3 @@ def test_fuse_mr_when_target_branch_was_moved(self, api, mocklab): with pytest.raises(CannotBatch) as exc_info: batch_merge_job.accept_mr(merge_request, 'abc') assert str(exc_info.value) == 'Someone was naughty and by-passed marge' - - def test_fuse_mr_when_source_branch_was_moved(self, api, mocklab): - batch_merge_job = self.get_batch_merge_job(api, mocklab) - merge_request = self._mock_merge_request( - source_project_id=mocklab.merge_request_info['source_project_id'], - target_branch='master', - source_branch=mocklab.merge_request_info['source_branch'], - ) - - api.add_transition( - GET( - '/projects/{project_iid}/repository/branches/useless_new_feature'.format( - project_iid=mocklab.merge_request_info['source_project_id'], - ), - ), - Ok({'commit': commit(commit_id='abc', status='running')}), - ) - - with pytest.raises(CannotMerge) as exc_info: - batch_merge_job.accept_mr(merge_request, mocklab.initial_master_sha) - - assert str(exc_info.value) == 'Someone pushed to branch while we were trying to merge'