From 6318a61a2091e9939a85eafecd413fcaa4ee55e9 Mon Sep 17 00:00:00 2001
From: lihuanshuai
Date: Tue, 26 Aug 2014 11:24:02 +0800
Subject: [PATCH] add git.multi-gc

---
Review notes (ignored when the patch is applied):
 * the line structure of the patch was restored; hunk sizes and the diffstat
   are updated, the blob ids on the "index" lines are left as submitted
 * need_to_gc now passes the repository to _too_many_loose_objects
 * gc_repository creates `status` before the `try` block
 * REPACK_ALL_OPTS is reset before use so flags do not leak between runs
 * _too_many_loose_objects only samples a two-hex-digit fan-out directory
 * Jagare.gc accepts a single fork path as well as a list/tuple
 * test_multi_all really uses prune='all'; multi tests pass a list of forks

 .gitignore       |   1 +
 ellen/git/gc.py  | 210 +++++++++++++++++++++++++++++++++++++++++++++++
 ellen/repo.py    |  15 ++++
 tests/test_gc.py |  78 ++++++++++++++++++++++
 4 files changed, 304 insertions(+)
 create mode 100644 ellen/git/gc.py
 create mode 100644 tests/test_gc.py

diff --git a/.gitignore b/.gitignore
index 035e7d5..7feeed7 100644
--- a/.gitignore
+++ b/.gitignore
@@ -20,3 +20,4 @@ build
 tmp
 .ropeproject
 venv/
+.idea
diff --git a/ellen/git/gc.py b/ellen/git/gc.py
new file mode 100644
index 0000000..a32c44b
--- /dev/null
+++ b/ellen/git/gc.py
@@ -0,0 +1,210 @@
+# -*- coding: utf-8 -*-
+"""Multi-repository garbage collection.
+
+Runs pack-refs, reflog expire, repack, prune and rerere gc on a repository
+while keeping the commits that its fork repositories still reference.
+"""
+import os
+import re
+import sys
+from functools import wraps
+from ellen.utils.process import git_with_repo
+
+# "<sha1> commit" line, as matched against `git prune --dry-run` output
+P_COMMIT = re.compile(r"^([0-9a-f]{40})\s+commit$")
+# file name of a loose object (object id without its first two hex digits)
+P_OBJ = re.compile(r"^[0-9a-f]{38}$")
+# name of a loose-object fan-out directory, e.g. "17" in "objects/17"
+P_OBJ_DIR = re.compile(r"^[0-9a-f]{2}$")
+
+AGGRESSIVE_WINDOW = 250
+AUTO_THRESHOLD = 6700
+AUTO_PACK_LIMIT = 50
+EXPIRE = '2.weeks.ago'
+REPACK_ALL_OPTS = {'a': None, 'A': None, 'unpack_unreachable': None}
+
+
+def check_status(f):
+    """Wrap `f` so that a status with a non-zero returncode raises RuntimeError."""
+    @wraps(f)
+    def wrapper(*a, **kw):
+        status = f(*a, **kw)
+        if status['returncode'] != 0:
+            raise RuntimeError("'%s' failed during git.multi_gc" % f.__name__)
+        return status
+    return wrapper
+
+
+@check_status
+def git_log(git, *a, **kw):
+    return git.log(*a, **kw)
+
+
+@check_status
+def git_pack_refs(git, *a, **kw):
+    return git.pack_refs(*a, **kw)
+
+
+@check_status
+def git_reflog(git, *a, **kw):
+    return git.reflog(*a, **kw)
+
+
+@check_status
+def git_repack(git, *a, **kw):
+    return git.repack(*a, **kw)
+
+
+@check_status
+def git_prune(git, *a, **kw):
+    return git.prune(*a, **kw)
+
+
+@check_status
+def git_rerere(git, *a, **kw):
+    return git.rerere(*a, **kw)
+
+
+def _reset_repack_all_options():
+    """Clear the module-level repack flags left over from a previous run."""
+    for key in REPACK_ALL_OPTS:
+        REPACK_ALL_OPTS[key] = None
+
+
+def _update_repack_all_options(expire=EXPIRE):
+    """Select the repack-everything flags that match the `expire` policy.
+
+    "now" selects `a`; any other non-empty value selects `A` together with
+    `unpack_unreachable=expire`.
+    """
+    # start clean: the dict is module-level, so flags of an earlier call stay
+    _reset_repack_all_options()
+    if "now" == expire:
+        REPACK_ALL_OPTS['a'] = True
+    elif expire:
+        REPACK_ALL_OPTS['A'] = True
+        REPACK_ALL_OPTS['unpack_unreachable'] = expire
+
+
+def _too_many_loose_objects(repository):
+    """Estimate from one fan-out directory whether loose objects pile up.
+
+    Counts the object files of the first `objects/xx` directory found and
+    compares that with 1/256th of AUTO_THRESHOLD (rounded up).
+    """
+    obj_dir = os.path.join(repository.path, "objects/")
+    if AUTO_THRESHOLD <= 0:
+        return False
+    auto_thr = (AUTO_THRESHOLD + 255) // 256
+    if not os.path.isdir(obj_dir):
+        return False
+    for name in os.listdir(obj_dir):
+        root = os.path.join(obj_dir, name)
+        # skip "info" and "pack": only fan-out directories hold loose objects
+        if P_OBJ_DIR.search(name) and os.path.isdir(root):
+            break
+    else:
+        return False
+    cnt = 0
+    for f in os.listdir(root):
+        path = os.path.join(root, f)
+        if os.path.isfile(path) and P_OBJ.search(f):
+            cnt += 1
+    return cnt > auto_thr
+
+
+def _too_many_packs(repository):
+    """Tell whether `objects/info/packs` lists AUTO_PACK_LIMIT packs or more."""
+    if AUTO_PACK_LIMIT <= 0:
+        return False
+    path = os.path.join(repository.path, "objects/info/packs")
+    if not os.path.isfile(path):
+        return False
+    with open(path, 'r') as f:
+        lines = f.readlines()
+    # one line is discounted (presumably the blank line ending the file)
+    packs = len(lines) - 1
+    return packs >= AUTO_PACK_LIMIT
+
+
+def need_to_gc(repository, expire=EXPIRE):
+    """Decide whether an `auto` gc has anything to do for `repository`.
+
+    With too many packs the repack-everything flags are set as a side effect;
+    otherwise gc is needed only when there are too many loose objects.
+    """
+    if AUTO_THRESHOLD <= 0:
+        return False
+    if _too_many_packs(repository):
+        _update_repack_all_options(expire=expire)
+    elif not _too_many_loose_objects(repository):
+        return False
+    return True
+
+
+def gc_repository(repository, forks, auto=None, prune=None):
+    """Garbage-collect `repository` without losing commits used by `forks`.
+
+    Without forks this is a plain `git gc`. With forks the gc steps are run
+    one by one and commits found in any fork are excluded from pruning.
+
+    :param repository: repository to collect (its `path` attribute is read)
+    :param forks: repositories whose commits must survive
+    :param auto: if true, do nothing unless `need_to_gc` asks for a gc
+    :param prune: expiry for pruning; 'all' means 'now', empty means EXPIRE
+    :return: status dict (`returncode`, `fullcmd`, `stdout`, `stderr`);
+             `returncode` is -1 when a step raised
+    """
+    expire = 'now' if prune == 'all' else prune
+    if not expire:
+        expire = EXPIRE
+
+    # built before the `try` so that the error handler can always use it
+    status = {'returncode': 0, 'fullcmd': 'multi-gc', 'stderr': '', 'stdout': ''}
+    try:
+        git = git_with_repo(repository)
+        status['fullcmd'] = '%s multi-gc' % ' '.join(git.cmds)
+        if prune:
+            prune_opt = "--prune=" + prune
+            status['fullcmd'] += ' ' + prune_opt
+        if not forks:
+            return git.gc(prune_opt, auto=auto) if prune else git.gc(auto=auto)
+        else:
+            paths = ["--fork='%s'" % r.path for r in forks]
+            status['fullcmd'] += ' ' + ' '.join(paths)
+
+        # drop flags that a previous run may have left in the shared dict
+        _reset_repack_all_options()
+        if auto:
+            status['fullcmd'] += ' --auto'
+            if not need_to_gc(repository, expire=expire):
+                return status
+        else:
+            _update_repack_all_options(expire=expire)
+
+        git_pack_refs(git, all=True, prune=True)
+        git_reflog(git, 'expire', all=True)
+        git_repack(git, d=True, l=True, a=REPACK_ALL_OPTS['a'],
+                   A=REPACK_ALL_OPTS['A'],
+                   unpack_unreachable=REPACK_ALL_OPTS['unpack_unreachable'])
+
+        # seek commits to be pruned
+        all_fork_commits = []
+        commits = set()
+        for f in forks:
+            fork_git = git_with_repo(f)
+            all_fork_commits += git_log(fork_git, '--pretty=format:%H', all=True)['stdout'].splitlines()
+        for line in git_prune(git, dry_run=True, expire=expire)['stdout'].splitlines():
+            matcher = P_COMMIT.search(line)
+            if matcher:
+                commits.add(matcher.group(1))
+        # keep only the doomed commits that a fork still has; they are passed
+        # to `git prune` as extra arguments so that it spares them
+        commits &= set(all_fork_commits)
+
+        git_prune(git, *commits, expire=expire)
+        git_rerere(git, 'gc')
+    except Exception as e:
+        sys.stderr.write('%s\n' % e)
+        status['returncode'] = -1
+    return status
diff --git a/ellen/repo.py b/ellen/repo.py
index 6fc2040..7e0c0f8 100644
--- a/ellen/repo.py
+++ b/ellen/repo.py
@@ -12,6 +12,7 @@
 from ellen.git.tag import list_tags, create_tag
 from ellen.git.commit import create_commit
 from ellen.git.diff import diff_wrapper as diff
+from ellen.git.gc import gc_repository
 from ellen.git.ref import update_ref
 from ellen.git.clone import clone_repository, update_server_info
 from ellen.git.init import init_repository
@@ -261,6 +262,20 @@ def create_tag(self, name, ref, author_name, author_email, message):
     def update_hooks(self, path):
         return update_hooks(self.repository, path)
 
+    def gc(self, fork_paths=None, auto=None, prune=None):
+        """Run gc_repository on this repository.
+
+        `fork_paths` is the path, or a list/tuple of paths, of the fork
+        repositories whose commits must be kept.
+        """
+        if not fork_paths:
+            fork_paths = []
+        elif not isinstance(fork_paths, (list, tuple)):
+            # a single path used to be ignored silently; treat it as one fork
+            fork_paths = [fork_paths]
+        forks = [repository(p) for p in fork_paths]
+        return gc_repository(self.repository, forks, auto=auto, prune=prune)
+
 
 def repository(path):
     try:
diff --git a/tests/test_gc.py b/tests/test_gc.py
new file mode 100644
index 0000000..5efd344
--- /dev/null
+++ b/tests/test_gc.py
@@ -0,0 +1,78 @@
+# -*- coding: utf-8 -*-
+
+from pygit2 import Repository
+from pygit2 import is_repository
+from _base import BareRepoTest
+from ellen.repo import Jagare
+
+class test_gc(BareRepoTest):
+    """Jagare.gc on a bare repository, alone and with a shared clone as fork."""
+
+    def test_simple(self):
+        repo = Jagare(self.path)
+        pygit2_repo = Repository(self.path)
+        ret = repo.gc()
+        self.assertEqual(ret['returncode'], 0)
+        self.assertTrue('gc' in ret['fullcmd'])
+        self.assertFalse(pygit2_repo.is_empty)
+        self.assertTrue(pygit2_repo.is_bare)
+        self.assertFalse(repo.empty)
+        self.assertTrue(repo.bare)
+
+    def test_simple_auto(self):
+        repo = Jagare(self.path)
+        pygit2_repo = Repository(self.path)
+        ret = repo.gc(auto=True)
+        self.assertEqual(ret['returncode'], 0)
+        self.assertTrue('gc' in ret['fullcmd'])
+        self.assertFalse(pygit2_repo.is_empty)
+        self.assertTrue(pygit2_repo.is_bare)
+        self.assertFalse(repo.empty)
+        self.assertTrue(repo.bare)
+
+    def test_simple_all(self):
+        repo = Jagare(self.path)
+        pygit2_repo = Repository(self.path)
+        ret = repo.gc(prune='all')
+        self.assertEqual(ret['returncode'], 0)
+        self.assertTrue('gc' in ret['fullcmd'])
+        self.assertFalse(pygit2_repo.is_empty)
+        self.assertTrue(pygit2_repo.is_bare)
+        self.assertFalse(repo.empty)
+        self.assertTrue(repo.bare)
+
+    def test_multi(self):
+        repo = Jagare(self.path)
+        path = self.get_temp_path()
+        clone_repo = repo.clone(path, shared=True)
+        pygit2_repo = Repository(path)
+        ret = repo.gc(fork_paths=[path])
+        self.assertTrue(is_repository(path))
+        self.assertFalse(pygit2_repo.is_empty)
+        self.assertFalse(pygit2_repo.is_bare)
+        self.assertFalse(clone_repo.empty)
+        self.assertFalse(clone_repo.bare)
+
+    def test_multi_auto(self):
+        repo = Jagare(self.path)
+        path = self.get_temp_path()
+        clone_repo = repo.clone(path, shared=True)
+        pygit2_repo = Repository(path)
+        ret = repo.gc(fork_paths=[path], auto=True)
+        self.assertTrue(is_repository(path))
+        self.assertFalse(pygit2_repo.is_empty)
+        self.assertFalse(pygit2_repo.is_bare)
+        self.assertFalse(clone_repo.empty)
+        self.assertFalse(clone_repo.bare)
+
+    def test_multi_all(self):
+        repo = Jagare(self.path)
+        path = self.get_temp_path()
+        clone_repo = repo.clone(path, shared=True)
+        pygit2_repo = Repository(path)
+        ret = repo.gc(fork_paths=[path], prune='all')
+        self.assertTrue(is_repository(path))
+        self.assertFalse(pygit2_repo.is_empty)
+        self.assertFalse(pygit2_repo.is_bare)
+        self.assertFalse(clone_repo.empty)
+        self.assertFalse(clone_repo.bare)