Skip to content

Commit

Permalink
add git.multi-gc
Browse files Browse the repository at this point in the history
  • Loading branch information
lihuanshuai authored and lihuanshuai committed Sep 6, 2014
1 parent 03f8717 commit a70b6b3
Show file tree
Hide file tree
Showing 4 changed files with 251 additions and 0 deletions.
1 change: 1 addition & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -20,3 +20,4 @@ build
tmp
.ropeproject
venv/
.idea
165 changes: 165 additions & 0 deletions ellen/git/gc.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,165 @@
# -*- coding: utf-8 -*-
import os
import re
import sys
from functools import wraps
from ellen.utils.process import git_with_repo

# A line made of a 40-hex-digit object id, whitespace and the word "commit";
# gc_repository applies it to the output of the dry-run prune.
P_COMMIT = re.compile(r"^([0-9a-f]{40})\s+commit$")
# A file name made of exactly 38 hex digits; used when counting loose objects.
P_OBJ = re.compile(r"^[0-9a-f]{38}$")

# Not referenced by the code visible in this module.
AGGRESSIVE_WINDOW = 250
# Loose-object threshold for automatic gc; a value <= 0 disables the check.
AUTO_THRESHOLD = 6700
# Pack-count limit for automatic gc; a value <= 0 disables the check.
AUTO_PACK_LIMIT = 50
# Expiry used when the caller does not supply one.
EXPIRE = '2.weeks.ago'
# Shared, mutable keyword options later passed to git_repack; every entry
# starts out as None and is filled in by _update_repack_all_options.
REPACK_ALL_OPTS = dict.fromkeys(('a', 'A', 'unpack_unreachable'))


def check_status(f):
    """Decorate ``f`` so that a failing status raises instead of returning.

    ``f`` must return a mapping with a ``'returncode'`` entry.  The wrapped
    function hands that mapping back unchanged when the return code is ``0``
    and raises ``RuntimeError`` (naming ``f``) for any other value.
    """
    @wraps(f)
    def checked(*args, **kwargs):
        result = f(*args, **kwargs)
        if result['returncode'] == 0:
            return result
        raise RuntimeError("'%s' failed during git.multi_gc" % f.__name__)
    return checked


@check_status
def git_log(git, *args, **kwargs):
    """Call ``git.log`` with the given arguments and return its status.

    The ``check_status`` decorator raises ``RuntimeError`` when the returned
    status carries a non-zero ``returncode``.
    """
    status = git.log(*args, **kwargs)
    return status


@check_status
def git_pack_refs(git, *args, **kwargs):
    """Call ``git.pack_refs`` with the given arguments and return its status.

    The ``check_status`` decorator raises ``RuntimeError`` when the returned
    status carries a non-zero ``returncode``.
    """
    status = git.pack_refs(*args, **kwargs)
    return status


@check_status
def git_reflog(git, *args, **kwargs):
    """Call ``git.reflog`` with the given arguments and return its status.

    The ``check_status`` decorator raises ``RuntimeError`` when the returned
    status carries a non-zero ``returncode``.
    """
    status = git.reflog(*args, **kwargs)
    return status


@check_status
def git_repack(git, *args, **kwargs):
    """Call ``git.repack`` with the given arguments and return its status.

    The ``check_status`` decorator raises ``RuntimeError`` when the returned
    status carries a non-zero ``returncode``.
    """
    status = git.repack(*args, **kwargs)
    return status


@check_status
def git_prune(git, *args, **kwargs):
    """Call ``git.prune`` with the given arguments and return its status.

    The ``check_status`` decorator raises ``RuntimeError`` when the returned
    status carries a non-zero ``returncode``.
    """
    status = git.prune(*args, **kwargs)
    return status


@check_status
def git_rerere(git, *args, **kwargs):
    """Call ``git.rerere`` with the given arguments and return its status.

    The ``check_status`` decorator raises ``RuntimeError`` when the returned
    status carries a non-zero ``returncode``.
    """
    status = git.rerere(*args, **kwargs)
    return status


def _update_repack_all_options(expire=EXPIRE):
    """Fill the module-level ``REPACK_ALL_OPTS`` for a repack of everything.

    * ``expire == "now"``: only ``a`` is enabled.
    * any other truthy ``expire``: ``A`` is enabled and ``unpack_unreachable``
      is set to ``expire``.
    * falsy ``expire``: every option is left cleared (``None``).

    The values are later passed as keyword options to ``git_repack``.
    """
    # REPACK_ALL_OPTS is shared module state.  Clear it first so that flags
    # chosen by an earlier call (e.g. ``A`` for a dated expiry) are not sent
    # along with the flags chosen for this call (e.g. ``a`` for "now").
    REPACK_ALL_OPTS.update(a=None, A=None, unpack_unreachable=None)
    if expire == "now":
        REPACK_ALL_OPTS['a'] = True
    elif expire:
        REPACK_ALL_OPTS['A'] = True
        REPACK_ALL_OPTS['unpack_unreachable'] = expire


def _too_many_loose_objects(repository):
    """Estimate whether the repository holds too many loose objects.

    Only one fan-out directory below ``<repository.path>/objects/`` is
    inspected.  The number of files in it whose name is 38 hex digits is
    compared against ``AUTO_THRESHOLD`` divided by 256, rounded up.

    Returns ``True`` as soon as that per-directory limit is exceeded.
    Returns ``False`` when the check is disabled (``AUTO_THRESHOLD <= 0``),
    when the objects directory is missing, when it has no fan-out directory,
    or when the limit is not exceeded.
    """
    if AUTO_THRESHOLD <= 0:
        return False
    obj_dir = os.path.join(repository.path, "objects/")
    if not os.path.isdir(obj_dir):
        return False
    per_dir_limit = (AUTO_THRESHOLD + 255) // 256

    # Sample only real fan-out directories (two hex digits).  Taking the first
    # directory in os.listdir() order could select "info" or "pack", which
    # never contain loose objects and would always yield False.  Sorting makes
    # the choice independent of the file system's listing order.
    hex_digits = "0123456789abcdef"
    fanout_dirs = sorted(
        name for name in os.listdir(obj_dir)
        if len(name) == 2
        and all(ch in hex_digits for ch in name)
        and os.path.isdir(os.path.join(obj_dir, name))
    )
    if not fanout_dirs:
        return False

    sample_dir = os.path.join(obj_dir, fanout_dirs[0])
    loose = 0
    for name in os.listdir(sample_dir):
        if P_OBJ.search(name) and os.path.isfile(os.path.join(sample_dir, name)):
            loose += 1
            if loose > per_dir_limit:
                return True
    return False


def _too_many_packs(repository):
    """Tell whether ``objects/info/packs`` lists too many packs.

    The pack count is taken as the number of lines in
    ``<repository.path>/objects/info/packs`` minus one.  Returns ``True`` when
    that count reaches ``AUTO_PACK_LIMIT``; returns ``False`` when the limit
    is disabled (``AUTO_PACK_LIMIT <= 0``), when the file does not exist, or
    when the count is below the limit.
    """
    if AUTO_PACK_LIMIT <= 0:
        return False
    packs_file = os.path.join(repository.path, "objects/info/packs")
    if not os.path.isfile(packs_file):
        return False
    with open(packs_file, 'r') as fh:
        line_count = len(fh.readlines())
    # One line of the file is not counted as a pack entry.
    return line_count - 1 >= AUTO_PACK_LIMIT


def need_to_gc(repository, expire=EXPIRE):
    """Decide whether an automatic gc run is worthwhile for ``repository``.

    Returns ``False`` when automatic gc is disabled (``AUTO_THRESHOLD <= 0``).
    When there are too many packs, the repack-all options are updated for
    ``expire`` and ``True`` is returned.  Otherwise the result is whether
    there are too many loose objects.
    """
    if AUTO_THRESHOLD <= 0:
        return False
    if _too_many_packs(repository):
        _update_repack_all_options(expire=expire)
        return True
    # The repository must be passed on; calling the helper without it raised
    # TypeError whenever the pack check above did not trigger.
    return _too_many_loose_objects(repository)


def gc_repository(repository, forks, auto=None, prune=None):
    """Garbage-collect ``repository`` while keeping commits its forks use.

    Without ``forks`` this simply runs ``git.gc`` (with ``--prune=<prune>``
    when ``prune`` is given) and returns whatever that call returns.

    With ``forks`` the individual steps are run one by one: pack-refs,
    reflog expire, repack, prune and rerere gc.  Commits that a dry-run
    prune reports and that also appear in the log of any fork are passed to
    the real prune as extra arguments.

    :param repository: repository handed to ``git_with_repo``; its ``path``
        attribute is read by the automatic-gc checks.
    :param forks: sequence of fork repositories, each with a ``path``
        attribute and accepted by ``git_with_repo``.
    :param auto: when truthy, do nothing unless ``need_to_gc`` says the
        repository needs collecting.
    :param prune: expiry for pruning; ``'all'`` is treated as ``'now'`` and a
        falsy value falls back to ``EXPIRE``.
    :return: a status dict with ``returncode``, ``fullcmd``, ``stderr`` and
        ``stdout`` keys.  Any exception is written to ``sys.stderr`` and
        reported as ``returncode == -1`` instead of being raised.
    """
    expire = 'now' if prune == 'all' else prune
    if not expire:
        expire = EXPIRE

    # Created before the ``try`` so the error path below always has a status
    # to report, even when ``git_with_repo`` itself fails.
    status = {'returncode': 0, 'fullcmd': 'multi-gc', 'stderr': '', 'stdout': ''}
    try:
        git = git_with_repo(repository)
        status['fullcmd'] = '%s multi-gc' % ' '.join(git.cmds)
        if prune:
            prune_opt = "--prune=" + prune
            status['fullcmd'] += ' ' + prune_opt
        if not forks:
            return git.gc(prune_opt, auto=auto) if prune else git.gc(auto=auto)

        paths = ["--fork='%s'" % r.path for r in forks]
        status['fullcmd'] += ' ' + ' '.join(paths)

        # REPACK_ALL_OPTS is module-level state; clear what a previous call
        # left behind so this run only uses the options it selects itself.
        for key in list(REPACK_ALL_OPTS):
            REPACK_ALL_OPTS[key] = None

        if auto:
            status['fullcmd'] += ' --auto'
            if not need_to_gc(repository, expire=expire):
                return status
        else:
            _update_repack_all_options(expire=expire)

        git_pack_refs(git, all=True, prune=True)
        git_reflog(git, 'expire', all=True)
        git_repack(git, d=True, l=True, a=REPACK_ALL_OPTS['a'],
                   A=REPACK_ALL_OPTS['A'],
                   unpack_unreachable=REPACK_ALL_OPTS['unpack_unreachable'])

        # Every commit id that appears in the log of any fork.
        fork_commits = set()
        for fork in forks:
            fork_git = git_with_repo(fork)
            log = git_log(fork_git, '--pretty=format:%H', all=True)
            fork_commits.update(log['stdout'].splitlines())

        # Commits the prune would remove, according to a dry run.
        prunable = set()
        dry_run = git_prune(git, dry_run=True, expire=expire)
        for line in dry_run['stdout'].splitlines():
            matcher = P_COMMIT.search(line)
            if matcher:
                prunable.add(matcher.group(1))

        # Commits that would be pruned but still appear in a fork are handed
        # to the real prune as positional arguments.
        keep = sorted(prunable & fork_commits)
        git_prune(git, *keep, expire=expire)
        git_rerere(git, 'gc')
    except Exception as e:
        # Best-effort boundary: report the failure through the status dict.
        sys.stderr.write('%s\n' % e)
        status['returncode'] = -1
    return status
8 changes: 8 additions & 0 deletions ellen/repo.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@
from ellen.git.tag import list_tags, create_tag
from ellen.git.commit import create_commit
from ellen.git.diff import diff_wrapper as diff
from ellen.git.gc import gc_repository
from ellen.git.ref import update_ref
from ellen.git.clone import clone_repository, update_server_info
from ellen.git.init import init_repository
Expand Down Expand Up @@ -261,6 +262,13 @@ def create_tag(self, name, ref, author_name, author_email, message):
def update_hooks(self, path):
    """Delegate to the module-level ``update_hooks`` for this repository.

    ``path`` is passed through unchanged together with ``self.repository``;
    the helper's return value is returned as is.
    """
    result = update_hooks(self.repository, path)
    return result

def gc(self, fork_paths=None, auto=None, prune=None):
    """Garbage-collect this repository, taking its forks into account.

    :param fork_paths: paths of fork repositories; either a list/tuple of
        paths or a single path.  ``None`` or an empty value means no forks.
    :param auto: passed through to ``gc_repository``.
    :param prune: passed through to ``gc_repository``.
    :return: the value returned by ``gc_repository``.

    Each path is opened with the module-level ``repository`` helper, so
    whatever that helper raises for a bad path propagates to the caller.
    """
    if not fork_paths:
        paths = []
    elif isinstance(fork_paths, (list, tuple)):
        paths = fork_paths
    else:
        # A single path used to be dropped silently, so the gc ran as if
        # there were no forks at all.  Treat it as a one-element list.
        paths = [fork_paths]
    forks = [repository(p) for p in paths]
    return gc_repository(self.repository, forks, auto=auto, prune=prune)


def repository(path):
try:
Expand Down
77 changes: 77 additions & 0 deletions tests/test_gc.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,77 @@
# -*- coding: utf-8 -*-

from pygit2 import Repository
from pygit2 import is_repository
from _base import BareRepoTest
from ellen.repo import Jagare

class test_gc(BareRepoTest):
    """Smoke tests for ``Jagare.gc`` with and without a shared clone."""

    def _check_plain_gc(self, **gc_kwargs):
        """Run gc on the bare repository and check it is still intact."""
        repo = Jagare(self.path)
        pygit2_repo = Repository(self.path)
        ret = repo.gc(**gc_kwargs)
        self.assertEqual(ret['returncode'], 0)
        self.assertTrue('gc' in ret['fullcmd'])
        self.assertFalse(pygit2_repo.is_empty)
        self.assertTrue(pygit2_repo.is_bare)
        self.assertFalse(repo.empty)
        self.assertTrue(repo.bare)

    def _check_multi_gc(self, **gc_kwargs):
        """Run gc with one shared clone as fork and check the clone survives."""
        repo = Jagare(self.path)
        path = self.get_temp_path()
        clone_repo = repo.clone(path, shared=True)
        pygit2_repo = Repository(path)
        # Jagare.gc only recognises fork paths given as a list or tuple, so
        # the clone path is wrapped in a list to reach the fork-aware code.
        repo.gc(fork_paths=[path], **gc_kwargs)
        self.assertTrue(is_repository(path))
        self.assertFalse(pygit2_repo.is_empty)
        self.assertFalse(pygit2_repo.is_bare)
        self.assertFalse(clone_repo.empty)
        self.assertFalse(clone_repo.bare)

    def test_simple(self):
        self._check_plain_gc()

    def test_simple_auto(self):
        self._check_plain_gc(auto=True)

    def test_simple_all(self):
        self._check_plain_gc(prune='all')

    def test_multi(self):
        self._check_multi_gc()

    def test_multi_auto(self):
        self._check_multi_gc(auto=True)

    def test_multi_all(self):
        # Was a copy of test_multi_auto (auto=True); this case is meant to
        # cover prune='all', mirroring test_simple_all.
        self._check_multi_gc(prune='all')

0 comments on commit a70b6b3

Please sign in to comment.