Skip to content

Commit

Permalink
improve prune
Browse files Browse the repository at this point in the history
  • Loading branch information
lihuanshuai committed Sep 6, 2014
1 parent 6318a61 commit b7ebcd6
Show file tree
Hide file tree
Showing 2 changed files with 113 additions and 59 deletions.
118 changes: 59 additions & 59 deletions ellen/git/gc.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,62 +5,36 @@
from functools import wraps
from ellen.utils.process import git_with_repo

# Matches a line made of a 40-hex-digit object id, whitespace and the word
# "commit".
P_COMMIT = re.compile(r"^([0-9a-f]{40})\s+commit$")
# Matches a string of exactly 38 hex digits.
# NOTE(review): presumably a loose-object file name under objects/xx/ —
# its use is not visible in this part of the file; confirm.
P_OBJ = re.compile(r"^[0-9a-f]{38}$")

# NOTE(review): these values look like git's documented gc defaults
# (gc.aggressiveWindow, gc.auto, gc.autoPackLimit, gc.pruneExpire) — confirm
# against the git-gc documentation before changing them.
AGGRESSIVE_WINDOW = 250
AUTO_THRESHOLD = 6700
AUTO_PACK_LIMIT = 50
# Default expiry used wherever a function declares ``expire=EXPIRE``.
EXPIRE = '2.weeks.ago'
# Keyword options for ``git repack``; filled in by
# _update_repack_all_options().  ``None`` means "option not set".
REPACK_ALL_OPTS = {'a': None, 'A': None, 'unpack_unreachable': None}
# Per-command keyword options; the 'repack_all' entry is expanded as
# ``**kwargs`` when repacking.
_OPTS = {'repack_all': {}}


def check_status(f):
    """Decorate *f* so that a failed git call raises instead of returning.

    The wrapped function must return a mapping with a ``'returncode'`` key.
    When that value is non-zero a ``RuntimeError`` is raised; otherwise the
    status mapping is returned unchanged.

    The error message names the callable passed as first positional argument
    when it has a string ``__name__`` (the ``git_process(fn, ...)`` calling
    style); otherwise it falls back to the name of *f* itself, so wrappers
    that receive a git object, or no positional argument at all, still
    produce a meaningful message.
    """
    @wraps(f)
    def wrapper(*a, **kw):
        status = f(*a, **kw)
        if status['returncode'] != 0:
            # Resolve the name only on failure: looking it up eagerly would
            # raise IndexError/AttributeError and mask the real result.
            target = a[0] if a else None
            name = getattr(target, '__name__', None)
            if not isinstance(name, str):
                name = f.__name__
            raise RuntimeError("'%s' failed during git.multi_gc" % name)
        return status
    return wrapper


@check_status
def git_log(git, *a, **kw):
    """Call ``git.log`` with the given arguments; checked by check_status."""
    return git.log(*a, **kw)


@check_status
def git_pack_refs(git, *a, **kw):
    """Call ``git.pack_refs`` with the given arguments; checked by check_status."""
    return git.pack_refs(*a, **kw)


@check_status
def git_reflog(git, *a, **kw):
    """Call ``git.reflog`` with the given arguments; checked by check_status."""
    return git.reflog(*a, **kw)


@check_status
def git_repack(git, *a, **kw):
    """Call ``git.repack`` with the given arguments; checked by check_status."""
    return git.repack(*a, **kw)


@check_status
def git_prune(git, *a, **kw):
    """Call ``git.prune`` with the given arguments; checked by check_status."""
    return git.prune(*a, **kw)


@check_status
def git_rerere(git, *a, **kw):
    """Call ``git.rerere`` with the given arguments; checked by check_status."""
    return git.rerere(*a, **kw)


# Decorated like every sibling wrapper so that a non-zero return code raises
# instead of being silently returned to the caller.
@check_status
def git_process(fn, *a, **kw):
    """Call the git command callable *fn* with the given arguments.

    Generic counterpart of the ``git_<command>`` wrappers above: the caller
    passes the bound command (e.g. ``git.prune``) instead of the git object.
    """
    return fn(*a, **kw)


def _update_repack_all_options(expire=EXPIRE):
    """Recompute the ``git repack`` options that depend on *expire*.

    * ``expire == "now"``: only ``a`` is set.
    * any other truthy *expire*: ``A`` is set and ``unpack_unreachable``
      carries the expiry.
    * falsy *expire*: all three options are left unset (``None``).

    ``a`` and ``A`` are never set together, and every option is reset on each
    call so values from a previous invocation cannot leak into the next one.
    NOTE(review): this mirrors how ``git gc`` appears to choose its repack
    flags — confirm against the git-repack / git-gc documentation.

    Both option tables of this module (``REPACK_ALL_OPTS`` and
    ``_OPTS['repack_all']``) are refreshed with the same values.
    """
    opts = {'a': None, 'A': None, 'unpack_unreachable': None}
    if "now" == expire:
        opts['a'] = True
    elif expire:
        opts['A'] = True
        opts['unpack_unreachable'] = expire
    # Update in place so existing references to the dict stay valid.
    REPACK_ALL_OPTS.update(opts)
    _OPTS['repack_all'] = dict(opts)


def _too_many_loose_objects(repository):
Expand Down Expand Up @@ -94,7 +68,7 @@ def _too_many_packs(repository):
return False
path = os.path.join(repository.path, "objects/info/packs")
if not os.path.isfile(path):
return False
return False
with open(path, 'r') as f:
lines = f.readlines()
packs = len(lines) - 1
Expand All @@ -113,21 +87,54 @@ def need_to_gc(repository, expire=EXPIRE):
return True


class BfsQue(object):
    """Breadth-first collector of the nearest matching ancestors.

    ``search(item)`` walks ``item.parents`` breadth-first.  Every item for
    which ``cnd_fn(item, wanted)`` is true is recorded once in ``data`` and
    acts as a boundary: the walk never continues past it.  Items that do not
    match are recorded in ``visited`` and their parents are explored.

    State is kept between calls, so several searches on the same instance
    share ``data`` and ``visited``.
    """

    def __init__(self, wanted, cnd_fn=lambda x, s: x in s):
        self.data = []        # matching items, in discovery order
        self.visited = []     # non-matching items already queued/expanded
        self.wanted = wanted  # passed as second argument to cnd_fn
        self.cnd = cnd_fn

    def _visit(self, item):
        """Classify *item*; return True when it matches (is a boundary)."""
        if self.cnd(item, self.wanted):
            # A match stays a boundary when reached again through another
            # path; treating it as "not wanted" would walk past it and
            # collect its ancestors as well.
            if item not in self.data:
                self.data.append(item)
            return True
        self.visited.append(item)
        return False

    def search(self, item):
        """Collect into ``data`` the nearest matches reachable from *item*."""
        # Local import: the module-level import block is not changed here.
        from collections import deque

        if self._visit(item):
            return
        pending = deque([item])  # O(1) pops from the left, unlike list.pop(0)
        while pending:
            current = pending.popleft()
            for parent in current.parents:
                if parent in self.visited or self._visit(parent):
                    continue
                pending.append(parent)


def gc_repository(repository, forks, auto=None, prune=None):
"""git gc command
"""
expire = 'now' if prune == 'all' else prune
if not expire:
expire = EXPIRE

git = git_with_repo(repository)
status = {'returncode': 0, 'fullcmd': '%s multi-gc' % ' '.join(git.cmds), 'stderr': '', 'stdout': ''}
try:
git = git_with_repo(repository)
status = {'returncode': 0, 'fullcmd': '%s multi-gc' % ' '.join(git.cmds), 'stderr': '', 'stdout': ''}
prune_opts = []
if prune:
prune_opt = "--prune=" + prune
status['fullcmd'] += ' ' + prune_opt
prune_opts.append("--prune=" + prune)
status['fullcmd'] += ' ' + prune_opts[0]

if not forks:
return git.gc(prune_opt, auto=auto) if prune else git.gc(auto=auto)
return git.gc(*prune_opts, auto=auto)
else:
paths = [ "--fork='%s'" % r.path for r in forks]
status['fullcmd'] += ' ' + ' '.join(paths)
Expand All @@ -139,26 +146,19 @@ def gc_repository(repository, forks, auto=None, prune=None):
else:
_update_repack_all_options(expire=expire)

git_pack_refs(git, all=True, prune=True)
git_reflog(git, 'expire', all=True)
git_repack(git, d=True, l=True, a=REPACK_ALL_OPTS['a'],
A=REPACK_ALL_OPTS['A'],
unpack_unreachable=REPACK_ALL_OPTS['unpack_unreachable'])
git_process(git.pack_refs, all=True, prune=True)
git_process(git.reflog, 'expire', all=True)
git_process(git.repack, d=True, l=True, **_OPTS['repack_all'])

# seek commits to be pruned
all_fork_commits = []
commits = set()
que = BfsQue(repository, cnd_fn=lambda commit, repo: commit.id in repo)
for f in forks:
fork_git = git_with_repo(f)
all_fork_commits += git_log(fork_git, '--pretty=format:%H', all=True)['stdout'].splitlines()
for line in git_prune(git, dry_run=True, expire=expire)['stdout'].splitlines():
matcher = P_COMMIT.search(line)
if matcher:
commits.add(matcher.group(1))
commits &= set(all_fork_commits)

git_prune(git, *commits, expire=expire)
git_rerere(git, 'gc')
refs = f.listall_references()
for ref in refs:
ref_commit = f.lookup_reference(ref).get_object()
que.search(ref_commit)
commits = [str(c.id) for c in que.data]
git_process(git.prune, *commits, expire=expire)
git_process(git.rerere, 'gc')
except Exception as e:
print >>sys.stderr, e
status['returncode'] = -1
Expand Down
54 changes: 54 additions & 0 deletions tests/test_gc.py
Original file line number Diff line number Diff line change
@@ -1,9 +1,11 @@
# -*- coding: utf-8 -*-

import unittest
from pygit2 import Repository
from pygit2 import is_repository
from _base import BareRepoTest
from ellen.repo import Jagare
from ellen.git.gc import BfsQue

class test_gc(BareRepoTest):

Expand Down Expand Up @@ -75,3 +77,55 @@ def test_multi_all(self):
self.assertFalse(pygit2_repo.is_bare)
self.assertFalse(clone_repo.empty)
self.assertFalse(clone_repo.bare)


class Node(object):
    """Minimal stand-in for a commit: an identifier plus a list of parents."""

    def __init__(self, id, parents):
        self.parents = parents
        self.id = id


class test_que(unittest.TestCase):
    """BfsQue searches over a small graph with one diamond.

    Parent links: 0 -> 1 -> 2 -> {3, 4}; 3 -> 5 -> 7; 4 -> 6 -> 7.
    """

    def setUp(self):
        self.node7 = Node(7, [])
        self.node5 = Node(5, [self.node7])
        self.node6 = Node(6, [self.node7])
        self.node3 = Node(3, [self.node5])
        self.node4 = Node(4, [self.node6])
        self.node2 = Node(2, [self.node3, self.node4])
        self.node1 = Node(1, [self.node2])
        self.first_node = Node(0, [self.node1])
        self.cnd_fn = lambda item, wanted: item in wanted

    def _collect(self, wanted):
        """Search from the root for *wanted* and return the collected items."""
        self.que = BfsQue(wanted, cnd_fn=self.cnd_fn)
        self.que.search(self.first_node)
        return self.que.data

    def test_basic1(self):
        expected = [self.first_node]
        self.assertEqual(self._collect([self.first_node]), expected)

    def test_basic2(self):
        expected = [self.node1]
        self.assertEqual(self._collect([self.node1]), expected)

    def test_basic3(self):
        expected = [self.node3, self.node4]
        self.assertEqual(self._collect([self.node3, self.node4]), expected)

    def test_basic4(self):
        expected = [self.node5, self.node6]
        self.assertEqual(self._collect([self.node5, self.node6]), expected)

    def test_basic5(self):
        expected = [self.node7]
        self.assertEqual(self._collect([self.node7]), expected)

    def test_neg1(self):
        # Only one branch of the diamond is wanted.
        expected = [self.node3]
        self.assertEqual(self._collect([self.node3]), expected)

    def test_neg2(self):
        # Only one branch of the diamond is wanted.
        expected = [self.node5]
        self.assertEqual(self._collect([self.node5]), expected)

0 comments on commit b7ebcd6

Please sign in to comment.