Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

add git.multi-gc #7

Open
wants to merge 2 commits into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -20,3 +20,4 @@ build
tmp
.ropeproject
venv/
.idea
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

是说换了编辑器了么。。。

Copy link
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

不得不换

165 changes: 165 additions & 0 deletions ellen/git/gc.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,165 @@
# -*- coding: utf-8 -*-
import os
import re
import sys
from collections import deque
from functools import wraps
from ellen.utils.process import git_with_repo

# Matches a 38 hex-digit file name; used to recognise the files counted as
# loose objects inside an objects/ sub-directory.
P_OBJ = re.compile(r"^[0-9a-f]{38}$")

# NOTE(review): not referenced by any code visible in this change.
AGGRESSIVE_WINDOW = 250
# Loose-object threshold for the automatic gc check; a value <= 0 makes
# need_to_gc() return False unconditionally.
AUTO_THRESHOLD = 6700
# Pack-count threshold for the automatic gc check; a value <= 0 disables the
# pack check.
AUTO_PACK_LIMIT = 50
# Expiry used when the caller does not supply one.
EXPIRE = '2.weeks.ago'
# Mutable module-level state: keyword options forwarded to the repack call,
# rewritten by _update_repack_all_options().
_OPTS = {'repack_all': {}}


def check_status(f):
    """Decorate a command runner so that a failed command raises.

    The wrapped callable must return a mapping with a ``'returncode'`` key.
    When that value is non-zero a RuntimeError naming the command is raised;
    otherwise the mapping is returned unchanged.

    The command callable is expected as the first positional argument (or as
    the ``fn`` keyword) and is only used to label the error message.
    """
    @wraps(f)
    def wrapper(*a, **kw):
        status = f(*a, **kw)
        if status['returncode'] != 0:
            # Resolve the label lazily and defensively: looking it up must
            # never replace the RuntimeError with an IndexError (no positional
            # argument) or an AttributeError (callable without __name__, such
            # as a functools.partial).
            fn = a[0] if a else kw.get('fn', f)
            name = getattr(fn, '__name__', None) or repr(fn)
            raise RuntimeError("'%s' failed during git.multi_gc" % name)
        return status
    return wrapper


@check_status
def git_process(fn, *args, **kwargs):
    """Call ``fn`` with the remaining arguments and hand back its status.

    The ``check_status`` decorator inspects the returned status and raises
    RuntimeError when its ``'returncode'`` is non-zero.
    """
    status = fn(*args, **kwargs)
    return status


def _update_repack_all_options(expire=EXPIRE):
    """Record the "repack everything" options in ``_OPTS['repack_all']``.

    The stored dict always has the keys ``a``, ``A`` and
    ``unpack_unreachable``; an unused option is stored as ``None``.

    * ``expire == "now"``: only ``a`` is set. Unreachable objects are to be
      dropped immediately, so there is nothing to loosen.
    * any other value: ``A`` is set, plus ``unpack_unreachable=expire`` when
      an expiry is given, so unreachable objects newer than the expiry are
      kept as loose objects.

    ``a`` and ``A`` are alternatives, so they are never set together, and one
    of them is always set so that existing packs really are consolidated.
    """
    if expire == "now":
        opts = dict(a=True, A=None, unpack_unreachable=None)
    else:
        opts = dict(a=None, A=True,
                    unpack_unreachable=expire if expire else None)
    _OPTS['repack_all'] = opts


def _too_many_loose_objects(repository):
obj_dir = os.path.join(repository.path, "objects/")
if AUTO_THRESHOLD <= 0:
return False
auto_thr = (AUTO_THRESHOLD + 255) // 256
if not os.path.isdir(obj_dir):
return False
files = os.listdir(obj_dir)
root = obj_dir
if not files:
return False
for f in files:
path = os.path.join(root, f)
if os.path.isdir(path):
root = path
break
cnt = 0
for f in os.listdir(root):
path = os.path.join(root, f)
if os.path.isfile(path) and P_OBJ.search(f):
cnt += 1
if cnt > auto_thr:
return True
return False


def _too_many_packs(repository):
if AUTO_PACK_LIMIT <= 0:
return False
path = os.path.join(repository.path, "objects/info/packs")
if not os.path.isfile(path):
return False
with open(path, 'r') as f:
lines = f.readlines()
packs = len(lines) - 1
if packs >= AUTO_PACK_LIMIT:
return True
return False


def need_to_gc(repository, expire=EXPIRE):
    """Decide whether an automatic gc of ``repository`` is worthwhile.

    Returns False when ``AUTO_THRESHOLD`` is not positive. Otherwise:

    * too many packs: the "repack everything" options are refreshed for
      ``expire`` and True is returned;
    * else too many loose objects: True is returned and the repack options
      are left untouched;
    * else: False.
    """
    if AUTO_THRESHOLD <= 0:
        return False
    if _too_many_packs(repository):
        _update_repack_all_options(expire=expire)
        return True
    # The repository must be passed on; calling the helper without it
    # raises TypeError.
    return bool(_too_many_loose_objects(repository))


class BfsQue(object):
    """Breadth-first search for the nearest "wanted" ancestors of an item.

    Items are any objects exposing a ``parents`` iterable. ``cnd_fn(item,
    wanted)`` decides whether an item is wanted; the default tests
    ``item in wanted``.

    Attributes:
        data: wanted items found so far, in discovery order, no duplicates.
        visited: unwanted items that have already been seen.
        wanted: the object handed to ``cnd_fn`` as its second argument.
        cnd: the condition function.
    """

    def __init__(self, wanted, cnd_fn=lambda x, s: x in s):
        self.data = []
        self.visited = []
        self.wanted = wanted
        self.cnd = cnd_fn

    def _visit(self, item):
        """Classify ``item``; return True when the search must stop at it.

        A wanted item is recorded in ``data`` once. It stops the search even
        if an earlier search already recorded it; otherwise a later search
        would walk past it and collect its ancestors as well.
        """
        if self.cnd(item, self.wanted):
            if item not in self.data:
                self.data.append(item)
            return True
        self.visited.append(item)
        return False

    def search(self, item):
        """Walk the ancestry of ``item`` breadth-first.

        Each branch is followed until a wanted item is reached. Results
        accumulate in ``data`` across calls.
        """
        # An item seen by an earlier search has already had its ancestry
        # explored; nothing new can be found from it.
        if item in self.visited:
            return
        if self._visit(item):
            return
        pending = deque([item])
        while pending:
            current = pending.popleft()
            for parent in current.parents:
                if parent in self.visited or self._visit(parent):
                    continue
                pending.append(parent)


def gc_repository(repository, forks, auto=None, prune=None):
"""Garbage-collect ``repository``, keeping objects its forks still use.

Without ``forks`` this delegates to ``git.gc`` and returns its result.
With ``forks`` it runs pack-refs, reflog expire and repack itself, then
prunes with extra heads taken from the forks, then runs ``rerere gc``.

``prune`` is an expiry for pruning; the value ``'all'`` is treated as
``'now'`` and an empty value falls back to ``EXPIRE``. When ``auto`` is
true the multi-fork path is skipped unless ``need_to_gc`` says it is needed.

Returns a status dict with ``returncode``, ``fullcmd``, ``stderr`` and
``stdout`` keys on the multi-fork path. Any exception there is printed to
stderr and reported as ``returncode`` -1.
"""
# 'all' means "prune everything", which is expressed as the expiry 'now'.
expire = 'now' if prune == 'all' else prune
if not expire:
expire = EXPIRE

git = git_with_repo(repository)
# 'fullcmd' is a descriptive string only; it is built up below but never executed.
status = {'returncode': 0, 'fullcmd': '%s multi-gc' % ' '.join(git.cmds), 'stderr': '', 'stdout': ''}
try:
prune_opts = []
if prune:
# The caller's original value (possibly 'all') is forwarded, not `expire`.
prune_opts.append("--prune=" + prune)
status['fullcmd'] += ' ' + prune_opts[0]

if not forks:
# No forks: plain gc; its own result is returned instead of `status`.
return git.gc(*prune_opts, auto=auto)
else:
# `paths` is only used to describe the run in 'fullcmd'.
paths = [ "--fork='%s'" % r.path for r in forks]
status['fullcmd'] += ' ' + ' '.join(paths)

if auto:
status['fullcmd'] += ' --auto'
if not need_to_gc(repository, expire=expire):
return status
# NOTE(review): need_to_gc() refreshes _OPTS['repack_all'] only when there
# are too many packs; otherwise the repack below reuses whatever an earlier
# call left in that module-level dict - confirm this is intended.
else:
_update_repack_all_options(expire=expire)

git_process(git.pack_refs, all=True, prune=True)
git_process(git.reflog, 'expire', all=True)
git_process(git.repack, d=True, l=True, **_OPTS['repack_all'])

# A commit is "wanted" when its id is found in `repository`.
que = BfsQue(repository, cnd_fn=lambda commit, repo: commit.id in repo)
Copy link
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@xtao 使用新的办法,先listall_references,后BFS查找最新的可被解析的commit作为head传入prune

# From every reference of every fork, search breadth-first for the nearest
# commits that `repository` contains.
for f in forks:
refs = f.listall_references()
for ref in refs:
ref_commit = f.lookup_reference(ref).get_object()
que.search(ref_commit)
# The commits found are passed to prune as additional heads.
commits = [str(c.id) for c in que.data]
git_process(git.prune, *commits, expire=expire)
git_process(git.rerere, 'gc')
except Exception as e:
# Best effort: report the failure on stderr and flag it in the status.
print >>sys.stderr, e
status['returncode'] = -1
return status
8 changes: 8 additions & 0 deletions ellen/repo.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@
from ellen.git.tag import list_tags, create_tag
from ellen.git.commit import create_commit
from ellen.git.diff import diff_wrapper as diff
from ellen.git.gc import gc_repository
from ellen.git.ref import update_ref
from ellen.git.clone import clone_repository, update_server_info
from ellen.git.init import init_repository
Expand Down Expand Up @@ -261,6 +262,13 @@ def create_tag(self, name, ref, author_name, author_email, message):
def update_hooks(self, path):
    """Apply the module-level ``update_hooks`` to this object's repository."""
    repo = self.repository
    return update_hooks(repo, path)

def gc(self, fork_paths=None, auto=None, prune=None):
    """Garbage-collect this repository via ``gc_repository``.

    ``fork_paths`` may be a single path string or a list/tuple of paths.
    Each path is opened with ``repository()`` and the resulting objects are
    passed on as the forks. Any other value (including None) means "no
    forks". ``auto`` and ``prune`` are forwarded unchanged.
    """
    # A lone path used to be ignored silently because it is not a
    # list/tuple; treat it as a one-element list instead.
    if isinstance(fork_paths, (type(""), type(u""))):
        fork_paths = [fork_paths]
    forks = []
    if isinstance(fork_paths, (list, tuple)):
        forks = [repository(p) for p in fork_paths]
    return gc_repository(self.repository, forks, auto=auto, prune=prune)


def repository(path):
try:
Expand Down
131 changes: 131 additions & 0 deletions tests/test_gc.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,131 @@
# -*- coding: utf-8 -*-

import unittest
from pygit2 import Repository
from pygit2 import is_repository
from _base import BareRepoTest
from ellen.repo import Jagare
from ellen.git.gc import BfsQue

class test_gc(BareRepoTest):
    """Smoke tests for ``Jagare.gc`` on the bare fixture repository, run both
    on its own and together with a shared clone acting as a fork."""

    def _check_simple(self, **gc_kwargs):
        """gc the fixture repository and check it succeeded and is intact."""
        repo = Jagare(self.path)
        pygit2_repo = Repository(self.path)
        ret = repo.gc(**gc_kwargs)
        self.assertEqual(ret['returncode'], 0)
        self.assertTrue('gc' in ret['fullcmd'])
        self.assertFalse(pygit2_repo.is_empty)
        self.assertTrue(pygit2_repo.is_bare)
        self.assertFalse(repo.empty)
        self.assertTrue(repo.bare)

    def _check_multi(self, **gc_kwargs):
        """gc the fixture repository with a shared clone as its fork and
        check that the clone is still a usable, non-empty repository."""
        repo = Jagare(self.path)
        path = self.get_temp_path()
        clone_repo = repo.clone(path, shared=True)
        pygit2_repo = Repository(path)
        # The fork is given as a list, the form Jagare.gc checks for.
        repo.gc(fork_paths=[path], **gc_kwargs)
        self.assertTrue(is_repository(path))
        self.assertFalse(pygit2_repo.is_empty)
        self.assertFalse(pygit2_repo.is_bare)
        self.assertFalse(clone_repo.empty)
        self.assertFalse(clone_repo.bare)

    def test_simple(self):
        self._check_simple()

    def test_simple_auto(self):
        self._check_simple(auto=True)

    def test_simple_all(self):
        self._check_simple(prune='all')

    def test_multi(self):
        self._check_multi()

    def test_multi_auto(self):
        self._check_multi(auto=True)

    def test_multi_all(self):
        # Exercise prune='all', mirroring test_simple_all; this test used to
        # be a verbatim copy of test_multi_auto.
        self._check_multi(prune='all')


class Node(object):
    """Minimal stand-in for a commit: an ``id`` plus a list of parent nodes."""

    def __init__(self, id, parents):
        self.id = id
        self.parents = parents

    def __repr__(self):
        # Keeps failure output of assertEqual on node lists readable.
        return "Node(%r)" % (self.id,)


class test_que(unittest.TestCase):
    """BfsQue searches over a small diamond-shaped ancestry graph.

    Graph built in setUp (arrows point to parents)::

        0 -> 1 -> 2 -> 3 -> 5 -> 7
                  2 -> 4 -> 6 -> 7
    """

    def setUp(self):
        self.node7 = Node(7, [])
        self.node5 = Node(5, [self.node7])
        self.node6 = Node(6, [self.node7])
        self.node3 = Node(3, [self.node5])
        self.node4 = Node(4, [self.node6])
        self.node2 = Node(2, [self.node3, self.node4])
        self.node1 = Node(1, [self.node2])
        self.first_node = Node(0, [self.node1])
        self.cnd_fn = lambda item, wanted: item in wanted

    def _found_from_root(self, wanted):
        """Search from the root node for ``wanted``; return the items found."""
        self.que = BfsQue(wanted, cnd_fn=self.cnd_fn)
        self.que.search(self.first_node)
        return self.que.data

    def test_basic1(self):
        expected = [self.first_node]
        self.assertEqual(self._found_from_root([self.first_node]), expected)

    def test_basic2(self):
        expected = [self.node1]
        self.assertEqual(self._found_from_root([self.node1]), expected)

    def test_basic3(self):
        expected = [self.node3, self.node4]
        self.assertEqual(
            self._found_from_root([self.node3, self.node4]), expected)

    def test_basic4(self):
        expected = [self.node5, self.node6]
        self.assertEqual(
            self._found_from_root([self.node5, self.node6]), expected)

    def test_basic5(self):
        expected = [self.node7]
        self.assertEqual(self._found_from_root([self.node7]), expected)

    def test_neg1(self):
        # Only one side of the diamond is wanted; the other side is walked
        # to its end without adding anything.
        expected = [self.node3]
        self.assertEqual(self._found_from_root([self.node3]), expected)

    def test_neg2(self):
        expected = [self.node5]
        self.assertEqual(self._found_from_root([self.node5]), expected)