From 35cbf277fa6a528d5ad81caf36e94a6672465f72 Mon Sep 17 00:00:00 2001 From: Jonas Haag Date: Sat, 8 Apr 2023 18:28:29 +0300 Subject: [PATCH 1/2] Wrap mutex around Dulwich repo access --- klaus/repo.py | 67 ++++++++++++++++++++++++++++++++++++++------------- 1 file changed, 50 insertions(+), 17 deletions(-) diff --git a/klaus/repo.py b/klaus/repo.py index 76cfbc4f..fa640f85 100644 --- a/klaus/repo.py +++ b/klaus/repo.py @@ -1,7 +1,9 @@ -import os +import functools import io +import os import stat import subprocess +import threading from dulwich.objects import S_ISGITLINK from dulwich.object_store import tree_lookup_path @@ -38,13 +40,25 @@ def cached_call(key, validator, producer, _cache={}): return data -class FancyRepo(dulwich.repo.Repo): +def synchronized(func, lock=threading.RLock()): + @functools.wraps(func) + def synchronized_func(*args, **kwargs): + with lock: + return func(*args, **kwargs) + return synchronized_func + + +class FancyRepo(object): """A wrapper around Dulwich's Repo that adds some helper methods.""" def __init__(self, path, namespace): - super(FancyRepo, self).__init__(path) + self.dulwich_repo = dulwich.repo.Repo(path) self.namespace = namespace + @property + def path(self): + return self.dulwich_repo.path + @property def name(self): return repo_human_name(self.path) @@ -56,7 +70,12 @@ def namespaced_name(self): else: return self.name + @synchronized + def __getitem__(self, key): + return self.dulwich_repo[key] + # TODO: factor out stuff into dulwich + @synchronized def get_last_updated_at(self): """Get datetime of last commit to this repository. @@ -79,12 +98,12 @@ def _get_commit_time(ref_id): return None max_refs = 1000 - if len(self.refs.keys()) > max_refs: + if len(self.dulwich_repo.refs.keys()) > max_refs: # If we have too many refs, look at the branches only. (And HEAD, see below.) base = b"refs/heads" else: base = None - all_ids = list(self.refs.as_dict(base).values()) + all_ids = list(self.dulwich_repo.refs.as_dict(base).values()) # If we still have too many refs, keep only some. if len(all_ids) > max_refs: all_ids = sorted(all_ids)[:max_refs] @@ -99,24 +118,26 @@ def _get_commit_time(ref_id): return None @property + @synchronized def cloneurl(self): """Retrieve the gitweb notion of the public clone URL of this repo.""" - f = self.get_named_file("cloneurl") + f = self.dulwich_repo.get_named_file("cloneurl") if f is not None: return force_unicode(f.read()) - c = self.get_config() + c = self.dulwich_repo.get_config() try: return force_unicode(c.get(b"gitweb", b"url")) except KeyError: return None + @synchronized def get_description(self): """Like Dulwich's `get_description`, but returns None if the file contains Git's default text "Unnamed repository[...]". """ # Cache result to speed up repo_list.html template. # If description file mtime has changed, we should invalidate the cache. - description_file = os.path.join(self._controldir, "description") + description_file = os.path.join(self.dulwich_repo._controldir, "description") try: description_mtime = os.stat(description_file).st_mtime except OSError: @@ -129,12 +150,13 @@ def get_description(self): ) def _get_description(self): - description = super(FancyRepo, self).get_description() + description = self.dulwich_repo.get_description() if description: description = force_unicode(description) if not description.startswith("Unnamed repository;"): return force_unicode(description) + @synchronized def get_commit(self, rev): """Get commit object identified by `rev` (SHA or branch or tag name).""" for prefix in ["refs/heads/", "refs/tags/", ""]: @@ -148,6 +170,7 @@ def get_commit(self, rev): pass raise KeyError(rev) + @synchronized def get_default_branch(self): """Tries to guess the default repo branch name.""" for candidate in ["master", "main", "trunk", "default", "gh-pages"]: @@ -165,6 +188,7 @@ def get_default_branch(self): else: return None + @synchronized def get_ref_names_ordered_by_last_commit(self, prefix, exclude=None): """Return a list of ref names that begin with `prefix`, ordered by the time they have been committed to last. @@ -181,28 +205,32 @@ def get_commit_time(refname): return obj.tag_time return obj.commit_time - refs = self.refs.as_dict(encode_for_git(prefix)) + refs = self.dulwich_repo.refs.as_dict(encode_for_git(prefix)) if exclude: refs.pop(prefix + exclude, None) sorted_names = sorted(refs.keys(), key=get_commit_time, reverse=True) return [decode_from_git(ref) for ref in sorted_names] + @synchronized def get_branch_names(self, exclude=None): """Return a list of branch names of this repo, ordered by the time they have been committed to last. """ return self.get_ref_names_ordered_by_last_commit("refs/heads", exclude) + @synchronized def get_tag_names(self): """Return a list of tag names of this repo, ordered by creation time.""" return self.get_ref_names_ordered_by_last_commit("refs/tags") + @synchronized def get_tag_and_branch_shas(self): """Return a list of SHAs of all tags and branches.""" - tag_shas = self.refs.as_dict(b"refs/tags/").values() - branch_shas = self.refs.as_dict(b"refs/heads/").values() + tag_shas = self.dulwich_repo.refs.as_dict(b"refs/tags/").values() + branch_shas = self.dulwich_repo.refs.as_dict(b"refs/heads/").values() return set(tag_shas) | set(branch_shas) + @synchronized def history(self, commit, path=None, max_commits=None, skip=0): """Return a list of all commits that affected `path`, starting at branch or commit `commit`. `skip` can be used for pagination, `max_commits` @@ -230,6 +258,7 @@ def history(self, commit, path=None, max_commits=None, skip=0): sha1_sums = output.strip().split(b"\n") return [self[sha1] for sha1 in sha1_sums] + @synchronized def blame(self, commit, path): """Return a 'git blame' list for the file at `path`: For each line in the file, the list contains the commit that last changed that line. @@ -243,11 +272,12 @@ def blame(self, commit, path): for sha1 in sha1_sums ] + @synchronized def get_blob_or_tree(self, commit, path): """Return the Git tree or blob object for `path` at `commit`.""" try: (mode, oid) = tree_lookup_path( - self.__getitem__, commit.tree, encode_for_git(path) + self.dulwich_repo.__getitem__, commit.tree, encode_for_git(path) ) except NotTreeError: # Some part of the path was a file where a folder was expected. @@ -255,6 +285,7 @@ def get_blob_or_tree(self, commit, path): raise KeyError return self[oid] + @synchronized def listdir(self, commit, path): """Return a list of submodules, directories and files in given directory: Lists of (link name, target path) tuples. @@ -283,6 +314,7 @@ def listdir(self, commit, path): return {"submodules": submodules, "dirs": dirs, "files": files} + @synchronized def commit_diff(self, commit): """Return the list of changes introduced by `commit`.""" from klaus.utils import guess_is_binary @@ -295,16 +327,16 @@ def commit_diff(self, commit): summary = {"nfiles": 0, "nadditions": 0, "ndeletions": 0} file_changes = [] # the changes in detail - dulwich_changes = self.object_store.tree_changes(parent_tree, commit.tree) + dulwich_changes = self.dulwich_repo.object_store.tree_changes(parent_tree, commit.tree) for (oldpath, newpath), (oldmode, newmode), (oldsha, newsha) in dulwich_changes: summary["nfiles"] += 1 try: - oldblob = self.object_store[oldsha] if oldsha else Blob.from_string(b"") + oldblob = self.dulwich_repo.object_store[oldsha] if oldsha else Blob.from_string(b"") except KeyError: # probably related to submodules; Dulwich will handle that. oldblob = Blob.from_string(b"") try: - newblob = self.object_store[newsha] if newsha else Blob.from_string(b"") + newblob = self.dulwich_repo.object_store[newsha] if newsha else Blob.from_string(b"") except KeyError: # probably related to submodules; Dulwich will handle that. newblob = Blob.from_string(b"") @@ -338,6 +370,7 @@ def commit_diff(self, commit): return summary, file_changes + @synchronized def raw_commit_diff(self, commit): if commit.parents: parent_tree = self[commit.parents[0]].tree @@ -345,7 +378,7 @@ def raw_commit_diff(self, commit): parent_tree = None bytesio = io.BytesIO() dulwich.patch.write_tree_diff( - bytesio, self.object_store, parent_tree, commit.tree + bytesio, self.dulwich_repo.object_store, parent_tree, commit.tree ) return bytesio.getvalue() From 3c9bb8cc1246ef0f934a7d887a37b9444a33b0f9 Mon Sep 17 00:00:00 2001 From: Jonas Haag Date: Sat, 8 Apr 2023 18:29:41 +0300 Subject: [PATCH 2/2] Repo page: if no default branch is found, use HEAD --- klaus/repo.py | 14 +++++--------- klaus/views.py | 6 +++++- 2 files changed, 10 insertions(+), 10 deletions(-) diff --git a/klaus/repo.py b/klaus/repo.py index fa640f85..2f240ce9 100644 --- a/klaus/repo.py +++ b/klaus/repo.py @@ -108,7 +108,10 @@ def _get_commit_time(ref_id): if len(all_ids) > max_refs: all_ids = sorted(all_ids)[:max_refs] # Always add HEAD. - all_ids.append(self.refs[b"HEAD"]) + try: + all_ids.append(self.dulwich_repo.refs[b"HEAD"]) + except KeyError: + pass commit_times = filter(None, map(_get_commit_time_cached, all_ids)) try: @@ -179,14 +182,7 @@ def get_default_branch(self): return candidate except InaccessibleRef: pass - for name in self.get_branch_names(): - try: - self.get_commit(name) - return name - except InaccessibleRef: - pass - else: - return None + return None @synchronized def get_ref_names_ordered_by_last_commit(self, prefix, exclude=None): diff --git a/klaus/views.py b/klaus/views.py index 8ab98f0f..acacfb65 100644 --- a/klaus/views.py +++ b/klaus/views.py @@ -110,7 +110,11 @@ def _get_repo_and_rev(repo, namespace=None, rev=None, path=None): if rev is None: rev = repo.get_default_branch() if rev is None: - raise NotFound("Empty repository") + rev = "HEAD" + try: + repo.get_commit("HEAD") + except KeyError: + raise NotFound("No commits yet") i = len(rev) while i > 0: