Skip to content

Commit

Permalink
Better integrate automatic repo detection into the Klaus codebase
Browse files Browse the repository at this point in the history
- Rename auto_klaus.py to wsgi_autodetect(ing).py and make its usage
  more like the existing wsgi_autoreload(ing).py scripts.

- Factor out the repository container functionality from the Klaus
  object into its own class hierarchy (RepoContainer).

- Make certain aspects of the automatic detection configurable
  (specifically, the path that determines whether a subdirectory is
  a valid repo, and whether it should detect removed repos).
  • Loading branch information
wsldankers committed May 5, 2021
1 parent 33c287f commit c30a04a
Show file tree
Hide file tree
Showing 6 changed files with 253 additions and 127 deletions.
46 changes: 23 additions & 23 deletions klaus/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,9 +2,9 @@
import flask
import httpauth
import dulwich.web
from dulwich.errors import NotGitRepository
import collections.abc
from klaus import views, utils
from klaus.repo import FancyRepo, InvalidRepo
from klaus.repo import DefaultRepoContainer


KLAUS_VERSION = utils.guess_git_revision() or "1.5.2"
Expand All @@ -16,20 +16,34 @@ class Klaus(flask.Flask):
"undefined": jinja2.StrictUndefined,
}

def __init__(self, repo_paths, site_name, use_smarthttp, ctags_policy="none"):
def __init__(
self,
repo_paths,
site_name,
use_smarthttp,
ctags_policy="none",
repo_container_factory=None,
):
"""(See `make_app` for parameter descriptions.)"""
self.site_name = site_name
self.use_smarthttp = use_smarthttp
self.ctags_policy = ctags_policy

valid_repos, invalid_repos = self.load_repos(repo_paths)
self.valid_repos = {repo.namespaced_name: repo for repo in valid_repos}
self.invalid_repos = {repo.namespaced_name: repo for repo in invalid_repos}
if repo_container_factory is None:
repo_container_factory = DefaultRepoContainer

self.valid_repos = repo_container_factory(repo_paths)

flask.Flask.__init__(self, __name__)

self.setup_routes()

@property
def invalid_repos(self):
    """Repositories that the repository container declined.

    Read-only view that simply forwards to the ``invalid`` attribute of
    the container stored in ``self.valid_repos``.
    """
    container = self.valid_repos
    return container.invalid

def create_jinja_environment(self):
"""Called by Flask.__init__"""
env = super(Klaus, self).create_jinja_environment()
Expand Down Expand Up @@ -88,16 +102,6 @@ def should_use_ctags(self, git_repo, git_commit):
else:
raise ValueError("Unknown ctags policy %r" % self.ctags_policy)

def load_repos(self, repo_paths):
    """Open every configured path and sort the results into two lists.

    :param repo_paths: Mapping of namespace to an iterable of repository
        paths.
    :returns: A ``(valid_repos, invalid_repos)`` tuple. Paths that open
        successfully become ``FancyRepo`` objects in the first list; paths
        for which ``FancyRepo`` raises ``NotGitRepository`` become
        ``InvalidRepo`` objects in the second.
    """
    opened, rejected = [], []
    for namespace, paths in repo_paths.items():
        for path in paths:
            try:
                repo = FancyRepo(path, namespace)
            except NotGitRepository:
                # Keep a placeholder so the path can still be reported.
                rejected.append(InvalidRepo(path, namespace))
            else:
                opened.append(repo)
    return opened, rejected


def make_app(
Expand All @@ -109,6 +113,7 @@ def make_app(
disable_push=False,
unauthenticated_push=False,
ctags_policy="none",
repo_container_factory=None,
):
"""
Returns a WSGI app with all the features (smarthttp, authentication)
Expand Down Expand Up @@ -152,25 +157,20 @@ def make_app(
raise ValueError(
"'htdigest_file' set without 'use_smarthttp' or 'require_browser_auth'"
)
if not isinstance(repo_paths, dict):
# If repos is given as a flat list, put all repos under the "no namespace" namespace
repo_paths = {None: repo_paths}

app = Klaus(
repo_paths,
site_name,
use_smarthttp,
ctags_policy,
repo_container_factory,
)
app.wsgi_app = utils.ProxyFix(app.wsgi_app)

if use_smarthttp:
# `path -> Repo` mapping for Dulwich's web support
dulwich_backend = dulwich.server.DictBackend(
{
"/" + namespaced_name: repo
for namespaced_name, repo in app.valid_repos.items()
}
utils.SlashDictProxy(app.valid_repos)
)
# Dulwich takes care of all Git related requests/URLs
# and passes through everything else to klaus
Expand Down
103 changes: 0 additions & 103 deletions klaus/contrib/auto_klaus.py

This file was deleted.

33 changes: 33 additions & 0 deletions klaus/contrib/wsgi_autodetect.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,33 @@
"""
WSGI entry point that serves automatically detected repositories.

Configuration is read from the environment:

KLAUS_REPOS_ROOT
    Directory whose subdirectories are the candidate repositories.
    KLAUS_REPOS is still accepted as a deprecated fallback.
KLAUS_DETECT_REMOVALS
    Optional boolean (y/yes/t/true/on/1 or n/no/f/false/off/0,
    case-insensitive), forwarded as ``detect_removals``.
KLAUS_EXPORT_OK_PATH
    Optional, forwarded as ``export_ok_path``.

All remaining settings come from ``get_args_from_env()``.
"""

import os
import warnings

from .app_args import get_args_from_env
from .wsgi_autodetecting import make_autodetecting_app


_TRUE_STRINGS = frozenset(('y', 'yes', 't', 'true', 'on', '1'))
_FALSE_STRINGS = frozenset(('n', 'no', 'f', 'false', 'off', '0'))


def _strtobool(value):
    """Parse a textual boolean the way distutils.util.strtobool did.

    distutils is deprecated (PEP 632) and no longer ships with
    Python 3.12+, so the small parser is kept local instead of imported.

    :param value: The string to interpret (matched case-insensitively).
    :returns: True or False.
    :raises ValueError: If the string is not a recognised truth value.
    """
    normalized = value.lower()
    if normalized in _TRUE_STRINGS:
        return True
    if normalized in _FALSE_STRINGS:
        return False
    raise ValueError("invalid truth value %r" % (value,))


try:
    repos_root = os.environ['KLAUS_REPOS_ROOT']
except KeyError:
    # Fall back to the legacy variable name, but tell the operator.
    repos_root = os.environ['KLAUS_REPOS']
    warnings.warn(
        "use KLAUS_REPOS_ROOT instead of KLAUS_REPOS for the autodetecting apps",
        DeprecationWarning,
    )

args, kwargs = get_args_from_env()
# Replace the first positional argument with the single root directory.
# NOTE(review): presumably that first element is the repository list built
# from KLAUS_REPOS -- confirm against get_args_from_env().
args = (repos_root,) + args[1:]

try:
    detect_removals = os.environ['KLAUS_DETECT_REMOVALS']
except KeyError:
    # Unset: let the container apply its own default.
    pass
else:
    kwargs['detect_removals'] = _strtobool(detect_removals)

try:
    kwargs['export_ok_path'] = os.environ['KLAUS_EXPORT_OK_PATH']
except KeyError:
    # Unset: let the container apply its own default.
    pass

application = make_autodetecting_app(*args, **kwargs)
127 changes: 127 additions & 0 deletions klaus/contrib/wsgi_autodetecting.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,127 @@
"""
Alternative take on the "automatically discovered repositories" concept
that requires no threads, polling or inotify. Instead the filesystem is
consulted whenever a repository name is looked up.
Since os.path.exists() and os.listdir() are fairly quick filesystem
operations, performance should be good for small to medium sites.
FancyRepo() objects are cached.
Repositories are identified by the existence of a
<reponame>/git-daemon-export-ok
file (for compatibility with gitweb). You can customize this path using
the export_ok_path parameter. Setting it to '.' will cause every
subdirectory to be considered a git repository.
For large sites this approach may be hard on the filesystem when listing
repositories, because the process of enumerating the git repositories
causes the git-daemon-export-ok file to be checked in every repository.
This can be mitigated by setting detect_removals to False.
"""

import pathlib
import os
import os.path
import functools

import klaus
import klaus.repo


def coalesce(*args):
    """Return the first argument that is not None.

    Arguments are examined left to right. Falsy values such as ``0``,
    ``False`` or ``''`` count as real values; only ``None`` is skipped.

    :param args: Candidate values, in order of preference.
    :returns: The first argument that is not ``None``, or ``None`` when
        called without arguments or when every argument is ``None``.
    """
    # The explicit default keeps an all-None call from leaking
    # StopIteration out of a plain function, where calling code could
    # mistake it for the end of an iteration.
    return next((arg for arg in args if arg is not None), None)


class AutodetectingRepoContainer(klaus.repo.BaseRepoContainer):
    """
    Maintain a virtual read-only dictionary whose contents represent
    the presence of git repositories in the given root directory.

    Lookups and iteration consult the filesystem; repository objects
    created by a lookup are cached in ``self._base``.

    :param root: The path to a directory containing repositories, each
        a direct subdirectory of the root.
    :param namespace: A namespace that will be applied to all detected
        repositories.
    :param detect_removals: Detect if repositories have been removed.
        Defaults to True. Setting it to False can improve performance
        for repository listings in very large sites.
    :param export_ok_path: The filesystem path to check (relative to
        the candidate repository root) to see if it is a valid servable
        git repository. Defaults to 'git-daemon-export-ok'. Set to '.'
        if every directory is known to be a valid repository root.
    """

    def __init__(
        self,
        root,
        namespace=None,
        detect_removals=None,
        export_ok_path=None,
    ):
        # NOTE(review): self._base is used below as a mutable
        # name -> repo cache; presumably the base class initialiser sets
        # it up -- confirm against BaseRepoContainer.
        super().__init__([])
        self._root = pathlib.Path(root)
        self._namespace = namespace
        # None means "not specified", so callers can forward unset options.
        self._detect_removals = coalesce(detect_removals, True)
        self._export_ok_path = coalesce(export_ok_path, 'git-daemon-export-ok')

    @staticmethod
    def _is_candidate_name(name):
        """Return True if `name` may denote a direct subdirectory of the root.

        Rejects non-strings, empty names, names starting with a dot (which
        also covers the current/parent directory entries) and names that
        contain a NUL byte or a path separator.
        """
        if not isinstance(name, str) or not name:
            return False
        if name[0] == '.' or name in (os.curdir, os.pardir):
            return False
        forbidden = ('\0', os.sep, os.altsep)  # altsep is None on POSIX
        return not any(
            badness in name for badness in forbidden if badness is not None
        )

    def _is_exported(self, name):
        """Return True if the export-ok path exists below root/name."""
        return os.path.exists(self._root / name / self._export_ok_path)

    def __getitem__(self, name):
        """Return the repository called `name`, opening it if necessary.

        :raises KeyError: If the name is not acceptable or the export-ok
            path does not exist for it.
        """
        if not self._is_candidate_name(name):
            raise KeyError(name)

        if not self._detect_removals:
            # Try returning a cached version first, to avoid filesystem access
            try:
                return self._base[name]
            except KeyError:
                pass

        if not self._is_exported(name):
            # Forget a repository that has disappeared since it was cached.
            self._base.pop(name, None)
            raise KeyError(name)

        if self._detect_removals:
            # Still present on disk, so a cached object may be reused.
            try:
                return self._base[name]
            except KeyError:
                pass

        repo = klaus.repo.FancyRepo(str(self._root / name), self._namespace)
        self._base[name] = repo
        return repo

    def __iter__(self):
        """Iterate over the names of the repositories currently present.

        Only names that `__getitem__` would accept are produced, so every
        yielded key can be looked up (barring concurrent changes on disk).
        """
        def is_valid_repo(name):
            if not self._is_candidate_name(name):
                return False
            if not self._detect_removals and name in self._base:
                # Trust the cache instead of touching the filesystem.
                return True
            return self._is_exported(name)

        return (name for name in os.listdir(self._root) if is_valid_repo(name))

    def __len__(self):
        """Count the repositories; costs as much as a full iteration."""
        return sum(1 for _ in self)

def make_autodetecting_app(
    repos_root,
    *args,
    detect_removals=None,
    export_ok_path=None,
    **kwargs,
):
    """Build a klaus app backed by an AutodetectingRepoContainer.

    :param repos_root: Passed to ``klaus.make_app`` as its first argument.
    :param detect_removals: Forwarded to ``AutodetectingRepoContainer``.
    :param export_ok_path: Forwarded to ``AutodetectingRepoContainer``.
    :param args: Further positional arguments for ``klaus.make_app``.
    :param kwargs: Further keyword arguments for ``klaus.make_app``.
    :returns: Whatever ``klaus.make_app`` returns.
    """
    # Pre-bind the detection options; the remaining arguments are left
    # for whoever calls the factory.
    container_factory = functools.partial(
        AutodetectingRepoContainer,
        detect_removals=detect_removals,
        export_ok_path=export_ok_path,
    )
    return klaus.make_app(
        repos_root,
        *args,
        repo_container_factory=container_factory,
        **kwargs,
    )
Loading

0 comments on commit c30a04a

Please sign in to comment.