Skip to content

Commit 5895bc2

Browse files
committed
Automatic repository detection
- Add wsgi_autodetect(ing).py with usage like the existing wsgi_autoreload(ing).py scripts. - Can handle directories with and without .git suffix. - Factor out the repository container functionality from the Klaus object into its own class hierarchy (RepoContainer). - Certain aspects of the automatic detection are configurable (specifically, the path that determines whether a subdirectory is a valid repo, whether it should detect removed repos and what are acceptable suffixes).
1 parent 0dacec5 commit 5895bc2

File tree

5 files changed

+307
-24
lines changed

5 files changed

+307
-24
lines changed

klaus/__init__.py

+30-23
Original file line numberDiff line numberDiff line change
@@ -12,7 +12,7 @@
1212
from dulwich.errors import NotGitRepository
1313

1414
from klaus import utils, views
15-
from klaus.repo import FancyRepo, InvalidRepo
15+
from klaus.repo import DefaultRepoContainer
1616

1717
KLAUS_VERSION = utils.guess_git_revision() or "3.0.1"
1818

@@ -23,20 +23,40 @@ class Klaus(flask.Flask):
2323
"undefined": jinja2.StrictUndefined,
2424
}
2525

26-
def __init__(self, repo_paths, site_name, use_smarthttp, ctags_policy="none"):
26+
def __init__(
27+
self,
28+
repo_paths,
29+
site_name,
30+
use_smarthttp,
31+
ctags_policy="none",
32+
repo_container_factory=None,
33+
):
2734
"""(See `make_app` for parameter descriptions.)"""
2835
self.site_name = site_name
2936
self.use_smarthttp = use_smarthttp
3037
self.ctags_policy = ctags_policy
3138

32-
valid_repos, invalid_repos = self.load_repos(repo_paths)
33-
self.valid_repos = {repo.namespaced_name: repo for repo in valid_repos}
34-
self.invalid_repos = {repo.namespaced_name: repo for repo in invalid_repos}
39+
if repo_container_factory is None:
40+
repo_container_factory = DefaultRepoContainer
41+
42+
self.repo_container = repo_container_factory(repo_paths)
3543

3644
flask.Flask.__init__(self, __name__)
3745

3846
self.setup_routes()
3947

48+
@property
49+
def valid_repos(self):
50+
"""Repositories that are considered valid by the repository manager"""
51+
52+
return self.repo_container.valid
53+
54+
@property
55+
def invalid_repos(self):
56+
"""Repositories that were declined by the repository manager"""
57+
58+
return self.repo_container.invalid
59+
4060
def create_jinja_environment(self):
4161
"""Called by Flask.__init__"""
4262
env = super().create_jinja_environment()
@@ -95,17 +115,6 @@ def should_use_ctags(self, git_repo, git_commit):
95115
else:
96116
raise ValueError("Unknown ctags policy %r" % self.ctags_policy)
97117

98-
def load_repos(self, repo_paths):
99-
valid_repos = []
100-
invalid_repos = []
101-
for namespace, paths in repo_paths.items():
102-
for path in paths:
103-
try:
104-
valid_repos.append(FancyRepo(path, namespace))
105-
except NotGitRepository:
106-
invalid_repos.append(InvalidRepo(path, namespace))
107-
return valid_repos, invalid_repos
108-
109118

110119
def make_app(
111120
repo_paths,
@@ -116,6 +125,7 @@ def make_app(
116125
disable_push=False,
117126
unauthenticated_push=False,
118127
ctags_policy="none",
128+
repo_container_factory=None,
119129
):
120130
"""
121131
Returns a WSGI app with all the features (smarthttp, authentication)
@@ -145,6 +155,8 @@ def make_app(
145155
- 'tags-and-branches': use ctags for revisions that are the HEAD of
146156
a tag or branch
147157
- 'ALL': use ctags for all revisions, may result in high server load!
158+
:param repo_container_factory: A callable that takes `repo_paths` and returns a klaus.repo.BaseRepoContainer, or None,
159+
in which case klaus.repo.DefaultRepoContainer will be used.
148160
"""
149161
if unauthenticated_push:
150162
if not use_smarthttp:
@@ -159,25 +171,20 @@ def make_app(
159171
raise ValueError(
160172
"'htdigest_file' set without 'use_smarthttp' or 'require_browser_auth'"
161173
)
162-
if not isinstance(repo_paths, dict):
163-
# If repos is given as a flat list, put all repos under the "no namespace" namespace
164-
repo_paths = {None: repo_paths}
165174

166175
app = Klaus(
167176
repo_paths,
168177
site_name,
169178
use_smarthttp,
170179
ctags_policy,
180+
repo_container_factory,
171181
)
172182
app.wsgi_app = utils.ProxyFix(app.wsgi_app)
173183

174184
if use_smarthttp:
175185
# `path -> Repo` mapping for Dulwich's web support
176186
dulwich_backend = dulwich.server.DictBackend(
177-
{
178-
"/" + namespaced_name: repo
179-
for namespaced_name, repo in app.valid_repos.items()
180-
}
187+
utils.SlashDictProxy(app.valid_repos)
181188
)
182189
# Dulwich takes care of all Git related requests/URLs
183190
# and passes through everything else to klaus

klaus/contrib/wsgi_autodetect.py

+43
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,43 @@
1+
import os
2+
import warnings
3+
import contextlib
4+
5+
from .app_args import get_args_from_env, strtobool
6+
from .wsgi_autodetecting import make_autodetecting_app
7+
8+
# Locate the directory that contains the repositories. KLAUS_REPOS_ROOT is the
# preferred variable; KLAUS_REPOS is still honoured but deprecated here.
try:
    repos_root = os.environ['KLAUS_REPOS_ROOT']
except KeyError:
    # If KLAUS_REPOS is missing as well, the KeyError propagates: a root
    # directory is mandatory for this entry point.
    repos_root = os.environ['KLAUS_REPOS']
    warnings.warn(
        "use KLAUS_REPOS_ROOT instead of KLAUS_REPOS for the autodetecting apps",
        DeprecationWarning,
    )

# Reuse the environment-derived arguments, replacing the first positional
# argument with the repository root.
# NOTE(review): presumably the first positional is the repository path list
# used by the non-autodetecting apps -- confirm against app_args.
args, kwargs = get_args_from_env()
args = (repos_root,) + args[1:]

# Each optional setting is only forwarded when its variable is present, so
# that make_autodetecting_app can apply its own defaults otherwise.
with contextlib.suppress(KeyError):
    kwargs['detect_removals'] = bool(strtobool(os.environ['KLAUS_DETECT_REMOVALS']))

with contextlib.suppress(KeyError):
    kwargs['export_ok_path'] = os.environ['KLAUS_EXPORT_OK_PATH']

with contextlib.suppress(KeyError):
    # How to deal with repository directories named "foo" and/or "foo.git".
    # This is a list of potential suffixes, with your operating system's
    # directory separator as a separator. Examples:
    #
    #   KLAUS_DIRECTORY_SUFFIXES="/.git"
    #       Directories with and without .git are accepted
    #       (the first entry is the empty string). Default.
    #
    #   KLAUS_DIRECTORY_SUFFIXES=".git"
    #       Only .git directories are accepted.
    #
    #   KLAUS_DIRECTORY_SUFFIXES=""
    #       The .git suffix is not considered.

    kwargs['directory_suffixes'] = os.environ['KLAUS_DIRECTORY_SUFFIXES'].split(os.sep)

# WSGI entry point picked up by the application server.
application = make_autodetecting_app(*args, **kwargs)

klaus/contrib/wsgi_autodetecting.py

+178
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,178 @@
1+
"""
2+
Alternative take on the "automatically discovered repositories" concept
3+
that requires no threads, polling or inotify. Instead the filesystem is
4+
consulted whenever a repository name is looked up.
5+
6+
Since Path.exists() and Path.iterdir() are fairly quick filesystem
7+
operations, performance should be good for small to medium sites.
8+
FancyRepo() objects are cached.
9+
10+
Repositories are identified by the existence of a
11+
12+
<reponame>/git-daemon-export-ok
13+
14+
file (for compatibility with gitweb). You can customize this path using
15+
the export_ok_path parameter. Setting it to '.' will cause every
16+
subdirectory to be considered a git repository.
17+
18+
For large sites this approach may be hard on the filesystem when listing
19+
repositories, because the process of enumerating the git repositories
20+
causes the git-daemon-export-ok file to be checked in every repository.
21+
This can be mitigated by setting detect_removals to False.
22+
"""
23+
24+
import collections.abc
25+
import functools
26+
import os
27+
import pathlib
28+
29+
import klaus
30+
import klaus.repo
31+
32+
# Directory names that refer to the current or parent directory; they are
# never accepted as repository keys.
_bad_names = frozenset([os.curdir, os.pardir])
# Characters that are rejected anywhere in a repository key: NUL and the path
# separators. os.altsep is None on platforms without an alternative separator;
# that entry is harmless because keys are tested character by character.
_bad_chars = frozenset(['\0', os.sep, os.altsep])
# Suffixes tried on directory names when the caller does not configure any.
_default_directory_suffixes = ['', '.git']
35+
36+
37+
def coalesce(*args):
    """Return the first argument that is not None.

    Falsy values such as ``0``, ``''`` or ``False`` count as valid results;
    only ``None`` is skipped. If every argument is None (or no arguments are
    given) None is returned instead of leaking a bare StopIteration, which
    would be silently swallowed or turned into a RuntimeError when raised
    inside an iterator or generator.
    """

    return next((arg for arg in args if arg is not None), None)
41+
42+
43+
class AutodetectingRepoDict(collections.abc.Mapping):
    """
    Maintain a virtual read-only dictionary whose contents represent
    the presence of git repositories in the given root directory.

    Keys are repository names with any configured directory suffix removed;
    values are FancyRepo objects, created on first lookup and cached.

    :param root: The path to a directory containing repositories, each
        a direct subdirectory of the root.
    :param namespace: A namespace that will be applied to all detected
        repositories.
    :param detect_removals: Detect if repositories have been removed.
        Defaults to True. Setting it to False can improve performance
        for repository listings in very large sites.
    :param export_ok_path: The filesystem path to check (relative to
        the candidate repository root) to see if it is a valid servable
        git repository. Defaults to 'git-daemon-export-ok'. Set to '.'
        if every directory is known to be a valid repository root.
    :param directory_suffixes: A list of suffixes that your git directories
        may have. The default is ['', '.git'].
    """

    def __init__(
        self,
        root,
        namespace=None,
        detect_removals=None,
        export_ok_path=None,
        directory_suffixes=None,
    ):
        self._root = pathlib.Path(root)
        # Maps repository key (name without suffix) -> FancyRepo.
        self._cache = {}
        self._namespace = namespace
        self._detect_removals = coalesce(detect_removals, True)
        self._export_ok_path = coalesce(export_ok_path, 'git-daemon-export-ok')
        # Use the keys of a dict in reverse order so that we can create a sort
        # of "poor man's splay tree": the suffixes are always tried in reverse
        # order. If a suffix was matched successfully it is moved to the end by
        # removing and re-adding it so that it is tried as the first option for
        # the next repository.
        self._suffixes = dict.fromkeys(
            reversed(list(coalesce(directory_suffixes, _default_directory_suffixes)))
        )

    def __getitem__(self, name):
        """Return the repository called *name*.

        :raises KeyError: if *name* is not a plausible repository name (not a
            string, empty, hidden, a directory reference or containing a path
            separator) or if no matching directory with the export marker
            exists below the root.
        """
        if (
            # Non-string keys must fail with KeyError so that Mapping methods
            # such as `in` and `.get()` keep working.
            not isinstance(name, str)
            or not name
            or name.startswith('.')
            or name in _bad_names
            or not _bad_chars.isdisjoint(name)
        ):
            raise KeyError(name)

        if not self._detect_removals:
            # Try returning a cached version first, to avoid filesystem access
            try:
                return self._cache[name]
            except KeyError:
                pass

        # Iterate over a snapshot so the reordering below never touches the
        # dict that is being iterated.
        for suffix in tuple(reversed(self._suffixes)):
            # Bare git repositories may have a .git suffix on the directory name:
            path = self._root / (name + suffix)
            if (path / self._export_ok_path).exists():
                # Reorder suffix test order on the assumption that most repos will
                # have the same suffix:
                del self._suffixes[suffix]
                self._suffixes[suffix] = None
                break
        else:
            # The repository is gone (or never existed): forget any cached object.
            self._cache.pop(name, None)
            raise KeyError(name)

        if self._detect_removals:
            # The directory still exists, so a cached object remains usable.
            try:
                return self._cache[name]
            except KeyError:
                pass

        repo = klaus.repo.FancyRepo(str(path), self._namespace)
        self._cache[name] = repo
        return repo

    def __iter__(self):
        """Yield the key of every repository currently below the root.

        Only keys that `__getitem__` would accept are produced: the directory
        name must end in one of the configured suffixes, the resulting key
        must be non-empty and not hidden, and each key is yielded once even
        if several directories (e.g. "foo" and "foo.git") map to it.
        """
        # Longest suffix first, so that "foo.git" maps to "foo" even when the
        # empty suffix is configured as well.
        suffixes = sorted(self._suffixes, key=len, reverse=True)

        def strip_suffix(dirname):
            for suffix in suffixes:
                if dirname.endswith(suffix):
                    return dirname[: len(dirname) - len(suffix)]
            # No configured suffix matches: __getitem__ could never find it.
            return None

        seen = set()
        for path in self._root.iterdir():
            name = strip_suffix(path.name)
            if not name or name.startswith('.') or name in seen:
                continue
            if not self._detect_removals and name in self._cache:
                # Cached repositories are trusted without touching the disk.
                is_repo = True
            else:
                is_repo = (path / self._export_ok_path).exists()
            if is_repo:
                seen.add(name)
                yield name

    def __len__(self):
        """Return the number of detected repositories (scans the root)."""
        return sum(1 for _ in self)
147+
148+
149+
class AutodetectingRepoContainer(klaus.repo.BaseRepoContainer):
    """
    Repository container whose valid repositories are discovered on demand.

    The ``valid`` mapping is an AutodetectingRepoDict rooted at
    ``repos_root``; every further positional and keyword argument is passed
    on to AutodetectingRepoDict unchanged (see there for their meaning).
    """

    def __init__(self, repos_root, *args, **kwargs):
        super().__init__(repos_root)
        # Replace the plain dict set up by the base class with the
        # filesystem-backed mapping.
        detected_repos = AutodetectingRepoDict(repos_root, *args, **kwargs)
        self.valid = detected_repos
158+
159+
160+
def make_autodetecting_app(
    repos_root,
    *args,
    detect_removals=None,
    export_ok_path=None,
    directory_suffixes=None,
    **kwargs,
):
    """
    Build a klaus WSGI app that detects repositories below ``repos_root``.

    ``detect_removals``, ``export_ok_path`` and ``directory_suffixes`` are
    bound into the AutodetectingRepoContainer factory; all remaining
    positional and keyword arguments are handed to ``klaus.make_app``.
    """
    autodetect_options = {
        'detect_removals': detect_removals,
        'export_ok_path': export_ok_path,
        'directory_suffixes': directory_suffixes,
    }
    container_factory = functools.partial(
        AutodetectingRepoContainer, **autodetect_options
    )
    return klaus.make_app(
        repos_root,
        *args,
        repo_container_factory=container_factory,
        **kwargs,
    )

klaus/repo.py

+30-1
Original file line numberDiff line numberDiff line change
@@ -7,7 +7,7 @@
77

88
import dulwich
99
import dulwich.patch
10-
from dulwich.errors import NotTreeError
10+
from dulwich.errors import NotGitRepository, NotTreeError
1111
from dulwich.object_store import tree_lookup_path
1212
from dulwich.objects import S_ISGITLINK, Blob
1313

@@ -435,3 +435,32 @@ def namespaced_name(self):
435435
return f"~{self.namespace}/{self.name}"
436436
else:
437437
return self.name
438+
439+
440+
class BaseRepoContainer:
    """Common base for repository containers.

    Stores the given repository paths and starts out with two empty,
    independent mappings: ``valid`` and ``invalid``.
    """

    def __init__(self, repo_paths):
        # Two distinct dicts, filled in (or replaced) by subclasses.
        self.valid, self.invalid = {}, {}
        self._repo_paths = repo_paths
447+
448+
449+
class DefaultRepoContainer(BaseRepoContainer):
    """Repository container for a fixed, explicitly given set of paths.

    ``repo_paths`` is either a mapping of namespace -> paths or a flat
    collection of paths. Every path is opened once at construction time and
    filed under ``valid`` or ``invalid``, keyed by its namespaced name.
    """

    def __init__(self, repo_paths):
        # A flat collection means "no namespace": wrap it under the None key.
        paths_by_namespace = (
            repo_paths if isinstance(repo_paths, dict) else {None: repo_paths}
        )

        super().__init__(paths_by_namespace)

        candidates = (
            (namespace, path)
            for namespace, paths in paths_by_namespace.items()
            for path in paths
        )
        for namespace, path in candidates:
            try:
                opened = FancyRepo(path, namespace)
                self.valid[opened.namespaced_name] = opened
            except NotGitRepository:
                # Keep track of the path so it can still be reported.
                rejected = InvalidRepo(path, namespace)
                self.invalid[rejected.namespaced_name] = rejected

0 commit comments

Comments
 (0)