Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

New config manager #760

Closed
wants to merge 7 commits into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
14 changes: 13 additions & 1 deletion datalad_next/annexremotes/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@
# in a single place
from annexremote import UnsupportedRequest
from typing import Any
from os import environ

from datalad.customremotes import (
# this is an enhanced RemoteError that self-documents its cause
Expand Down Expand Up @@ -32,9 +33,20 @@ def repo(self) -> LeanAnnexRepo:
to limit further proliferation of the ``AnnexRepo`` API.
"""
if self._repo is None:
self._repo = LeanAnnexRepo(self.annex.getgitdir())
self._repo = LeanAnnexRepo(self.repodir)
return self._repo

@property
def repodir(self) -> str:
    """Directory to use as the repository location for this remote.

    Starts from the directory reported by ``self.annex.getgitdir()``.
    If the ``GIT_DIR`` environment variable is set (non-empty) and the
    reported directory ends with its value, that trailing part is cut
    off and the remainder is returned. Otherwise the reported directory
    is returned unchanged.
    """
    repodir = self.annex.getgitdir()
    # git-annex also sets GIT_DIR, and we want to account for that
    # to be able to run regular Git command in this environment
    gitdir_env = environ.get("GIT_DIR")
    if gitdir_env and repodir.endswith(gitdir_env):
        # strip exactly the GIT_DIR suffix; anything before it
        # (including a trailing path separator) is kept as-is
        repodir = repodir[:-len(gitdir_env)]
    return repodir

@property
def remotename(self) -> str:
"""Name of the (git) remote the special remote is operating under"""
Expand Down
30 changes: 20 additions & 10 deletions datalad_next/annexremotes/archivist.py
Original file line number Diff line number Diff line change
Expand Up @@ -35,6 +35,7 @@

from pathlib import Path
from shutil import copyfileobj
import sys
from typing import (
Dict,
Generator,
Expand Down Expand Up @@ -162,7 +163,7 @@ def prepare(self):
# us a `LeanAnnexRepo`.
# TODO it is unclear to MIH what is actually needed API-wise of the legacy
# interface. Needs research.
self._repo = LegacyAnnexRepo(self.annex.getgitdir())
self._repo = LegacyAnnexRepo(self.repodir)
# are we in legacy mode?
# let remote-specific setting take priority (there could be
# multiple archivist-type remotes configured), and use unspecific switch
Expand All @@ -175,7 +176,7 @@ def prepare(self):
# other code in this file will run!!!
# __getattribute__ will relay all top-level operations
# to an instance of the legacy implementation
from datalad.customremotes.archives import ArchiveAnnexCustomRemote
from datalad.customremotes.archives import ArchiveAnnexCustomRemote # type: ignore
lsr = ArchiveAnnexCustomRemote(self.annex)
lsr.prepare()
# we can skip everything else, it won't be triggered anymore
Expand Down Expand Up @@ -377,7 +378,7 @@ class _ArchiveHandlers:
The main functionality is provided by ``from_locators()``.
"""
# TODO make archive access caching behavior configurable from the outside
def __init__(self, repo):
def __init__(self, repo: LegacyAnnexRepo):
# mapping of archive keys to an info dict
self._db: Dict[AnnexKey, _ArchiveInfo] = {}
# for running git-annex queries against the repo
Expand Down Expand Up @@ -422,6 +423,7 @@ def from_locators(
}.items():
# local_path will be None now, if not around
if kh.local_path:
assert kh.handler
# we found one with a local archive.
# yield handler and all matching locators
yield kh.handler, [loc for loc in locs if loc.akey == akey]
Expand Down Expand Up @@ -466,7 +468,8 @@ def from_locators(
exc = []
# but this time sort the keys to start with the smallest ones
# (just in case a download is involved)
for akey in sorted(akeys, key=lambda x: x.size):
# when no size info is available, assume worst case
for akey in sorted(akeys, key=lambda x: x.size or sys.maxsize):
# at this point we must have an existing _ArchiveInfo record
# for this akey
ainfo = self._db[akey]
Expand All @@ -485,11 +488,17 @@ def from_locators(
# exceptions, make sure to report them
if exc:
# TODO better error
e = RuntimeError(
class RuntimeErrors(RuntimeError):
def __init__(self, msg, errors):
RuntimeError.__init__(self, msg)
self.errors = errors

overall_exc = RuntimeErrors(
'Exhausted all candidate archive handlers '
f'(previous failures {exc})')
e.errors = exc
raise e
f'(previous failures {exc})',
errors=exc,
)
raise overall_exc

def _get_archive_info(
self,
Expand Down Expand Up @@ -523,6 +532,7 @@ def _get_local_handler(self, ainfo: _ArchiveInfo) -> ArchiveOperations:
raise NotImplementedError

if ainfo.type == ArchiveType.tar:
assert ainfo.local_path is not None
from datalad_next.archive_operations import TarArchiveOperations
return TarArchiveOperations(
ainfo.local_path,
Expand Down Expand Up @@ -564,9 +574,9 @@ def _get_key_contentpath(repo: LegacyAnnexRepo, key: str):
# and the content can be found at the location
loc = next(repo.call_annex_items_(['contentlocation', key]))
# convert to path. git-annex will report a path relative to the
# dotgit-dir
# CWD of the call above
# TODO platform-native?
loc = repo.dot_git / Path(loc)
loc = repo.pathobj / Path(loc)
except CommandError:
loc = None
return loc
92 changes: 91 additions & 1 deletion datalad_next/config/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,11 +2,101 @@

This module provides the central ``ConfigManager`` class.

.. todo::

Mention ``defaults``, ``manager``, and ``legacy_cfg``


Validation of configuration item values

There are two ways to do validation and type conversion: on-access, or
on-load. Doing it on-load would allow rejecting invalid configuration
immediately. But it might spend time on items that never get accessed.
On-access might waste cycles on repeated checks, and possibly complain later
than useful. Here we nevertheless run a validator on-access in the default
implementation. Particular sources may want to override this, or ensure that
the stored value that is passed to a validator is already in the best possible
form to make re-validation the cheapest.

.. currentmodule:: datalad_next.config
.. autosummary::
:toctree: generated

ConfigManager
LegacyConfigManager
LegacyEnvironment
GitConfig
SystemGitConfig
GlobalGitConfig
LocalGitConfig
GitEnvironment
ImplementationDefault
defaults
dialog
legacy_register_config
legacy_cfg
"""

from datalad.config import ConfigManager
__all__ = [
'ConfigManager',
'LegacyConfigManager',
'LegacyEnvironment',
'GitConfig',
'SystemGitConfig',
'GlobalGitConfig',
'LocalGitConfig',
'GitEnvironment',
'ImplementationDefault',
'defaults',
'dialog',
'legacy_register_config',
'legacy_cfg',
]

# TODO: eventually replace with
# from .legacy import ConfigManager
from datalad.config import ConfigManager # type: ignore

from . import dialog
from .default import (
ImplementationDefault,
legacy_register_config,
)
from .default import (
load_legacy_defaults as _load_legacy_defaults,
)
from .env import LegacyEnvironment
from .git import (
GitConfig,
GlobalGitConfig,
LocalGitConfig,
SystemGitConfig,
)
from .gitenv import GitEnvironment
from .legacy import ConfigManager as LegacyConfigManager
from .manager import ConfigManager as NextGenConfigManager

# instance for registering all defaults
defaults = ImplementationDefault()
# load up with legacy registrations for now
_load_legacy_defaults(defaults)

# order reflects precedence rule, first source with a key takes precedence
# NOTE(review): this assignment rebinds the package attribute ``manager``,
# which the ``from .manager import ...`` statement above had bound to the
# ``manager`` submodule. From here on, attribute access to
# ``datalad_next.config.manager`` yields this instance, not the submodule --
# confirm this shadowing is intended.
manager = NextGenConfigManager(defaults=defaults)
# module-level instance of ``LegacyConfigManager`` (the ``ConfigManager``
# class imported from ``.legacy``)
legacy_cfg = LegacyConfigManager()

ConfigManager.__doc__ = """\
Do not use anymore

.. deprecated:: 1.6

The use of this class is discouraged. It is a legacy import from the
``datalad`` package, and a near drop-in replacement is provided with
:class:`LegacyConfigManager`. Moreover, a :class:`LegacyConfigManager`-based
instance of a global configuration manager is available as a
:obj:`datalad_next.config.legacy_cfg` object in this module.

New implementations are encouraged to use the
:obj:`datalad_next.config.manager` object (an instance of
:class:`MultiConfiguration`) to query and manipulate configuration items.
"""
127 changes: 127 additions & 0 deletions datalad_next/config/default.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,127 @@
from __future__ import annotations

import logging
from typing import (
Any,
Callable,
)

# momentarily needed for the legacy_register_config()
# implementation.
from datalad.interface.common_cfg import definitions # type: ignore
from datalad.support.extensions import ( # type: ignore
register_config as _legacy_register_config,
)
from datasalad.settings import Defaults

from datalad_next.config.dialog import get_dialog_class_from_legacy_ui_label
from datalad_next.config.item import (
ConfigurationItem,
UnsetValue,
)
from datalad_next.constraints import (
Constraint,
EnsureNone,
)

lgr = logging.getLogger('datalad.config')


class ImplementationDefault(Defaults):
    """``Defaults`` subclass that only customizes its string form."""

    def __str__(self) -> str:
        """Return the fixed label used to represent this source."""
        return 'ImplementationDefaults'


#
# legacy support tooling from here.
# none of this is executed by the code above. It has to be triggered manually
# and pointed to an instance of ImplementationDefault
#

def load_legacy_defaults(source: ImplementationDefault) -> None:
    """Copy the legacy configuration ``definitions`` into ``source``.

    Every entry of ``definitions`` that reports a ``'default'`` is turned
    into a ``ConfigurationItem`` and stored in ``source`` under the same
    name. Entries without one are skipped with a debug log message.

    Parameters
    ----------
    source:
      Target that receives one ``ConfigurationItem`` per registered
      legacy definition (assigned via ``source[name] = ...``).
    """
    for name, cfg in definitions.items():
        if 'default' not in cfg:
            lgr.debug(
                'Configuration %r has no default(_fn), not registering',
                name
            )
            continue

        props = cfg._props

        # a legacy ``ui`` spec is indexed as (label, settings-mapping)
        dialog = None
        ui_spec = props.get('ui', None)
        if ui_spec is not None:
            dialog_cls = get_dialog_class_from_legacy_ui_label(ui_spec[0])
            dialog = dialog_cls(
                title=ui_spec[1]['title'],
                text=ui_spec[1].get('text', ''),
            )

        validator = props.get('type')
        if name == 'datalad.tests.temp.dir':
            # https://github.com/datalad/datalad/issues/7662
            validator = validator | EnsureNone()

        # a default-producing function, when given, wins over a static
        # default and marks the item as lazy
        default_fn = props.get('default_fn')
        value = default_fn if default_fn else props.get('default', UnsetValue)
        is_lazy = default_fn is not None

        store_target = get_store_target_from_destination_label(
            props.get('destination'),
        )

        source[name] = ConfigurationItem(
            value,
            validator=validator,
            lazy=is_lazy,
            dialog=dialog,
            store_target=store_target,
        )


def legacy_register_config(
    source: ImplementationDefault,
    name: str,
    title: str,
    *,
    default: Any = UnsetValue,
    default_fn: Callable | None = None,
    description: str | None = None,
    type: Constraint | None = None,  # noqa: A002
    dialog: str | None = None,
    scope: str | type[UnsetValue] = UnsetValue,
):
    """Register a configuration item with ``source`` and the legacy registry.

    First a ``ConfigurationItem`` is stored in ``source`` under ``name``,
    then the legacy ``register_config()`` is called with the same
    ``name``, ``title``, ``default``, ``default_fn``, ``description``,
    ``type``, ``dialog``, and ``scope`` values.

    Parameters
    ----------
    source:
      Receives the new ``ConfigurationItem`` (``source[name] = ...``).
    name:
      Name of the configuration item.
    title:
      Title for the dialog; also passed on to the legacy registration.
    default:
      Static default value; used when no ``default_fn`` is given.
    default_fn:
      Callable used in place of ``default``; the item is marked lazy
      whenever this is not ``None``.
    description:
      Dialog text; an empty string is used when not given.
    type:
      Passed as ``validator`` to the ``ConfigurationItem``.
    dialog:
      Legacy UI label; when not ``None`` it is resolved to a dialog
      class via ``get_dialog_class_from_legacy_ui_label()``.
    scope:
      Legacy destination label, translated with
      ``get_store_target_from_destination_label()``.
    """
    item_value = default_fn if default_fn else default
    is_lazy = default_fn is not None

    if dialog is None:
        dialog_obj = None
    else:
        dialog_cls = get_dialog_class_from_legacy_ui_label(dialog)
        dialog_obj = dialog_cls(
            title=title,
            text=description or '',
        )

    store_target = get_store_target_from_destination_label(scope)

    source[name] = ConfigurationItem(
        item_value,
        validator=type,
        lazy=is_lazy,
        dialog=dialog_obj,
        store_target=store_target,
    )
    # lastly trigger legacy registration
    _legacy_register_config(
        name=name,
        title=title,
        default=default,
        default_fn=default_fn,
        description=description,
        type=type,
        dialog=dialog,
        scope=scope,
    )


def get_store_target_from_destination_label(
    label: str | type[UnsetValue] | None,
) -> str | None:
    """Translate a legacy destination/scope label into a store-target name.

    Parameters
    ----------
    label:
      One of ``'global'``, ``'local'``, ``'dataset'``, or ``None`` /
      the ``UnsetValue`` class itself (used as a sentinel) to indicate
      that no destination was declared.

    Returns
    -------
    str or None
      ``'GlobalGitConfig'``, ``'LocalGitConfig'``, or
      ``'DatasetBranchConfig'`` for the respective label; ``None`` when
      no destination was declared.

    Raises
    ------
    ValueError
      For any other label.
    """
    known_targets = {
        'global': 'GlobalGitConfig',
        'local': 'LocalGitConfig',
        'dataset': 'DatasetBranchConfig',
    }
    # only strings can name a destination; the type check also keeps
    # unhashable inputs away from the dict lookup
    if isinstance(label, str) and label in known_targets:
        return known_targets[label]
    # ``None`` and the ``UnsetValue`` class are singletons: compare by
    # identity rather than equality
    if label is None or label is UnsetValue:
        return None
    msg = f'unsupported configuration destination label {label!r}'
    raise ValueError(msg)
Loading
Loading