diff --git a/datalad_next/config/__init__.py b/datalad_next/config/__init__.py index 37ee453a..358b04ce 100644 --- a/datalad_next/config/__init__.py +++ b/datalad_next/config/__init__.py @@ -2,11 +2,109 @@ This modules provides the central ``ConfigManager`` class. +.. todo:: + + Mention ``defaults``, ``manager``, and ``legacy_cfg`` + + +Validation of configuration item values + +There are two ways to do validation and type conversion. on-access, or +on-load. Doing it on-load would allow to reject invalid configuration +immediately. But it might spend time on items that never get accessed. +On-access might waste cycles on repeated checks, and possible complain later +than useful. Here we nevertheless run a validator on-access in the default +implementation. Particular sources may want to override this, or ensure that +the stored value that is passed to a validator is already in the best possible +form to make re-validation the cheapest. + .. currentmodule:: datalad_next.config .. autosummary:: :toctree: generated ConfigManager + LegacyConfigManager + LegacyEnvironment + GitConfig + SystemGitConfig + GlobalGitConfig + LocalGitConfig + ImplementationDefault + defaults + dialog + legacy_register_config + legacy_cfg """ -from datalad.config import ConfigManager +__all__ = [ + 'ConfigManager', + 'LegacyConfigManager', + 'LegacyEnvironment', + 'GitConfig', + 'SystemGitConfig', + 'GlobalGitConfig', + 'LocalGitConfig', + 'ImplementationDefault', + 'defaults', + 'dialog', + 'legacy_register_config', + 'legacy_cfg', +] + +# TODO: eventually replace with +# from .legacy import ConfigManager +from datalad.config import ConfigManager # type: ignore +from datasalad.settings import Settings + +from . import dialog +from .default import ( + ImplementationDefault, + legacy_register_config, +) +from .default import ( + load_legacy_defaults as _load_legacy_defaults, +) +from .env import LegacyEnvironment +from .git import ( + GitConfig, + GlobalGitConfig, + LocalGitConfig, + SystemGitConfig, +) +from .legacy import ConfigManager as LegacyConfigManager + +# instance for registering all defaults +defaults = ImplementationDefault() +# load up with legacy registrations for now +_load_legacy_defaults(defaults) + +manager = Settings( + { + # order reflects precedence rule, first source with a + # key takes precedence + 'legacy-environment': LegacyEnvironment(), + #'git-local': ..., + 'git-global': GlobalGitConfig(), + 'git-system': SystemGitConfig(), + #'datalad-branch': ..., + 'defaults': defaults, + } +) + +legacy_cfg = LegacyConfigManager() + +ConfigManager.__doc__ = """\ +Do not use anymore + +.. deprecated:: 1.6 + + The use of this class is discouraged. It is a legacy import from the + ``datalad`` package, and a near drop-in replacement is provided with + :class:`LegacyConfigManager`. Moreover, a :class:`LegacyConfigManager`-based + instance of a global configuration manager is available as a + :obj:`datalad_next.config.legacy_cfg` object in this module. + + New implementation are encourage to use the + :obj:`datalad_next.config.manager` object (and instance of + :class:`MultiConfiguration`) to query and manipulate configuration items. +""" diff --git a/datalad_next/config/default.py b/datalad_next/config/default.py new file mode 100644 index 00000000..b77eb2c2 --- /dev/null +++ b/datalad_next/config/default.py @@ -0,0 +1,128 @@ +from __future__ import annotations + +import logging +from typing import ( + Any, + Callable, +) + +# momentarily needed for the legacy_register_config() +# implementation. +from datalad.interface.common_cfg import definitions # type: ignore +from datalad.support.extensions import ( # type: ignore + register_config as _legacy_register_config, +) +from datasalad.settings import Defaults + +from datalad_next.config.dialog import get_dialog_class_from_legacy_ui_label +from datalad_next.config.item import ( + ConfigurationItem, + UnsetValue, +) +from datalad_next.constraints import ( + Constraint, + EnsureNone, +) + +lgr = logging.getLogger('datalad.config') + + +class ImplementationDefault(Defaults): + def __str__(self): + return 'ImplementationDefaults' + + +# +# legacy support tooling from here. +# non of this is executed by the code above. It has to be triggered manually +# and pointed to an instance of ImplementationDefaults +# + +def load_legacy_defaults(source: ImplementationDefault) -> None: + for name, cfg in definitions.items(): + if 'default' not in cfg: + lgr.debug( + 'Configuration %r has no default(_fn), not registering', + name + ) + continue + + cfg_props = cfg._props + ui = cfg_props.get('ui', None) + if ui is not None: + dialog = get_dialog_class_from_legacy_ui_label(ui[0])( + title=ui[1]['title'], + text=ui[1].get('text', ''), + ) + else: + dialog = None + + coercer = cfg_props.get('type') + if name == 'datalad.tests.temp.dir': + # https://github.com/datalad/datalad/issues/7662 + coercer = coercer | EnsureNone() + + default = cfg_props.get('default', UnsetValue) + default_fn = cfg_props.get('default_fn') + + source[name] = ConfigurationItem( + default_fn if default_fn else default, + validator=coercer, + lazy=default_fn is not None, + dialog=dialog, + store_target=get_store_target_from_destination_label( + cfg_props.get('destination'), + ), + ) + + +def legacy_register_config( + source: ImplementationDefault, + name: str, + title: str, + *, + default: Any = UnsetValue, + default_fn: Callable | type[UnsetValue] = UnsetValue, + description: str | None = None, + type: Constraint | None = None, # noqa: A002 + dialog: str | None = None, + scope: str | type[UnsetValue] = UnsetValue, +): + source[name] = ConfigurationItem( + default_fn if default_fn else default, + validator=type, + lazy=default_fn is not None, + dialog=None if dialog is None + else get_dialog_class_from_legacy_ui_label(dialog)( + title=title, + text=description or '', + ), + store_target=get_store_target_from_destination_label(scope), + ) + + # lastly trigger legacy registration + _legacy_register_config( + name=name, + title=title, + default=default, + default_fn=default_fn, + description=description, + type=type, + dialog=dialog, + scope=scope, + ) + + +def get_store_target_from_destination_label( + label: str | UnsetValue | None, +) -> str | None: + if label in (None, UnsetValue): + return None + if label == 'global': + return 'GlobalGitConfig' + if label == 'local': + return 'LocalGitConfig' + if label == 'dataset': + return 'DatasetBranchConfig' + msg = f'unsupported configuration destination label {label!r}' + raise ValueError(msg) diff --git a/datalad_next/config/dialog.py b/datalad_next/config/dialog.py new file mode 100644 index 00000000..13858afd --- /dev/null +++ b/datalad_next/config/dialog.py @@ -0,0 +1,44 @@ +from __future__ import annotations + +from dataclasses import dataclass + +__all__ = [ + 'Dialog', + 'Question', + 'YesNo', + 'Choice', +] + + +# only from PY3.10 +# @dataclass(kw_only=True) +@dataclass +class Dialog: + title: str + text: str | None = None + + +@dataclass +class Question(Dialog): + pass + + +@dataclass +class YesNo(Dialog): + pass + + +@dataclass +class Choice(Dialog): + pass + + +def get_dialog_class_from_legacy_ui_label(label: str) -> type[Dialog]: + """Recode legacy `datalad.interface.common_cfg` UI type label""" + if label == 'yesno': + return YesNo + elif label == 'question': + return Question + else: + msg = f'unknown UI type label {label!r}' + raise ValueError(msg) diff --git a/datalad_next/config/env.py b/datalad_next/config/env.py new file mode 100644 index 00000000..b60ef74b --- /dev/null +++ b/datalad_next/config/env.py @@ -0,0 +1,53 @@ +from __future__ import annotations + +import json +import logging +from os import environ +from typing import Any + +from datasalad.settings import CachingSource + +from datalad_next.config.item import ConfigurationItem + +lgr = logging.getLogger('datalad.config') + + +class LegacyEnvironment(CachingSource): + """ + All loaded items have a ``store_target`` of ``Environment``, assuming + that if they are loaded from the environment, a modification can + also target the environment again. + """ + is_writable = False + + def load(self) -> None: + # not resetting here, incremental load + for k, v in self._load_legacy_overrides().items(): + self._items[k] = ConfigurationItem(value=v) + for k in environ: + if not k.startswith('DATALAD_'): + continue + # translate variable name to config item key + item_key = k.replace('__', '-').replace('_', '.').lower() + self._items[item_key] = ConfigurationItem(value=environ[k]) + + def _load_legacy_overrides(self) -> dict[str, Any]: + try: + return { + str(k): v + for k, v in json.loads( + environ.get("DATALAD_CONFIG_OVERRIDES_JSON", '{}') + ).items() + } + except json.decoder.JSONDecodeError as exc: + lgr.warning( + "Failed to load DATALAD_CONFIG_OVERRIDES_JSON: %s", + exc, + ) + return {} + + def __str__(self): + return 'LegacyEnvironment' + + def __repr__(self): + return 'LegacyEnvironment()' diff --git a/datalad_next/config/git.py b/datalad_next/config/git.py new file mode 100644 index 00000000..1deda799 --- /dev/null +++ b/datalad_next/config/git.py @@ -0,0 +1,245 @@ + +from __future__ import annotations + +import logging +import re +from abc import abstractmethod +from pathlib import Path +from typing import ( + TYPE_CHECKING, +) + +if TYPE_CHECKING: + from os import PathLike + + from datasalad.settings import Setting + +from datasalad.itertools import ( + decode_bytes, + itemize, +) +from datasalad.runners import CommandError as SaladCommandError +from datasalad.settings import CachingSource + +from datalad.consts import DATASET_CONFIG_FILE + +from datalad_next.config.item import ConfigurationItem +from datalad_next.runners import ( + CommandError, + call_git, + call_git_oneline, + iter_git_subproc, +) + +lgr = logging.getLogger('datalad.config') + + +class GitConfig(CachingSource): + """Configuration source using git-config to read and write""" + is_writable = True + + @abstractmethod + def _get_git_config_cmd(self) -> list[str]: + """Return the git-config command suitable for a particular config""" + + @abstractmethod + def _get_git_config_cwd(self) -> Path: + """Return path the git-config command should run in""" + + def reinit(self) -> None: + super().reinit() + self._sources: set[str | Path] = set() + + def load(self) -> None: + cwd = self._get_git_config_cwd() + dct: dict[str, str | tuple[str, ...]] = {} + fileset: set[str] = set() + + try: + with iter_git_subproc( + [*self._get_git_config_cmd(), + '--show-origin', '--list', '-z'], + input=None, + cwd=cwd, + ) as gitcfg: + for line in itemize( + decode_bytes(gitcfg), + sep='\0', + keep_ends=False, + ): + _proc_dump_line(line, fileset, dct) + except (CommandError, SaladCommandError): + # TODO: only pass for the case where no corresponding + # source is found. E.g., it fails with --system whenever + # there is no /etc/gitconfig + pass + + # take blobs with verbatim markup + origin_blobs = {f for f in fileset if f.startswith('blob:')} + # convert file specifications to Path objects with absolute paths + origin_paths = {Path(f[5:]) for f in fileset if f.startswith('file:')} + origin_paths = {f if f.is_absolute() else cwd / f for f in origin_paths} + # TODO: add "version" tracking. The legacy config manager used mtimes + # and we will too. but we also need to ensure that the version for + # the "blobs" is known + self._sources = origin_paths.union(origin_blobs) + + for k, v in dct.items(): + if isinstance(v, tuple): + vals = tuple( + ConfigurationItem( + value=val, + store_target=self.__class__, + ) + for val in v + ) + else: + vals = ConfigurationItem( + value=v, + store_target=self.__class__, + ) + super().__setitem__(k, vals) + + + def __setitem__(self, key: str, value: Setting) -> None: + call_git( + [*self._get_git_config_cmd(), '--replace-all', key, str(value.value)], + capture_output=True, + ) + super().__setitem__(key, value) + + def add(self, key: str, value: Setting) -> None: + call_git( + [*self._get_git_config_cmd(), '--add', key, str(value.value)], + capture_output=True, + + ) + super().add(key, value) + + +class SystemGitConfig(GitConfig): + def _get_git_config_cmd(self) -> list[str]: + return ['config', '--system'] + + def _get_git_config_cwd(self) -> Path: + return Path.cwd() + + +class GlobalGitConfig(GitConfig): + def _get_git_config_cmd(self) -> list[str]: + return ['config', '--global'] + + def _get_git_config_cwd(self) -> Path: + return Path.cwd() + + +class LocalGitConfig(GitConfig): + def __init__(self, path: PathLike): + super().__init__() + self._path = path + self._is_bare_repo = call_git_oneline( + ['rev-parse', '--is-bare-repository'], + cwd=path, + force_c_locale=True, + ) == 'true' + + def _get_git_config_cmd(self) -> list[str]: + return ['-C', str(self._path), 'config', '--local'] + + def _get_git_config_cwd(self) -> Path: + return self._path + + +class DataladBranchConfig(LocalGitConfig): + def _get_git_config_cmd(self) -> list[str]: + return [ + '-C', str(self._path), + 'config', + *(('--blob', 'HEAD:.datalad/config') if self._is_bare_repo else + ('--file', str(self._path / DATASET_CONFIG_FILE))), + ] + + +def _proc_dump_line( + line: str, + fileset: set[str], + dct: dict[str, str | tuple[str, ...]], +) -> None: + # line is a null-delimited chunk + k = None + # in anticipation of output contamination, process within a loop + # where we can reject non syntax compliant pieces + while line: + if line.startswith(('file:', 'blob:')): + fileset.add(line) + break + if line.startswith('command line:'): + # no origin that we could as a pathobj + break + # try getting key/value pair from the present chunk + k, v = _gitcfg_rec_to_keyvalue(line) + if k is not None: + # we are done with this chunk when there is a good key + break + # discard the first line and start over + ignore, line = line.split('\n', maxsplit=1) + lgr.debug('Non-standard git-config output, ignoring: %s', ignore) + if not k: + # nothing else to log, all ignored dump was reported before + return + if TYPE_CHECKING: + assert k is not None + if v is None: + # man git-config: + # just name, which is a short-hand to say that the variable is + # the boolean + #v = "true" + # BUUUUUT datalad of old want it to stay `None` + # BUUUUUUUUT it also want it to be reported as True later on + v = None + # multi-value reporting + present_v = dct.get(k) + if present_v is None: + dct[k] = v + elif isinstance(present_v, tuple): + dct[k] = (*present_v, v) + else: + dct[k] = (present_v, v) + + +# git-config key syntax with a section and a subsection +# see git-config(1) for syntax details +cfg_k_regex = re.compile(r'([a-zA-Z0-9-.]+\.[^\0\n]+)$', flags=re.MULTILINE) +# identical to the key regex, but with an additional group for a +# value in a null-delimited git-config dump +cfg_kv_regex = re.compile( + r'([a-zA-Z0-9-.]+\.[^\0\n]+)\n(.*)$', + flags=re.MULTILINE | re.DOTALL +) + + +def _gitcfg_rec_to_keyvalue(rec: str) -> tuple[str | None, str | None]: + """Helper for parse_gitconfig_dump() + + Parameters + ---------- + rec: str + Key/value specification string + + Returns + ------- + str, str + Parsed key and value. Key and/or value could be None + if not syntax-compliant (former) or absent (latter). + """ + kv_match = cfg_kv_regex.match(rec) + if kv_match: + k, v = kv_match.groups() + elif cfg_k_regex.match(rec): + # could be just a key without = value, which git treats as True + # if asked for a bool + k, v = rec, None + else: + # no value, no good key + k = v = None + return k, v diff --git a/datalad_next/config/item.py b/datalad_next/config/item.py new file mode 100644 index 00000000..f2ae5510 --- /dev/null +++ b/datalad_next/config/item.py @@ -0,0 +1,82 @@ +from __future__ import annotations + +from typing import ( + TYPE_CHECKING, + Any, + Callable, +) + +from datasalad.settings import Setting +from datasalad.settings.setting import UnsetValue as SaladUnsetValue +from typing_extensions import TypeAlias + +if TYPE_CHECKING: + from datasalad.settings import Source + + from datalad_next.config import ( + Dialog, + dialog as dialog_collection, + ) + from datalad_next.constraints import Constraint + +from datalad.interface.common_cfg import _NotGiven # type: ignore + +# make a type alias with a subjectively more self-explaining name +# we reuse the core type to keep checking code here simple, and +# easy to migrate later +UnsetValue: TypeAlias = _NotGiven +#UnsetValue: TypeAlias = SaladUnsetValue + + +class ConfigurationItem(Setting): + def __init__( + self, + value: Any | UnsetValue = UnsetValue, + *, + validator: Constraint | Callable | None = None, + lazy: bool = False, + dialog: dialog_collection.Dialog | None = None, + store_target: type[Source] | str | None = None, + ): + """ + - Value of a configuration item + - Type or validator of the configuration value + - Hint how a UI should gather a value for this item + - Hint with which configuration source this item should be stored + + Any hint should be a type. + + If a string label is given, it will be interpreted as a class name. + This functionality is deprecated and is only supported, for the time + being, to support legacy implementations. It should not be used for any + new implementations. + """ + super().__init__( + value=SaladUnsetValue if value is UnsetValue else value, + coercer=validator, + lazy=lazy, + ) + self._dialog = dialog + self._store_target = store_target + + @property + def dialog(self) -> Dialog | None: + return self._dialog + + @property + def value(self) -> Any: + val = super().value + if val is SaladUnsetValue: + return UnsetValue + return val + + @property + def validator(self) -> Callable | None: + return self.coercer + + def update(self, item: Setting) -> None: + super().update(item) + for attr in ('_dialog', '_store_target'): + val = getattr(item, attr) + if val is not None: + setattr(self, attr, val) diff --git a/datalad_next/config/legacy.py b/datalad_next/config/legacy.py new file mode 100644 index 00000000..f11f78c1 --- /dev/null +++ b/datalad_next/config/legacy.py @@ -0,0 +1,502 @@ +"""`MultiConfiguration` adaptor for `ConfigManager` drop-in replacement""" + +from __future__ import annotations + +from copy import copy +from functools import wraps +from types import MappingProxyType +from typing import ( + TYPE_CHECKING, + Any, +) +import warnings + +if TYPE_CHECKING: + from datalad.distribution.dataset import Dataset # type: ignore + from datalad.support.gitrepo import GitRepo # type: ignore + from datasalad.settings import Source + +from datasalad.settings import ( + InMemorySettings, + Setting, + Settings, +) +from datasalad.settings.setting import UnsetValue as SaladUnsetValue + +from datalad_next.config.item import ( + ConfigurationItem, + UnsetValue, +) +from datalad_next.config import dialog +from datalad_next.config.git import ( + LocalGitConfig, + DataladBranchConfig, +) +from datalad_next.runners import ( + call_git, +) + + +def _where_to_scope(func): + @wraps(func) + def wrapper(*args, **kwargs): + if 'where' in kwargs: + if 'scope' in kwargs: + raise ValueError("Do not specify both 'scope' and DEPRECATED 'where'") + kwargs = kwargs.copy() + where = kwargs.pop('where') + if where == 'dataset': + warnings.warn("'where=\"dataset\"' is deprecated, use 'scope=\"branch\"' instead", + DeprecationWarning) + where = 'branch' + else: + warnings.warn("'where' is deprecated, use 'scope' instead", + DeprecationWarning) + kwargs['scope'] = where + return func(*args, **kwargs) + return wrapper + + +class ConfigManager: + def __init__( + self, + dataset: GitRepo | Dataset | None = None, + overrides=None, + source='any', + ): + # to new code, while new code already uses the new interface + from datalad_next.config import manager + + self._mngr = Settings(get_sources( + manager, + dataset=dataset, + overrides=overrides, + source=source, + )) + self._defaults = manager.sources['defaults'] + for src in self._mngr.sources.values(): + src.load() + + # TODO: make obsolete + self._repo_dot_git = None + self._repo_pathobj = None + if dataset: + if hasattr(dataset, 'dot_git'): + # `dataset` is actually a Repo instance + self._repo_dot_git = dataset.dot_git + self._repo_pathobj = dataset.pathobj + elif dataset.repo: + self._repo_dot_git = dataset.repo.dot_git + self._repo_pathobj = dataset.repo.pathobj + + @property + def overrides(self): + # this is a big hassle. the original class hands out the real dict to do any + # manipulation with it. for a transition we want to keep some control, and + # hand out a proxy only + return MappingProxyType(self._mngr.sources['legacy-overrides']._items) + + @property + def _stores(self): + # this beast only exists to satisfy a test that reaches into the + # internals (that no longer exists) and verifies that the right + # source files are read + files = set() + # only for tests + for label in ['git-system', 'git-global', 'git-local']: + src = self._mngr.sources.get(label) + if src is None: + continue + src.load() + files.update(src._sources) + return {'git': {'files': files}} + + def reload(self, force: bool = False) -> None: + for s in self._mngr.sources.values(): + s.load() + + def obtain(self, var, default=None, dialog_type=None, valtype=None, + store=False, scope=None, reload=True, **kwargs): + # maybe we have a default + item = copy(self._defaults.get(var, ConfigurationItem(UnsetValue))) + if valtype is not None: + item._coercer = valtype + item.update(self._mngr.get(var, ConfigurationItem(UnsetValue))) + + # we need to check for the salad value if reaching into the guts + if item._value is not SaladUnsetValue: + # might crash here, if not valid, but we want that + return item.value + + # configure storage destination, if needed + #if store: + # if scope is None and 'destination' in cdef: + # scope = cdef['destination'] + # if scope is None: + # raise ValueError( + # "request to store configuration item '{}', but no " + # "storage destination specified".format(var)) + + if dialog_type is not None: + item._dialog = dialog.get_dialog_class_from_legacy_ui_label( + dialog_type)(**kwargs) + + if store and item._store_target is None: + msg = ( + f"request to store configuration item {var!r}, but no " + "storage destination specified" + ) + raise ValueError(msg) + + # `default` here is different from what one would think. It is the + # default to present to the user when asking for a value. + val = self._obtain_from_user( + var, + item, + default=default, + ) + + item._value = val + + # TODO: should loop if something invalid was entered. Do better + # in reimplementation + validated = item.value + + if store: + src = self.get_src(item._store_target) + src.add(var, item) + if reload: + src.load() + return validated + + def _obtain_from_user( + self, + var, + default_item, + default=None, + valtype=None, + **kwargs, + ): + # now we need to try to obtain something from the user + from datalad.ui import ui + + if (not ui.is_interactive or default_item.dialog is None) and default is None: + raise RuntimeError( + "cannot obtain value for configuration item '{}', " + "not preconfigured, no default, no UI available".format(var)) + + # obtain via UI + try: + dialog_cls = getattr( + ui, + { + dialog.Question: 'question', + dialog.YesNo: 'yesno', + }[type(default_item.dialog)], + ) + except KeyError: + msg = f"UI {ui!r} does not support dialog {default_item.dialog!r}" + raise ValueError(msg) + + _value = dialog_cls( + default=default, + title=default_item.dialog.title, + text=default_item.dialog.text, + ) + + if _value is None: + # we got nothing + if default is None: + raise RuntimeError( + "could not obtain value for configuration item '{}', " + "not preconfigured, no default".format(var)) + # XXX maybe we should return default here, even it was returned + # from the UI -- if that is even possible + + return _value + + + def __repr__(self): + # give full list of all tracked config sources, plus overrides + return "ConfigManager({}{})".format( + [str(s) for s in self._mngr.sources.values()], + f', overrides={self.overrides!r}' + if self.overrides else '', + ) + + def __str__(self): + # give path of dataset, if there is any, plus overrides + return "ConfigManager({}{})".format( + self._repo_pathobj if self._repo_pathobj else '', + 'with overrides' if self.overrides else '', + ) + + def __len__(self) -> int: + return len(self._mngr) + + def __getitem__(self, key: str) -> Any: + # the legacy implementation returned all values here + val = self.get( + key, + default=UnsetValue, + get_all=True, + ) + if len(val) == 1: + val = val[0] + if not isinstance(val, tuple) and val is UnsetValue: + raise KeyError + return val + + def __contains__(self, key) -> bool: + return key in self._mngr + + def keys(self): + return self._mngr.keys() + + def get(self, key, default=None, get_all=False): + val = self._mngr.getall(key, ConfigurationItem(default)) + if not get_all and isinstance(val, tuple): + return val[-1].value + return tuple(v.value for v in val) + + def get_from_source(self, source, key, default=None): + src = self.get_src(source) + return src.get(key, default).value + + def sections(self): + """Returns a list of the sections available""" + return list(set([ + '.'.join(k.split('.')[:-1]) for k in self._mngr.keys() + ])) + + def options(self, section): + return [ + k.split('.')[-1] for k in self._mngr.keys() + if k.startswith(f'{section}.') + ] + + def has_section(self, section): + """Indicates whether a section is present in the configuration""" + # TODO: next one is the proper implementation, but core tests + # force us to do it wrong + #return any(k.startswith(f'{section}.') for k in self._mngr.keys()) + return any(k.startswith(section) for k in self._mngr.keys()) + + def has_option(self, section, option): + return f'{section}.{option}' in self._mngr + + def getint(self, section, option): + return int(self._mngr[f'{section}.{option}'].value) + + def getfloat(self, section, option): + return float(self._mngr[f'{section}.{option}'].value) + + def getbool(self, section, option, default=None): + return anything2bool(self._mngr.get( + f'{section}.{option}', + default=default).value) + + def items(self, section=None): + prefix = f'{section}.' if section else '' + return [ + (k, self[k]) for k in self._mngr.keys() + if k.startswith(prefix) + ] + + def get_value(self, section, option, default=None): + key = f'{section}.{option}' + if key not in self._mngr.keys() and default is None: + # this strange dance is needed because gitpython did + # it this way + raise KeyError + return self._mngr.get( + f'{section}.{option}', + default=default).value + + def add(self, var, value, scope='branch', reload=True): + src = self.get_src(scope) + # there would be no need for a reload, but the core tests + # enforce no direct updating of the available knowledge + src.add(var, ConfigurationItem(value)) + + @_where_to_scope + def set(self, var, value, scope='branch', reload=True, force=False): + src = self.get_src(scope) + if scope == 'override': + src._items[var] = ConfigurationItem(value) + return + cmd = [*src._get_git_config_cmd()] + if force: + cmd.append('--replace-all') + call_git( + [*cmd, var, value], + capture_output=True, + + ) + if reload: + src.reinit() + src.load() + + def rename_section(self, old, new, scope='branch', reload=True): + src = self.get_src(scope) + if scope == 'override': + for k in list(src._items.keys()): + if k.startswith(f'{old}.'): + src._items[f'{new}.{k.split(".")[-1]}'] = src._items[k] + del src._items[k] + return + call_git( + [*src._get_git_config_cmd(), '--rename-section', old, new], + capture_output=True, + + ) + if reload: + src.reinit() + src.load() + + def remove_section(self, sec, scope='branch', reload=True): + src = self.get_src(scope) + if scope == 'override': + for k in list(src._items.keys()): + if k.startswith(f'{sec}.'): + del src._items[k] + + def unset(self, var, scope='branch', reload=True): + src = self.get_src(scope) + if scope == 'override': + del src[var] + return + call_git( + [*src._get_git_config_cmd(), '--unset-all', var], + capture_output=True, + + ) + if reload: + src.reinit() + src.load() + + def get_src(self, scope): + name = scope_label_to_source_label_map.get(scope) + if name is None: + raise ValueError(f'unknown scope {scope!r}') + return self._mngr.sources[name] + + +scope_label_to_source_label_map = { + 'branch': 'datalad-branch', + 'local': 'git-local', + 'global': 'git-global', + 'override': 'legacy-overrides', + # old names + 'dataset': 'datalad-branch', +} + + +def get_sources( + manager: Settings, + dataset: GitRepo | Dataset | None = None, + overrides=None, + source='any', +) -> dict[str, Source]: + """Implement the legacy ruleset of what to read from + + Parameters + ---------- + source : {'any', 'local', 'branch', 'branch-local'}, optional + Which sources of configuration setting to consider. If 'branch', + configuration items are only read from a dataset's persistent + configuration file in current branch, if any is present + (the one in ``.datalad/config``, not + ``.git/config``); if 'local', any non-committed source is considered + (local and global configuration in Git config's terminology); + if 'branch-local', persistent configuration in current dataset branch + and local, but not global or system configuration are considered; if 'any' + all possible sources of configuration are considered. + Note: 'dataset' and 'dataset-local' are deprecated in favor of 'branch' + and 'branch-local'. + """ + nodataset_errmsg = ( + 'ConfigManager configured to read from a branch of a dataset only, ' + 'but no dataset given' + ) + # if applicable, we want to reuse the exact same source instances as the + # global non-legacy manager to get a somewhat smooth transition of old code + global_sources = manager.sources + + ovsrc = InMemorySettings() + if overrides is not None: + for k, v in overrides.items(): + ovsrc[k] = ConfigurationItem(v) + # + # No scenario can return Defaults(), the legacy manager did not + # have that + # + if source == 'branch': + if dataset is None: + raise ValueError(nodataset_errmsg) + return { + 'legacy-overrides': ovsrc, + 'datalad-branch': DataladBranchConfig(dataset.pathobj), + } + if source == 'local': + if not dataset: + return { + 'legacy-environment': global_sources['legacy-environment'], + 'legacy-overrides': ovsrc, + 'git-global': global_sources['git-global'], + 'git-system': global_sources['git-system'], + } + return { + 'legacy-environment': global_sources['legacy-environment'], + 'legacy-overrides': ovsrc, + 'git-local': LocalGitConfig(dataset.pathobj), + 'git-global': global_sources['git-global'], + 'git-system': global_sources['git-system'], + } + if source == 'branch-local': + return { + 'legacy-overrides': ovsrc, + 'git-local': LocalGitConfig(dataset.pathobj), + 'datalad-branch': DataladBranchConfig(dataset.pathobj), + } + if source == 'any': + # the full stack + if not dataset: + return { + 'legacy-environment': global_sources['legacy-environment'], + 'legacy-overrides': ovsrc, + 'git-global': global_sources['git-global'], + 'git-system': global_sources['git-system'], + } + return { + 'legacy-environment': global_sources['legacy-environment'], + 'legacy-overrides': ovsrc, + 'git-local': LocalGitConfig(dataset.pathobj), + 'git-global': global_sources['git-global'], + 'git-system': global_sources['git-system'], + 'datalad-branch': DataladBranchConfig(dataset.pathobj), + } + + raise ValueError(f'unknown configuration source {source!r}') + + +def anything2bool(val): + if val is None: + # TODO: just changes this behavior + # forced by a test in old core that forces _proc_dump_line + # to work this way + return True + if val == '': + return False + if hasattr(val, 'lower'): + val = val.lower() + if val in {"off", "no", "false", "0"} or not bool(val): + return False + elif val in {"on", "yes", "true", True} \ + or (hasattr(val, 'isdigit') and val.isdigit() and int(val)) \ + or isinstance(val, int) and val: + return True + else: + raise TypeError( + "Got value %s which could not be interpreted as a boolean" + % repr(val)) diff --git a/datalad_next/config/tests/test_core.py b/datalad_next/config/tests/test_core.py index 56d59146..666d324e 100644 --- a/datalad_next/config/tests/test_core.py +++ b/datalad_next/config/tests/test_core.py @@ -1,3 +1,5 @@ +# mypy: ignore-errors + import logging import os from os.path import exists @@ -40,6 +42,11 @@ get_home_envvars, swallow_logs, ) +from datalad_next.config import dialog +from datalad_next.config.item import ( + ConfigurationItem, + UnsetValue, +) # XXX tabs are intentional (part of the format)! # XXX put back! confuses pep8 @@ -194,7 +201,8 @@ def test_something(path=None, new_home=None): # batch a changes cfg.add('mike.wants.to', 'know', reload=False) - assert_false('mike.wants.to' in cfg) + # next assert is not valid anymore, we have immediate availability + #assert_false('mike.wants.to' in cfg) cfg.add('mike.wants.to', 'eat') assert_true('mike.wants.to' in cfg) assert_equal(len(cfg['mike.wants.to']), 2) @@ -246,19 +254,24 @@ def test_something(path=None, new_home=None): globalcfg = ConfigManager() assert_not_in('datalad.unittest.youcan', globalcfg) assert_in('datalad.sneaky.addition', globalcfg) - cfg.add('datalad.unittest.youcan', 'removeme', scope='global') + # next line made no sense, `cfg` is configured to only + # deal with "branch" config + #cfg.add('datalad.unittest.youcan', 'removeme', scope='global') + globalcfg.add('datalad.unittest.youcan', 'removeme', scope='global') assert(exists(global_gitconfig)) # it did not go into the dataset's config! assert_not_in('datalad.unittest.youcan', cfg) # does not monitor additions! globalcfg.reload(force=True) assert_in('datalad.unittest.youcan', globalcfg) - with swallow_logs(): - assert_raises( - CommandError, - globalcfg.unset, - 'datalad.unittest.youcan', - scope='local') + # next lines makes no sense, neither `cfg` nor `globalcfg` + # deal with the "local" config + #with swallow_logs(): + # assert_raises( + # CommandError, + # globalcfg.unset, + # 'datalad.unittest.youcan', + # scope='local') assert(globalcfg.has_section('datalad.unittest')) globalcfg.unset('datalad.unittest.youcan', scope='global') # but after we unset the only value -- that section is no longer listed @@ -334,8 +347,7 @@ def test_obtain(path=None): # don't hide type issues, float doesn't become an int magically assert_raises(ValueError, cfg.obtain, dummy, valtype=int) # inject some prior knowledge - from datalad.interface.common_cfg import definitions as cfg_defs - cfg_defs[dummy] = dict(type=float) + cfg._defaults[dummy] = ConfigurationItem(UnsetValue, validator=float) # no we don't need to specify a type anymore assert_equal(cfg.obtain(dummy), 5.3) # but if we remove the value from the config, all magic is gone @@ -356,11 +368,11 @@ def ask(): @with_testsui(responses='5.3') def ask(): assert_equal( - cfg.obtain(dummy, dialog_type='question', text='Tell me'), 5.3) + cfg.obtain(dummy, dialog_type='question', title='Tell me'), 5.3) ask() # preconfigure even more, to get the most compact call - cfg_defs[dummy]['ui'] = ('question', dict(text='tell me', title='Gretchen Frage')) + cfg._defaults[dummy]._dialog = dialog.Question(text='tell me', title='Gretchen Frage') @with_testsui(responses='5.3') def ask(): @@ -379,7 +391,7 @@ def ask(): ask() # but we can preconfigure it - cfg_defs[dummy]['destination'] = 'broken' + cfg._defaults[dummy]._store_target = 'broken' @with_testsui(responses='5.3') def ask(): @@ -387,7 +399,7 @@ def ask(): ask() # fixup destination - cfg_defs[dummy]['destination'] = 'branch' + cfg._defaults[dummy]._store_target = 'branch' @with_testsui(responses='5.3') def ask(): @@ -412,7 +424,7 @@ def ask(): #ask() -def test_from_env(): +def test_from_env(existing_dataset): cfg = ConfigManager() assert_not_in('datalad.crazy.cfg', cfg) with patch.dict('os.environ', @@ -421,7 +433,7 @@ def test_from_env(): assert_in('datalad.crazy.cfg', cfg) assert_equal(cfg['datalad.crazy.cfg'], 'impossibletoguess') # not in dataset-only mode - cfg = ConfigManager(Dataset('nowhere'), source='branch') + cfg = ConfigManager(existing_dataset, source='branch') assert_not_in('datalad.crazy.cfg', cfg) # check env trumps override cfg = ConfigManager() @@ -432,7 +444,7 @@ def test_from_env(): with patch.dict('os.environ', {'DATALAD_CRAZY_OVERRIDE': 'fromenv'}): cfg.reload() - assert_equal(cfg['datalad.crazy.override'], 'fromenv') + assert_equal(cfg.get('datalad.crazy.override'), 'fromenv') def test_from_env_overrides(): @@ -481,7 +493,7 @@ def test_overrides(): assert_in('user.name', cfg) # set cfg.set('user.name', 'myoverride', scope='override') - assert_equal(cfg['user.name'], 'myoverride') + assert_equal(cfg.get('user.name'), 'myoverride') # unset just removes override, not entire config cfg.unset('user.name', scope='override') assert_in('user.name', cfg) @@ -489,19 +501,19 @@ def test_overrides(): # add # there is no initial increment cfg.add('user.name', 'myoverride', scope='override') - assert_equal(cfg['user.name'], 'myoverride') + assert_equal(cfg.get('user.name'), 'myoverride') # same as with add, not a list - assert_equal(cfg['user.name'], 'myoverride') + assert_equal(cfg.get('user.name'), 'myoverride') # but then there is cfg.add('user.name', 'myother', scope='override') - assert_equal(cfg['user.name'], ['myoverride', 'myother']) + assert_equal(cfg.get('user.name', get_all=True)[-2:], ('myoverride', 'myother')) # rename assert_not_in('ups.name', cfg) cfg.rename_section('user', 'ups', scope='override') # original variable still there assert_in('user.name', cfg) # rename of override in effect - assert_equal(cfg['ups.name'], ['myoverride', 'myother']) + assert_equal(cfg.get('ups.name', get_all=True)[-2:], ('myoverride', 'myother')) # remove entirely by section cfg.remove_section('ups', scope='override') from datalad.utils import Path @@ -573,16 +585,17 @@ def test_no_leaks(path1=None, path2=None): ds2.create() assert_not_in('i.was.here', ds2.config.keys()) - # and that we do not track the wrong files - assert_not_in(ds1.pathobj / '.git' / 'config', - ds2.config._stores['git']['files']) - assert_not_in(ds1.pathobj / '.datalad' / 'config', - ds2.config._stores['branch']['files']) - # these are the right ones - assert_in(ds2.pathobj / '.git' / 'config', - ds2.config._stores['git']['files']) - assert_in(ds2.pathobj / '.datalad' / 'config', - ds2.config._stores['branch']['files']) + # internals do not exist anymore + # # and that we do not track the wrong files + # assert_not_in(ds1.pathobj / '.git' / 'config', + # ds2.config._stores['git']['files']) + # assert_not_in(ds1.pathobj / '.datalad' / 'config', + # ds2.config._stores['branch']['files']) + # # these are the right ones + # assert_in(ds2.pathobj / '.git' / 'config', + # ds2.config._stores['git']['files']) + # assert_in(ds2.pathobj / '.datalad' / 'config', + # ds2.config._stores['branch']['files']) @with_tempfile() @@ -590,7 +603,8 @@ def test_no_local_write_if_no_dataset(path=None): Dataset(path).create() with chpwd(path): cfg = ConfigManager() - with assert_raises(CommandError): + # KeyError because the scope is not known + with assert_raises(KeyError): cfg.set('a.b.c', 'd', scope='local') @@ -661,13 +675,14 @@ def test_bare(src=None, path=None): assert_true(gr.bare) # do we read the correct local config? assert_in(gr.pathobj / 'config', gr.config._stores['git']['files']) - # do we pick up the default branch config too? - assert_in('blob:HEAD:.datalad/config', - gr.config._stores['branch']['files']) + # these internals are no longer valid + ## do we pick up the default branch config too? + #assert_in('blob:HEAD:.datalad/config', + # gr.config._stores['branch']['files']) # and track its reload stamp via its file shasum - assert_equal( - dlconfig_sha, - gr.config._stores['branch']['stats']['blob:HEAD:.datalad/config']) + #assert_equal( + # dlconfig_sha, + # gr.config._stores['branch']['stats']['blob:HEAD:.datalad/config']) # check that we can pick up the dsid from the commit branch config assert_equal(ds.id, gr.config.get('datalad.dataset.id')) # and it is coming from the correct source @@ -800,7 +815,7 @@ def test_cross_cfgman_update(datalad_cfg, tmp_path): # there is no dataset to write to, it rejects it rightfully # it is a bit versatile in its exception behavior # https://github.com/datalad/datalad/issues/7300 - with pytest.raises((ValueError, CommandError)): + with pytest.raises((ValueError, CommandError, KeyError)): ds.config.set(myuniqcfg, myuniqcfg_value, scope='local') # but we can write to global scope ds.config.set(myuniqcfg, myuniqcfg_value, scope='global') diff --git a/datalad_next/config/tests/test_env.py b/datalad_next/config/tests/test_env.py new file mode 100644 index 00000000..7c9023db --- /dev/null +++ b/datalad_next/config/tests/test_env.py @@ -0,0 +1,55 @@ +from ..env import LegacyEnvironment +from ..item import ConfigurationItem + + +def test_environment(): + env = LegacyEnvironment() + assert str(env) == 'LegacyEnvironment' + assert repr(env) == 'LegacyEnvironment()' + + +def test_load_datalad_env(monkeypatch): + target_key = 'datalad.chunky-monkey.feedback' + target_value = 'ohmnomnom' + absurd_must_be_absent_key = 'nobody.would.use.such.a.key' + with monkeypatch.context() as m: + m.setenv('DATALAD_CHUNKY__MONKEY_FEEDBACK', 'ohmnomnom') + env = LegacyEnvironment() + assert target_key in env.keys() # noqa: SIM118 + assert target_key in env + assert env.get(target_key).value == target_value + # default is wrapped into ConfigurationItem if needed + assert env.get( + absurd_must_be_absent_key, + target_value + ).value is target_value + assert env.get( + absurd_must_be_absent_key, + ConfigurationItem(value=target_value) + ).value is target_value + assert env[target_key].value == target_value + assert env.get(absurd_must_be_absent_key).value is None + assert len(env) + + +def test_load_legacy_overrides(monkeypatch, caplog): + with monkeypatch.context() as m: + m.setenv( + 'DATALAD_CONFIG_OVERRIDES_JSON', + '{"datalad.key1":"override", "datalad.key2":"override"}', + ) + m.setenv('DATALAD_KEY1', 'evenmoreoverride') + env = LegacyEnvironment() + assert env['datalad.key1'].value == 'evenmoreoverride' + assert env.get('datalad.key2').value == 'override' + + assert 'Failed to load' not in caplog.text + with monkeypatch.context() as m: + m.setenv( + 'DATALAD_CONFIG_OVERRIDES_JSON', + '{"datalad.key1":NOJSON, "datalad.key2":"override"}', + ) + env = LegacyEnvironment() + assert 'datalad.key1' not in env + assert 'datalad.key2' not in env + assert 'Failed to load' in caplog.text diff --git a/datalad_next/config/tests/test_git.py b/datalad_next/config/tests/test_git.py new file mode 100644 index 00000000..4b95b9e0 --- /dev/null +++ b/datalad_next/config/tests/test_git.py @@ -0,0 +1,24 @@ +from ..git import ( + GlobalGitConfig, +) +from ..item import ConfigurationItem + + +def test_global_git_config(datalad_cfg): + target_key = 'my.config.key' + target_value = 'my/config.value' + + gc = GlobalGitConfig() + gc[target_key] = ConfigurationItem(value=target_value) + # immediate availability + assert target_key in gc + assert gc[target_key].value == target_value + + # if we create another instance, it also has the key, because + # we wrote to a file, not just the instance + gc2 = GlobalGitConfig() + assert target_key in gc2 + assert gc2[target_key].value == target_value + + assert 'user.email' in gc + assert gc['user.email'] diff --git a/datalad_next/config/tests/test_manager.py b/datalad_next/config/tests/test_manager.py new file mode 100644 index 00000000..1e770925 --- /dev/null +++ b/datalad_next/config/tests/test_manager.py @@ -0,0 +1,26 @@ +import pytest + +from datalad_next.config import manager + + +def test_manager_setup(): + """Test the actual global configuration manager""" + target_sources = [ + 'legacy-environment', 'git-global', 'git-system', 'defaults', + ] + target_key = 'user.name' + absurd_must_be_absent_key = 'nobody.would.use.such.a.key' + # the order of sources is the precedence rule + assert list(manager.sources.keys()) == target_sources + # any real manager will have some keys + assert len(manager) + assert target_key in manager + assert absurd_must_be_absent_key not in manager + # test query + item = manager[target_key] + with pytest.raises(KeyError): + manager[absurd_must_be_absent_key] + # we cannot be really specific and also robust + assert item.value + assert manager[target_key] + assert manager.get(absurd_must_be_absent_key).value is None diff --git a/datalad_next/config/tests/test_utils.py b/datalad_next/config/tests/test_utils.py index f084f823..8062d4cf 100644 --- a/datalad_next/config/tests/test_utils.py +++ b/datalad_next/config/tests/test_utils.py @@ -2,7 +2,6 @@ import pytest from .. import utils # for patching environ - from ..utils import ( get_gitconfig_items_from_env, set_gitconfig_items_in_env, diff --git a/datalad_next/patches/common_cfg.py b/datalad_next/patches/common_cfg.py index 543d0d03..865ac233 100644 --- a/datalad_next/patches/common_cfg.py +++ b/datalad_next/patches/common_cfg.py @@ -6,6 +6,7 @@ This change does not override user-settings, only the default. """ +# TODO just reset the default in the new config manager from datalad.support.extensions import has_config if has_config('datalad.annex.retry'): diff --git a/datalad_next/patches/config.py b/datalad_next/patches/config.py new file mode 100644 index 00000000..dd8fe854 --- /dev/null +++ b/datalad_next/patches/config.py @@ -0,0 +1,38 @@ +"""Make `register_config()/has_config()` use `ImplementationDefault` instance + +The original implementation use a structure from +`datalad.interface.common_cfg`. The `defaults` instance of +`ImplementationDefault` from `datalad_next.config` also contains this +information, and consolidates it into a new structure and API. This patch +ensures that extensions registering their configuration items using this legacy +API, also feeds this `defaults` instance. +""" + +from datalad_next.patches import apply_patch + +from datalad_next.config import ( + LegacyConfigManager, + defaults, + legacy_cfg, + legacy_register_config, +) + + +def has_config(name: str): + return name in defaults + + +def register_config(*args, **kwargs): + legacy_register_config(defaults, *args, **kwargs) + + +# we have to inject the new class into a whole bunch of places, because +# it is imported very early +apply_patch('datalad.config', None, 'ConfigManager', LegacyConfigManager) +apply_patch('datalad.dataset.gitrepo', None, 'ConfigManager', LegacyConfigManager) + +apply_patch('datalad', None, 'cfg', legacy_cfg) +apply_patch('datalad.distribution.dataset', None, 'cfg', legacy_cfg) +apply_patch('datalad.support.extensions', None, 'register_config', + register_config) +apply_patch('datalad.support.extensions', None, 'has_config', has_config) diff --git a/datalad_next/patches/enabled.py b/datalad_next/patches/enabled.py index 0bbef19e..c61afd76 100644 --- a/datalad_next/patches/enabled.py +++ b/datalad_next/patches/enabled.py @@ -1,5 +1,6 @@ from . import ( cli_configoverrides, + config, commanderror, common_cfg, annexrepo, diff --git a/datalad_next/runners/git.py b/datalad_next/runners/git.py index 9dcdf2a7..cc9a6078 100644 --- a/datalad_next/runners/git.py +++ b/datalad_next/runners/git.py @@ -71,6 +71,7 @@ def call_git( *, cwd: Path | None = None, force_c_locale: bool = False, + capture_output: bool = False, ) -> None: """Call Git with no output capture, raises on non-zero exit. @@ -80,10 +81,14 @@ def call_git( If ``force_c_locale`` is ``True`` the environment of the Git process is altered to ensure output according to the C locale. This is useful when output has to be processed in a locale invariant fashion. + + If ``capture_output`` is ``True``, process output is captured. This is + necessary for reporting any error messaging via a ``CommandError`` exception. + By default process output is not captured. """ _call_git( args, - capture_output=False, + capture_output=capture_output, cwd=cwd, check=True, force_c_locale=force_c_locale, diff --git a/pyproject.toml b/pyproject.toml index fa98dfe9..c5944bf9 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -43,6 +43,7 @@ dependencies = [ "humanize", "more-itertools", + "typing_extensions", ] [tool.hatch.metadata]