Skip to content

Commit

Permalink
TMP
Browse files Browse the repository at this point in the history
  • Loading branch information
mih committed Oct 1, 2024
1 parent 382164a commit 421d8f6
Show file tree
Hide file tree
Showing 14 changed files with 206 additions and 116 deletions.
14 changes: 13 additions & 1 deletion datalad_next/annexremotes/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@
# in a single place
from annexremote import UnsupportedRequest
from typing import Any
from os import environ

from datalad.customremotes import (
# this is an enhanced RemoteError that self-documents its cause
Expand Down Expand Up @@ -32,9 +33,20 @@ def repo(self) -> LeanAnnexRepo:
to limit further proliferation of the ``AnnexRepo`` API.
"""
if self._repo is None:
self._repo = LeanAnnexRepo(self.annex.getgitdir())
self._repo = LeanAnnexRepo(self.repodir)
return self._repo

@property
def repodir(self) -> str:
    """Directory to use as the location of the repository.

    Starts from the path reported by ``self.annex.getgitdir()``. If the
    ``GIT_DIR`` environment variable is set to a non-empty value and the
    reported path ends with that value, the trailing ``GIT_DIR`` portion
    is cut off and the remaining prefix is returned. Otherwise the
    reported path is returned unchanged.

    Returns
    -------
    str
      The (possibly shortened) path.
    """
    repodir = self.annex.getgitdir()
    # git-annex also sets GIT_DIR, and we want to account for that
    # to be able to run regular Git commands in this environment
    gitdir_env = environ.get("GIT_DIR")
    # NOTE(review): this is a plain string-suffix match, not a path-aware
    # comparison. If GIT_DIR is identical to the reported path, the result
    # is an empty string -- confirm that this cannot happen in practice.
    if gitdir_env and repodir.endswith(gitdir_env):
        repodir = repodir[:-len(gitdir_env)]
    return repodir

@property
def remotename(self) -> str:
"""Name of the (git) remote the special remote is operating under"""
Expand Down
30 changes: 20 additions & 10 deletions datalad_next/annexremotes/archivist.py
Original file line number Diff line number Diff line change
Expand Up @@ -35,6 +35,7 @@

from pathlib import Path
from shutil import copyfileobj
import sys
from typing import (
Dict,
Generator,
Expand Down Expand Up @@ -162,7 +163,7 @@ def prepare(self):
# us a `LeanAnnexRepo`.
# TODO it is unclear to MIH what is actually needed API-wise of the legacy
# interface. Needs research.
self._repo = LegacyAnnexRepo(self.annex.getgitdir())
self._repo = LegacyAnnexRepo(self.repodir)
# are we in legacy mode?
# let remote-specific setting take priority (there could be
# multiple archivist-type remotes configured), and use unspecific switch
Expand All @@ -175,7 +176,7 @@ def prepare(self):
# other code in this file will run!!!
# __getattribute__ will relay all top-level operations
# to an instance of the legacy implementation
from datalad.customremotes.archives import ArchiveAnnexCustomRemote
from datalad.customremotes.archives import ArchiveAnnexCustomRemote # type: ignore
lsr = ArchiveAnnexCustomRemote(self.annex)
lsr.prepare()
# we can skip everything else, it won't be triggered anymore
Expand Down Expand Up @@ -377,7 +378,7 @@ class _ArchiveHandlers:
The main functionality is provided by ``from_locators()``.
"""
# TODO make archive access caching behavior configurable from the outside
def __init__(self, repo):
def __init__(self, repo: LegacyAnnexRepo):
# mapping of archive keys to an info dict
self._db: Dict[AnnexKey, _ArchiveInfo] = {}
# for running git-annex queries against the repo
Expand Down Expand Up @@ -422,6 +423,7 @@ def from_locators(
}.items():
# local_path will be None now, if not around
if kh.local_path:
assert kh.handler
# we found one with a local archive.
# yield handler and all matching locators
yield kh.handler, [loc for loc in locs if loc.akey == akey]
Expand Down Expand Up @@ -466,7 +468,8 @@ def from_locators(
exc = []
# but this time sort the keys to start with the smallest ones
# (just in case a download is involved)
for akey in sorted(akeys, key=lambda x: x.size):
# when no size info is available, assume worst case
for akey in sorted(akeys, key=lambda x: x.size or sys.maxsize):
# at this point we must have an existing _ArchiveInfo record
# for this akey
ainfo = self._db[akey]
Expand All @@ -485,11 +488,17 @@ def from_locators(
# exceptions, make sure to report them
if exc:
# TODO better error
e = RuntimeError(
class RuntimeErrors(RuntimeError):
def __init__(self, msg, errors):
RuntimeError.__init__(self, msg)
self.errors = errors

overall_exc = RuntimeErrors(
'Exhausted all candidate archive handlers '
f'(previous failures {exc})')
e.errors = exc
raise e
f'(previous failures {exc})',
errors=exc,
)
raise overall_exc

def _get_archive_info(
self,
Expand Down Expand Up @@ -523,6 +532,7 @@ def _get_local_handler(self, ainfo: _ArchiveInfo) -> ArchiveOperations:
raise NotImplementedError

if ainfo.type == ArchiveType.tar:
assert ainfo.local_path is not None
from datalad_next.archive_operations import TarArchiveOperations
return TarArchiveOperations(
ainfo.local_path,
Expand Down Expand Up @@ -564,9 +574,9 @@ def _get_key_contentpath(repo: LegacyAnnexRepo, key: str):
# and the content can be found at the location
loc = next(repo.call_annex_items_(['contentlocation', key]))
# convert to path. git-annex will report a path relative to the
# dotgit-dir
# CWD of the call above
# TODO platform-native?
loc = repo.dot_git / Path(loc)
loc = repo.pathobj / Path(loc)
except CommandError:
loc = None
return loc
3 changes: 1 addition & 2 deletions datalad_next/config/default.py
Original file line number Diff line number Diff line change
Expand Up @@ -82,7 +82,7 @@ def legacy_register_config(
title: str,
*,
default: Any = UnsetValue,
default_fn: Callable | type[UnsetValue] = UnsetValue,
default_fn: Callable | None = None,
description: str | None = None,
type: Constraint | None = None, # noqa: A002
dialog: str | None = None,
Expand All @@ -99,7 +99,6 @@ def legacy_register_config(
),
store_target=get_store_target_from_destination_label(scope),
)

# lastly trigger legacy registration
_legacy_register_config(
name=name,
Expand Down
1 change: 0 additions & 1 deletion datalad_next/config/env.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,6 @@

from datalad_next.config.item import ConfigurationItem


lgr = logging.getLogger('datalad.config')


Expand Down
95 changes: 51 additions & 44 deletions datalad_next/config/git.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,26 +4,26 @@
import logging
import re
from abc import abstractmethod
from pathlib import Path
from os import name as os_name
from pathlib import Path
from typing import (
TYPE_CHECKING,
Hashable,
)

if TYPE_CHECKING:
from os import PathLike

from datasalad.settings import Setting

from datalad.consts import DATASET_CONFIG_FILE # type: ignore
from datasalad.itertools import (
decode_bytes,
itemize,
)
from datasalad.runners import CommandError as SaladCommandError
from datasalad.settings import CachingSource

from datalad.consts import DATASET_CONFIG_FILE

from datalad_next.config.item import ConfigurationItem
from datalad_next.runners import (
CommandError,
Expand Down Expand Up @@ -52,15 +52,15 @@ def _get_git_config_cmd(self) -> list[str]:
"""Return the git-config command suitable for a particular config"""

@abstractmethod
def _get_git_config_cwd(self) -> Path:
def _get_git_config_cwd(self) -> Path | None:
"""Return path the git-config command should run in"""

def reinit(self) -> None:
super().reinit()
self._sources: set[str | Path] = set()

def load(self) -> None:
cwd = self._get_git_config_cwd()
cwd = self._get_git_config_cwd() or Path.cwd()
dct: dict[str, str | tuple[str, ...]] = {}
fileset: set[str] = set()

Expand Down Expand Up @@ -93,33 +93,30 @@ def load(self) -> None:
# the "blobs" is known
self._sources = origin_paths.union(origin_blobs)

setter = '__setitem__'
for k, v in dct.items():
if isinstance(v, tuple):
vals = tuple(
ConfigurationItem(
value=val,
store_target=self.__class__,
)
for val in v
)
else:
vals = ConfigurationItem(
value=v,
if not isinstance(v, tuple):
v = (v,)
for val in v:
item = ConfigurationItem(
value=val,
store_target=self.__class__,
)
super().__setitem__(k, vals)
getattr(super(), setter)(k, item)
# for every subsequent value we must call add()
setter = 'add'


def __setitem__(self, key: str, value: Setting) -> None:
def __setitem__(self, key: Hashable, value: Setting) -> None:
call_git(
[*self._get_git_config_cmd(), '--replace-all', key, str(value.value)],
[*self._get_git_config_cmd(), '--replace-all', str(key), str(value.value)],
capture_output=True,
)
super().__setitem__(key, value)

def add(self, key: str, value: Setting) -> None:
def add(self, key: Hashable, value: Setting) -> None:
call_git(
[*self._get_git_config_cmd(), '--add', key, str(value.value)],
[*self._get_git_config_cmd(), '--add', str(key), str(value.value)],
capture_output=True,

)
Expand All @@ -130,50 +127,63 @@ class SystemGitConfig(GitConfig):
def _get_git_config_cmd(self) -> list[str]:
return [f'--git-dir={self.nul}', 'config', '--system']

def _get_git_config_cwd(self) -> Path:
def _get_git_config_cwd(self) -> Path | None:
return Path.cwd()


class GlobalGitConfig(GitConfig):
def _get_git_config_cmd(self) -> list[str]:
return [f'--git-dir={self.nul}', 'config', '--global']

def _get_git_config_cwd(self) -> Path:
def _get_git_config_cwd(self) -> Path | None:
return Path.cwd()


class LocalGitConfig(GitConfig):
def __init__(self, path: PathLike):
super().__init__()
self._path = path
pathobj = Path(path)

try:
self._is_bare_repo = call_git_oneline(
['rev-parse', '--is-bare-repository'],
cwd=path,
# TODO: check for GIT_DIR and adjust
self._in_worktree = call_git_oneline(
['rev-parse', '--is-inside-work-tree'],
cwd=pathobj,
force_c_locale=True,
) == 'true'
except CommandError:
# TODO: this is too simplistic. It could also be
# that there is no repo (yet)
self._is_bare_repo = False
except CommandError as e:
from os import environ
msg = f"no Git repository at {path}: {e!r} {environ.get('GIT_DIR')}"
raise ValueError(msg) from e

self._gitdir = Path(
path if not self._in_worktree
else call_git_oneline(
['rev-parse', '--path-format=absolute', '--git-dir'],
cwd=pathobj,
force_c_locale=True,
)
)

def _get_git_config_cmd(self) -> list[str]:
return ['-C', str(self._path), 'config', '--local']
return ['--git-dir', str(self._gitdir), 'config', '--local']

def _get_git_config_cwd(self) -> Path:
return self._path
def _get_git_config_cwd(self) -> Path | None:
# we set --git-dir, CWD does not matter
return None


class DataladBranchConfig(LocalGitConfig):
def __init__(self, path: PathLike):
super().__init__(path)
self._path = path

def _get_git_config_cmd(self) -> list[str]:
return [
'-C', str(self._path),
'config',
*(('--blob', 'HEAD:.datalad/config') if self._is_bare_repo else
('--file', str(self._path / DATASET_CONFIG_FILE))),
'--git-dir', str(self._gitdir), 'config',
*(('--file', str(self._path / DATASET_CONFIG_FILE))
if self._in_worktree
else ('--blob', f'HEAD:{DATASET_CONFIG_FILE}'))
]

def _ensure_target_dir(self):
Expand All @@ -182,11 +192,11 @@ def _ensure_target_dir(self):
custom_file = Path(cmd[cmd.index('--file') + 1])
custom_file.parent.mkdir(exist_ok=True)

def __setitem__(self, key: str, value: Setting) -> None:
def __setitem__(self, key: Hashable, value: Setting) -> None:
self._ensure_target_dir()
super().__setitem__(key, value)

def add(self, key: str, value: Setting) -> None:
def add(self, key: Hashable, value: Setting) -> None:
self._ensure_target_dir()
super().add(key, value)

Expand Down Expand Up @@ -224,10 +234,7 @@ def _proc_dump_line(
# man git-config:
# just name, which is a short-hand to say that the variable is
# the boolean
#v = "true"
# BUUUUUT datalad of old want it to stay `None`
# BUUUUUUUUT it also want it to be reported as True later on
v = None
v = "true"
# multi-value reporting
present_v = dct.get(k)
if present_v is None:
Expand Down
2 changes: 1 addition & 1 deletion datalad_next/config/gitenv.py
Original file line number Diff line number Diff line change
Expand Up @@ -60,7 +60,7 @@ def getall(
default: Any = None,
) -> tuple[Setting, ...]:
try:
val = get_gitconfig_items_from_env()[key]
val = get_gitconfig_items_from_env()[str(key)]
except KeyError:
return (self._get_default_setting(default),)
vals = val if isinstance(val, tuple) else (val,)
Expand Down
3 changes: 1 addition & 2 deletions datalad_next/config/item.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,6 @@
from datasalad.settings import Source

from datalad_next.config import (
Dialog,
dialog as dialog_collection,
)
from datalad_next.constraints import Constraint
Expand Down Expand Up @@ -60,7 +59,7 @@ def __init__(
self._store_target = store_target

@property
def dialog(self) -> Dialog | None:
def dialog(self) -> dialog_collection.Dialog | None:
return self._dialog

@property
Expand Down
Loading

0 comments on commit 421d8f6

Please sign in to comment.