diff --git a/doc/manual/rl-next/git-fetcher.md b/doc/manual/rl-next/git-fetcher.md new file mode 100644 index 00000000000..54c0d216d1b --- /dev/null +++ b/doc/manual/rl-next/git-fetcher.md @@ -0,0 +1,18 @@ +--- +synopsis: "Nix now uses `libgit2` for Git fetching" +prs: + - 9240 + - 9241 + - 9258 + - 9480 +issues: + - 5313 +--- + +Nix has built-in support for fetching sources from Git, during evaluation and locking; outside the sandbox. +The existing implementation based on the Git CLI had issues regarding reproducibility and performance. + +Most of the original `fetchGit` behavior has been implemented using the `libgit2` library, which gives the fetcher fine-grained control. + +Known issues: +- The `export-subst` behavior has not been reimplemented. [Partial](https://github.com/NixOS/nix/pull/9391#issuecomment-1872503447) support for this Git feature is feasible, but it did not make the release window. diff --git a/src/libexpr/primops/fetchTree.cc b/src/libexpr/primops/fetchTree.cc index eb2df8626c9..7251cbbbe59 100644 --- a/src/libexpr/primops/fetchTree.cc +++ b/src/libexpr/primops/fetchTree.cc @@ -1,3 +1,4 @@ +#include "libfetchers/attrs.hh" #include "primops.hh" #include "eval-inline.hh" #include "eval-settings.hh" @@ -25,7 +26,7 @@ void emitTreeAttrs( { assert(input.isLocked()); - auto attrs = state.buildBindings(10); + auto attrs = state.buildBindings(100); state.mkStorePathString(storePath, attrs.alloc(state.sOutPath)); @@ -135,6 +136,10 @@ static void fetchTree( state.symbols[attr.name], showType(*attr.value))); } + if (params.isFetchGit && !attrs.contains("exportIgnore") && (!attrs.contains("submodules") || !*fetchers::maybeGetBoolAttr(attrs, "submodules"))) { + attrs.emplace("exportIgnore", Explicit{true}); + } + if (!params.allowNameArgument) if (auto nameIter = attrs.find("name"); nameIter != attrs.end()) state.debugThrowLastTrace(EvalError({ @@ -152,6 +157,9 @@ static void fetchTree( fetchers::Attrs attrs; attrs.emplace("type", "git"); attrs.emplace("url", fixGitURL(url)); + if (!attrs.contains("exportIgnore") && (!attrs.contains("submodules") || !*fetchers::maybeGetBoolAttr(attrs, "submodules"))) { + attrs.emplace("exportIgnore", Explicit{true}); + } input = fetchers::Input::fromAttrs(std::move(attrs)); } else { if (!experimentalFeatureSettings.isEnabled(Xp::Flakes)) @@ -593,6 +601,13 @@ static RegisterPrimOp primop_fetchGit({ A Boolean parameter that specifies whether submodules should be checked out. + - `exportIgnore` (default: `true`) + + A Boolean parameter that specifies whether `export-ignore` from `.gitattributes` should be applied. + This approximates part of the `git archive` behavior. + + Enabling this option is not recommended because it is unknown whether the Git developers commit to the reproducibility of `export-ignore` in newer Git versions. + - `shallow` (default: `false`) A Boolean parameter that specifies whether fetching from a shallow remote repository is allowed. diff --git a/src/libfetchers/fetchers.hh b/src/libfetchers/fetchers.hh index 5f3254b6d88..036647830d3 100644 --- a/src/libfetchers/fetchers.hh +++ b/src/libfetchers/fetchers.hh @@ -187,6 +187,13 @@ struct InputScheme virtual bool isDirect(const Input & input) const { return true; } + /** + * A sufficiently unique string that can be used as a cache key to identify the `input`. + * + * Only known-equivalent inputs should return the same fingerprint. + * + * This is not a stable identifier between Nix versions, but not guaranteed to change either. + */ virtual std::optional getFingerprint(ref store, const Input & input) const { return std::nullopt; } }; diff --git a/src/libfetchers/filtering-input-accessor.cc b/src/libfetchers/filtering-input-accessor.cc index 5ae416fd362..581ce3c1d54 100644 --- a/src/libfetchers/filtering-input-accessor.cc +++ b/src/libfetchers/filtering-input-accessor.cc @@ -80,4 +80,13 @@ ref AllowListInputAccessor::create( return make_ref(next, std::move(allowedPaths), std::move(makeNotAllowedError)); } +bool CachingFilteringInputAccessor::isAllowed(const CanonPath & path) +{ + auto i = cache.find(path); + if (i != cache.end()) return i->second; + auto res = isAllowedUncached(path); + cache.emplace(path, res); + return res; +} + } diff --git a/src/libfetchers/filtering-input-accessor.hh b/src/libfetchers/filtering-input-accessor.hh index a352a33a6e7..8a9b206ee51 100644 --- a/src/libfetchers/filtering-input-accessor.hh +++ b/src/libfetchers/filtering-input-accessor.hh @@ -6,7 +6,7 @@ namespace nix { /** - * A function that should throw an exception of type + * A function that returns an exception of type * `RestrictedPathError` explaining that access to `path` is * forbidden. */ @@ -71,4 +71,18 @@ struct AllowListInputAccessor : public FilteringInputAccessor using FilteringInputAccessor::FilteringInputAccessor; }; +/** + * A wrapping `InputAccessor` mix-in where `isAllowed()` caches the result of virtual `isAllowedUncached()`. + */ +struct CachingFilteringInputAccessor : FilteringInputAccessor +{ + std::map cache; + + using FilteringInputAccessor::FilteringInputAccessor; + + bool isAllowed(const CanonPath & path) override; + + virtual bool isAllowedUncached(const CanonPath & path) = 0; +}; + } diff --git a/src/libfetchers/git-utils.cc b/src/libfetchers/git-utils.cc index 65f7b45ef36..6726407b5db 100644 --- a/src/libfetchers/git-utils.cc +++ b/src/libfetchers/git-utils.cc @@ -1,5 +1,7 @@ #include "git-utils.hh" +#include "fs-input-accessor.hh" #include "input-accessor.hh" +#include "filtering-input-accessor.hh" #include "cache.hh" #include "finally.hh" #include "processes.hh" @@ -7,6 +9,7 @@ #include +#include #include #include #include @@ -21,6 +24,7 @@ #include #include +#include #include #include #include @@ -50,6 +54,8 @@ bool operator == (const git_oid & oid1, const git_oid & oid2) namespace nix { +struct GitInputAccessor; + // Some wrapper types that ensure that the git_*_free functions get called. template struct Deleter @@ -307,7 +313,7 @@ struct GitRepoImpl : GitRepo, std::enable_shared_from_this return std::nullopt; } - std::vector> getSubmodules(const Hash & rev) override; + std::vector> getSubmodules(const Hash & rev, bool exportIgnore) override; std::string resolveSubmoduleUrl( const std::string & url, @@ -340,7 +346,14 @@ struct GitRepoImpl : GitRepo, std::enable_shared_from_this return true; } - ref getAccessor(const Hash & rev) override; + /** + * A 'GitInputAccessor' with no regard for export-ignore or any other transformations. + */ + ref getRawAccessor(const Hash & rev); + + ref getAccessor(const Hash & rev, bool exportIgnore) override; + + ref getAccessor(const WorkdirInfo & wd, bool exportIgnore, MakeNotAllowedError e) override; static int sidebandProgressCallback(const char * str, int len, void * payload) { @@ -456,6 +469,9 @@ ref GitRepo::openRepo(const CanonPath & path, bool create, bool bare) return make_ref(path, create, bare); } +/** + * Raw git tree input accessor. + */ struct GitInputAccessor : InputAccessor { ref repo; @@ -644,17 +660,114 @@ struct GitInputAccessor : InputAccessor } }; -ref GitRepoImpl::getAccessor(const Hash & rev) +struct GitExportIgnoreInputAccessor : CachingFilteringInputAccessor { + ref repo; + std::optional rev; + + GitExportIgnoreInputAccessor(ref repo, ref next, std::optional rev) + : CachingFilteringInputAccessor(next, [&](const CanonPath & path) { + return RestrictedPathError(fmt("'%s' does not exist because it was fetched with exportIgnore enabled", path)); + }) + , repo(repo) + , rev(rev) + { } + + bool gitAttrGet(const CanonPath & path, const char * attrName, const char * & valueOut) + { + const char * pathCStr = path.rel_c_str(); + + if (rev) { + git_attr_options opts = GIT_ATTR_OPTIONS_INIT; + opts.attr_commit_id = hashToOID(*rev); + // TODO: test that gitattributes from global and system are not used + // (ie more or less: home and etc - both of them!) + opts.flags = GIT_ATTR_CHECK_INCLUDE_COMMIT | GIT_ATTR_CHECK_NO_SYSTEM; + return git_attr_get_ext( + &valueOut, + *repo, + &opts, + pathCStr, + attrName + ); + } + else { + return git_attr_get( + &valueOut, + *repo, + GIT_ATTR_CHECK_INDEX_ONLY | GIT_ATTR_CHECK_NO_SYSTEM, + pathCStr, + attrName); + } + } + + bool isExportIgnored(const CanonPath & path) + { + const char *exportIgnoreEntry = nullptr; + + // GIT_ATTR_CHECK_INDEX_ONLY: + // > It will use index only for creating archives or for a bare repo + // > (if an index has been specified for the bare repo). + // -- https://github.com/libgit2/libgit2/blob/HEAD/include/git2/attr.h#L113C62-L115C48 + if (gitAttrGet(path, "export-ignore", exportIgnoreEntry)) { + if (git_error_last()->klass == GIT_ENOTFOUND) + return false; + else + throw Error("looking up '%s': %s", showPath(path), git_error_last()->message); + } + else { + // Official git will silently reject export-ignore lines that have + // values. We do the same. + return GIT_ATTR_IS_TRUE(exportIgnoreEntry); + } + } + + bool isAllowedUncached(const CanonPath & path) override + { + return !isExportIgnored(path); + } + +}; + +ref GitRepoImpl::getRawAccessor(const Hash & rev) { - return make_ref(ref(shared_from_this()), rev); + auto self = ref(shared_from_this()); + return make_ref(self, rev); } -std::vector> GitRepoImpl::getSubmodules(const Hash & rev) +ref GitRepoImpl::getAccessor(const Hash & rev, bool exportIgnore) +{ + auto self = ref(shared_from_this()); + ref rawGitAccessor = getRawAccessor(rev); + if (exportIgnore) { + return make_ref(self, rawGitAccessor, rev); + } + else { + return rawGitAccessor; + } +} + +ref GitRepoImpl::getAccessor(const WorkdirInfo & wd, bool exportIgnore, MakeNotAllowedError makeNotAllowedError) +{ + auto self = ref(shared_from_this()); + ref fileAccessor = + AllowListInputAccessor::create( + makeFSInputAccessor(path), + std::set { wd.files }, + std::move(makeNotAllowedError)); + if (exportIgnore) { + return make_ref(self, fileAccessor, std::nullopt); + } + else { + return fileAccessor; + } +} + +std::vector> GitRepoImpl::getSubmodules(const Hash & rev, bool exportIgnore) { /* Read the .gitmodules files from this revision. */ CanonPath modulesFile(".gitmodules"); - auto accessor = getAccessor(rev); + auto accessor = getAccessor(rev, exportIgnore); if (!accessor->pathExists(modulesFile)) return {}; /* Parse it and get the revision of each submodule. */ @@ -665,8 +778,10 @@ std::vector> GitRepoImpl::getSubmodules std::vector> result; + auto rawAccessor = getRawAccessor(rev); + for (auto & submodule : parseSubmodules(CanonPath(pathTemp))) { - auto rev = accessor.dynamic_pointer_cast()->getSubmoduleRev(submodule.path); + auto rev = rawAccessor->getSubmoduleRev(submodule.path); result.push_back({std::move(submodule), rev}); } diff --git a/src/libfetchers/git-utils.hh b/src/libfetchers/git-utils.hh index 1def82071ef..7685547803a 100644 --- a/src/libfetchers/git-utils.hh +++ b/src/libfetchers/git-utils.hh @@ -1,5 +1,6 @@ #pragma once +#include "filtering-input-accessor.hh" #include "input-accessor.hh" namespace nix { @@ -57,7 +58,7 @@ struct GitRepo * Return the submodules of this repo at the indicated revision, * along with the revision of each submodule. */ - virtual std::vector> getSubmodules(const Hash & rev) = 0; + virtual std::vector> getSubmodules(const Hash & rev, bool exportIgnore) = 0; virtual std::string resolveSubmoduleUrl( const std::string & url, @@ -71,7 +72,9 @@ struct GitRepo virtual bool hasObject(const Hash & oid) = 0; - virtual ref getAccessor(const Hash & rev) = 0; + virtual ref getAccessor(const Hash & rev, bool exportIgnore) = 0; + + virtual ref getAccessor(const WorkdirInfo & wd, bool exportIgnore, MakeNotAllowedError makeNotAllowedError) = 0; virtual void fetch( const std::string & url, diff --git a/src/libfetchers/git.cc b/src/libfetchers/git.cc index 79270c3179f..6ecb7a4ea1f 100644 --- a/src/libfetchers/git.cc +++ b/src/libfetchers/git.cc @@ -1,3 +1,4 @@ +#include "error.hh" #include "fetchers.hh" #include "users.hh" #include "cache.hh" @@ -9,7 +10,6 @@ #include "processes.hh" #include "git.hh" #include "fs-input-accessor.hh" -#include "filtering-input-accessor.hh" #include "mounted-input-accessor.hh" #include "git-utils.hh" #include "logging.hh" @@ -174,7 +174,7 @@ struct GitInputScheme : InputScheme for (auto & [name, value] : url.query) { if (name == "rev" || name == "ref" || name == "keytype" || name == "publicKey" || name == "publicKeys") attrs.emplace(name, value); - else if (name == "shallow" || name == "submodules" || name == "allRefs" || name == "verifyCommit") + else if (name == "shallow" || name == "submodules" || name == "exportIgnore" || name == "allRefs" || name == "verifyCommit") attrs.emplace(name, Explicit { value == "1" }); else url2.query.emplace(name, value); @@ -199,6 +199,7 @@ struct GitInputScheme : InputScheme "rev", "shallow", "submodules", + "exportIgnore", "lastModified", "revCount", "narHash", @@ -250,6 +251,8 @@ struct GitInputScheme : InputScheme url.query.insert_or_assign("shallow", "1"); if (getSubmodulesAttr(input)) url.query.insert_or_assign("submodules", "1"); + if (maybeGetBoolAttr(input.attrs, "exportIgnore").value_or(false)) + url.query.insert_or_assign("exportIgnore", "1"); if (maybeGetBoolAttr(input.attrs, "verifyCommit").value_or(false)) url.query.insert_or_assign("verifyCommit", "1"); auto publicKeys = getPublicKeys(input.attrs); @@ -372,6 +375,11 @@ struct GitInputScheme : InputScheme return maybeGetBoolAttr(input.attrs, "submodules").value_or(false); } + bool getExportIgnoreAttr(const Input & input) const + { + return maybeGetBoolAttr(input.attrs, "exportIgnore").value_or(false); + } + bool getAllRefsAttr(const Input & input) const { return maybeGetBoolAttr(input.attrs, "allRefs").value_or(false); @@ -600,7 +608,8 @@ struct GitInputScheme : InputScheme verifyCommit(input, repo); - auto accessor = repo->getAccessor(rev); + bool exportIgnore = getExportIgnoreAttr(input); + auto accessor = repo->getAccessor(rev, exportIgnore); accessor->setPathDisplay("«" + input.to_string() + "»"); @@ -610,7 +619,7 @@ struct GitInputScheme : InputScheme if (getSubmodulesAttr(input)) { std::map> mounts; - for (auto & [submodule, submoduleRev] : repo->getSubmodules(rev)) { + for (auto & [submodule, submoduleRev] : repo->getSubmodules(rev, exportIgnore)) { auto resolved = repo->resolveSubmoduleUrl(submodule.url, repoInfo.url); debug("Git submodule %s: %s %s %s -> %s", submodule.path, submodule.url, submodule.branch, submoduleRev.gitRev(), resolved); @@ -620,6 +629,7 @@ struct GitInputScheme : InputScheme if (submodule.branch != "") attrs.insert_or_assign("ref", submodule.branch); attrs.insert_or_assign("rev", submoduleRev.gitRev()); + attrs.insert_or_assign("exportIgnore", Explicit{ exportIgnore }); auto submoduleInput = fetchers::Input::fromAttrs(std::move(attrs)); auto [submoduleAccessor, submoduleInput2] = submoduleInput.getAccessor(store); @@ -650,10 +660,13 @@ struct GitInputScheme : InputScheme for (auto & submodule : repoInfo.workdirInfo.submodules) repoInfo.workdirInfo.files.insert(submodule.path); + auto repo = GitRepo::openRepo(CanonPath(repoInfo.url), false, false); + + auto exportIgnore = getExportIgnoreAttr(input); + ref accessor = - AllowListInputAccessor::create( - makeFSInputAccessor(CanonPath(repoInfo.url)), - std::move(repoInfo.workdirInfo.files), + repo->getAccessor(repoInfo.workdirInfo, + exportIgnore, makeNotAllowedError(repoInfo.url)); /* If the repo has submodules, return a mounted input accessor @@ -667,6 +680,8 @@ struct GitInputScheme : InputScheme fetchers::Attrs attrs; attrs.insert_or_assign("type", "git"); attrs.insert_or_assign("url", submodulePath.abs()); + attrs.insert_or_assign("exportIgnore", Explicit{ exportIgnore }); + auto submoduleInput = fetchers::Input::fromAttrs(std::move(attrs)); auto [submoduleAccessor, submoduleInput2] = submoduleInput.getAccessor(store); @@ -725,6 +740,16 @@ struct GitInputScheme : InputScheme auto repoInfo = getRepoInfo(input); + if (getExportIgnoreAttr(input) + && getSubmodulesAttr(input)) { + /* In this situation, we don't have a git CLI behavior that we can copy. + `git archive` does not support submodules, so it is unclear whether + rules from the parent should affect the submodule or not. + When git may eventually implement this, we need Nix to match its + behavior. */ + throw UnimplementedError("exportIgnore and submodules are not supported together yet"); + } + auto [accessor, final] = input.getRef() || input.getRev() || !repoInfo.isLocal ? getAccessorFromCommit(store, repoInfo, std::move(input)) @@ -738,7 +763,7 @@ struct GitInputScheme : InputScheme std::optional getFingerprint(ref store, const Input & input) const override { if (auto rev = input.getRev()) - return rev->gitRev() + (getSubmodulesAttr(input) ? ";s" : ""); + return rev->gitRev() + (getSubmodulesAttr(input) ? ";s" : "") + (getExportIgnoreAttr(input) ? ";e" : ""); else return std::nullopt; } diff --git a/src/libutil/canon-path.hh b/src/libutil/canon-path.hh index 6aff4ec0d74..997c8c73131 100644 --- a/src/libutil/canon-path.hh +++ b/src/libutil/canon-path.hh @@ -88,6 +88,13 @@ public: std::string_view rel() const { return ((std::string_view) path).substr(1); } + const char * rel_c_str() const + { + auto cs = path.c_str(); + assert(cs[0]); // for safety if invariant is broken + return &cs[1]; + } + struct Iterator { std::string_view remaining; diff --git a/tests/functional/fetchGit.sh b/tests/functional/fetchGit.sh index 4985c7764f6..46532c02556 100644 --- a/tests/functional/fetchGit.sh +++ b/tests/functional/fetchGit.sh @@ -229,6 +229,18 @@ rev_tag2=$(git -C $repo rev-parse refs/tags/tag2) [[ $rev_tag2_nix = $rev_tag2 ]] unset _NIX_FORCE_HTTP +# Ensure .gitattributes is respected +touch $repo/not-exported-file +touch $repo/exported-wonky +echo "/not-exported-file export-ignore" >> $repo/.gitattributes +echo "/exported-wonky export-ignore=wonk" >> $repo/.gitattributes +git -C $repo add not-exported-file exported-wonky .gitattributes +git -C $repo commit -m 'Bla6' +rev5=$(git -C $repo rev-parse HEAD) +path12=$(nix eval --impure --raw --expr "(builtins.fetchGit { url = file://$repo; rev = \"$rev5\"; }).outPath") +[[ ! -e $path12/not-exported-file ]] +[[ -e $path12/exported-wonky ]] + # should fail if there is no repo rm -rf $repo/.git (! nix eval --impure --raw --expr "(builtins.fetchGit \"file://$repo\").outPath") diff --git a/tests/functional/fetchGitSubmodules.sh b/tests/functional/fetchGitSubmodules.sh index 369cdc5db43..cd180815d69 100644 --- a/tests/functional/fetchGitSubmodules.sh +++ b/tests/functional/fetchGitSubmodules.sh @@ -118,3 +118,55 @@ cloneRepo=$TEST_ROOT/a/b/gitSubmodulesClone # NB /a/b to make the relative path git clone $rootRepo $cloneRepo pathIndirect=$(nix eval --raw --expr "(builtins.fetchGit { url = file://$cloneRepo; rev = \"$rev2\"; submodules = true; }).outPath") [[ $pathIndirect = $pathWithRelative ]] + +# Test submodule export-ignore interaction +git -C $rootRepo/sub config user.email "foobar@example.com" +git -C $rootRepo/sub config user.name "Foobar" + +echo "/exclude-from-root export-ignore" >> $rootRepo/.gitattributes +# TBD possible semantics for submodules + exportIgnore +# echo "/sub/exclude-deep export-ignore" >> $rootRepo/.gitattributes +echo nope > $rootRepo/exclude-from-root +git -C $rootRepo add .gitattributes exclude-from-root +git -C $rootRepo commit -m "Add export-ignore" + +echo "/exclude-from-sub export-ignore" >> $rootRepo/sub/.gitattributes +echo nope > $rootRepo/sub/exclude-from-sub +# TBD possible semantics for submodules + exportIgnore +# echo aye > $rootRepo/sub/exclude-from-root +git -C $rootRepo/sub add .gitattributes exclude-from-sub +git -C $rootRepo/sub commit -m "Add export-ignore (sub)" + +git -C $rootRepo add sub +git -C $rootRepo commit -m "Update submodule" + +git -C $rootRepo status + +# # TBD: not supported yet, because semantics are undecided and current implementation leaks rules from the root to submodules +# # exportIgnore can be used with submodules +# pathWithExportIgnore=$(nix eval --impure --raw --expr "(builtins.fetchGit { url = file://$rootRepo; submodules = true; exportIgnore = true; }).outPath") +# # find $pathWithExportIgnore +# # git -C $rootRepo archive --format=tar HEAD | tar -t +# # cp -a $rootRepo /tmp/rootRepo + +# [[ -e $pathWithExportIgnore/sub/content ]] +# [[ ! -e $pathWithExportIgnore/exclude-from-root ]] +# [[ ! -e $pathWithExportIgnore/sub/exclude-from-sub ]] +# TBD possible semantics for submodules + exportIgnore +# # root .gitattribute has no power across submodule boundary +# [[ -e $pathWithExportIgnore/sub/exclude-from-root ]] +# [[ -e $pathWithExportIgnore/sub/exclude-deep ]] + + +# exportIgnore can be explicitly disabled with submodules +pathWithoutExportIgnore=$(nix eval --impure --raw --expr "(builtins.fetchGit { url = file://$rootRepo; submodules = true; exportIgnore = false; }).outPath") +# find $pathWithoutExportIgnore + +[[ -e $pathWithoutExportIgnore/exclude-from-root ]] +[[ -e $pathWithoutExportIgnore/sub/exclude-from-sub ]] + +# exportIgnore defaults to false when submodules = true +pathWithSubmodules=$(nix eval --impure --raw --expr "(builtins.fetchGit { url = file://$rootRepo; submodules = true; }).outPath") + +[[ -e $pathWithoutExportIgnore/exclude-from-root ]] +[[ -e $pathWithoutExportIgnore/sub/exclude-from-sub ]]