diff --git a/src/libfetchers/git-utils.cc b/src/libfetchers/git-utils.cc index 68e39580f6e..5e3e6dae4b2 100644 --- a/src/libfetchers/git-utils.cc +++ b/src/libfetchers/git-utils.cc @@ -1,11 +1,13 @@ #include "git-utils.hh" #include "input-accessor.hh" #include "cache.hh" +#include "finally.hh" #include #include #include +#include #include #include #include @@ -14,6 +16,7 @@ #include #include #include +#include #include #include @@ -63,6 +66,8 @@ typedef std::unique_ptr> Reference; typedef std::unique_ptr> DescribeResult; typedef std::unique_ptr> StatusList; typedef std::unique_ptr> Remote; +typedef std::unique_ptr> GitConfig; +typedef std::unique_ptr> ConfigIterator; // A helper to ensure that we don't leak objects returned by libgit2. template @@ -256,6 +261,17 @@ struct GitRepoImpl : GitRepo, std::enable_shared_from_this return std::nullopt; } + std::vector getSubmodules(const Hash & rev) override; + + std::string resolveSubmoduleUrl(const std::string & url) override + { + git_buf buf = GIT_BUF_INIT; + if (git_submodule_resolve_url(&buf, *this, url.c_str())) + throw Error("resolving Git submodule URL '%s'", url); + Finally cleanup = [&]() { git_buf_dispose(&buf); }; + return buf.ptr; + } + bool hasObject(const Hash & oid_) override { auto oid = hashToOID(oid_); @@ -400,6 +416,16 @@ struct GitInputAccessor : InputAccessor return readBlob(path, true); } + Hash getSubmoduleRev(const CanonPath & path) + { + auto entry = need(path); + + if (git_tree_entry_type(entry) != GIT_OBJECT_COMMIT) + throw Error("'%s' is not a submodule", showPath(path)); + + return toHash(*git_tree_entry_id(entry)); + } + std::map lookupCache; /* Recursively look up 'path' relative to the root. */ @@ -495,4 +521,56 @@ ref GitRepoImpl::getAccessor(const Hash & rev) return make_ref(ref(shared_from_this()), rev); } +std::vector GitRepoImpl::getSubmodules(const Hash & rev) +{ + /* Read the .gitmodules files from this revision. */ + CanonPath modulesFile(".gitmodules"); + + auto accessor = getAccessor(rev); + if (!accessor->pathExists(modulesFile)) return {}; + + /* Parse it. */ + auto configS = accessor->readFile(modulesFile); + + auto [fdTemp, pathTemp] = createTempFile("nix-git-submodules"); + writeFull(fdTemp.get(), configS); + + GitConfig config; + if (git_config_open_ondisk(Setter(config), pathTemp.c_str())) + throw Error("parsing .gitmodules file: %s", git_error_last()->message); + + ConfigIterator it; + if (git_config_iterator_glob_new(Setter(it), config.get(), "^submodule\\..*\\.(path|url|branch)$")) + throw Error("iterating over .gitmodules: %s", git_error_last()->message); + + std::map entries; + + while (true) { + git_config_entry * entry = nullptr; + if (auto err = git_config_next(&entry, it.get())) { + if (err == GIT_ITEROVER) break; + throw Error("iterating over .gitmodules: %s", git_error_last()->message); + } + entries.emplace(entry->name + 10, entry->value); + } + + std::vector result; + + for (auto & [key, value] : entries) { + if (!hasSuffix(key, ".path")) continue; + std::string key2(key, 0, key.size() - 5); + auto path = CanonPath(value); + auto rev = accessor.dynamic_pointer_cast()->getSubmoduleRev(path); + result.push_back(Submodule { + .path = path, + .url = entries[key2 + ".url"], + .branch = entries[key2 + ".branch"], + .rev = rev, + }); + } + + return result; +} + + } diff --git a/src/libfetchers/git-utils.hh b/src/libfetchers/git-utils.hh index dd2c066725b..55e7ef969ac 100644 --- a/src/libfetchers/git-utils.hh +++ b/src/libfetchers/git-utils.hh @@ -38,6 +38,18 @@ struct GitRepo /* Get the ref that HEAD points to. */ virtual std::optional getWorkdirRef() = 0; + struct Submodule + { + CanonPath path; + std::string url; + std::string branch; + Hash rev; + }; + + virtual std::vector getSubmodules(const Hash & rev) = 0; + + virtual std::string resolveSubmoduleUrl(const std::string & url) = 0; + struct TarballInfo { Hash treeHash; diff --git a/src/libfetchers/git.cc b/src/libfetchers/git.cc index 55d3a8ebed6..42b4aa23a6f 100644 --- a/src/libfetchers/git.cc +++ b/src/libfetchers/git.cc @@ -8,6 +8,7 @@ #include "util.hh" #include "git.hh" #include "fs-input-accessor.hh" +#include "union-input-accessor.hh" #include "git-utils.hh" #include "fetch-settings.hh" @@ -134,11 +135,6 @@ std::optional readHeadCached(const std::string & actualUrl) return std::nullopt; } -bool isNotDotGitDirectory(const Path & path) -{ - return baseNameOf(path) != ".git"; -} - } // end namespace struct GitInputScheme : InputScheme @@ -413,7 +409,7 @@ struct GitInputScheme : InputScheme std::string name = input.getName(); - auto makeResult2 = [&](const Attrs & infoAttrs, ref accessor) -> std::pair, Input> + auto makeResult = [&](const Attrs & infoAttrs, ref accessor) -> std::pair, Input> { assert(input.getRev()); assert(!origRev || origRev == input.getRev()); @@ -424,18 +420,6 @@ struct GitInputScheme : InputScheme return {accessor, std::move(input)}; }; - auto makeResult = [&](const Attrs & infoAttrs, const StorePath & storePath) -> std::pair, Input> - { - // FIXME: remove? - //input.attrs.erase("narHash"); - auto narHash = store->queryPathInfo(storePath)->narHash; - input.attrs.insert_or_assign("narHash", narHash.to_string(HashFormat::SRI, true)); - - auto accessor = makeStorePathAccessor(store, storePath, makeNotAllowedError(repoInfo.url)); - - return makeResult2(infoAttrs, accessor); - }; - auto originalRef = input.getRef(); auto ref = originalRef ? *originalRef : getDefaultRef(repoInfo); input.attrs.insert_or_assign("ref", ref); @@ -542,66 +526,39 @@ struct GitInputScheme : InputScheme printTalkative("using revision %s of repo '%s'", rev.gitRev(), repoInfo.url); - if (!repoInfo.submodules) { - auto accessor = GitRepo::openRepo(CanonPath(repoDir))->getAccessor(rev); - return makeResult2(infoAttrs, accessor); - } - - else { - // FIXME: use libgit2 - Path tmpDir = createTempDir(); - AutoDelete delTmpDir(tmpDir, true); - PathFilter filter = defaultPathFilter; - - Activity act(*logger, lvlChatty, actUnknown, fmt("copying Git tree '%s' to the store", input.to_string())); - - Path tmpGitDir = createTempDir(); - AutoDelete delTmpGitDir(tmpGitDir, true); - - runProgram("git", true, { "-c", "init.defaultBranch=" + gitInitialBranch, "init", tmpDir, "--separate-git-dir", tmpGitDir }); - - { - // TODO: repoDir might lack the ref (it only checks if rev - // exists, see FIXME above) so use a big hammer and fetch - // everything to ensure we get the rev. - Activity act(*logger, lvlTalkative, actUnknown, fmt("making temporary clone of '%s'", repoDir)); - runProgram("git", true, { "-C", tmpDir, "fetch", "--quiet", "--force", - "--update-head-ok", "--", repoDir, "refs/*:refs/*" }, {}, true); + auto repo = GitRepo::openRepo(CanonPath(repoDir)); + + auto accessor = repo->getAccessor(rev); + + /* If the repo has submodules, fetch them and return a union + input accessor consisting of the accessor for the top-level + repo and the accessors for the submodules. */ + if (repoInfo.submodules) { + std::map> mounts; + + for (auto & submodule : repo->getSubmodules(rev)) { + auto resolved = repo->resolveSubmoduleUrl(submodule.url); + debug("Git submodule %s: %s %s %s -> %s", + submodule.path, submodule.url, submodule.branch, submodule.rev.gitRev(), resolved); + fetchers::Attrs attrs; + attrs.insert_or_assign("type", "git"); + attrs.insert_or_assign("url", resolved); + if (submodule.branch != "") + attrs.insert_or_assign("ref", submodule.branch); + attrs.insert_or_assign("rev", submodule.rev.gitRev()); + auto submoduleInput = fetchers::Input::fromAttrs(std::move(attrs)); + auto [submoduleAccessor, submoduleInput2] = + submoduleInput.scheme->getAccessor(store, submoduleInput); + mounts.insert_or_assign(submodule.path, submoduleAccessor); } - runProgram("git", true, { "-C", tmpDir, "checkout", "--quiet", rev.gitRev() }); - - /* Ensure that we use the correct origin for fetching - submodules. This matters for submodules with relative - URLs. */ - if (repoInfo.isLocal) { - writeFile(tmpGitDir + "/config", readFile(repoDir + "/" + repoInfo.gitDir + "/config")); - - /* Restore the config.bare setting we may have just - copied erroneously from the user's repo. */ - runProgram("git", true, { "-C", tmpDir, "config", "core.bare", "false" }); - } else - runProgram("git", true, { "-C", tmpDir, "config", "remote.origin.url", repoInfo.url }); - - /* As an optimisation, copy the modules directory of the - source repo if it exists. */ - auto modulesPath = repoDir + "/" + repoInfo.gitDir + "/modules"; - if (pathExists(modulesPath)) { - Activity act(*logger, lvlTalkative, actUnknown, fmt("copying submodules of '%s'", repoInfo.url)); - runProgram("cp", true, { "-R", "--", modulesPath, tmpGitDir + "/modules" }); + if (!mounts.empty()) { + mounts.insert_or_assign(CanonPath::root, accessor); + accessor = makeUnionInputAccessor(std::move(mounts)); } - - { - Activity act(*logger, lvlTalkative, actUnknown, fmt("fetching submodules of '%s'", repoInfo.url)); - runProgram("git", true, { "-C", tmpDir, "submodule", "--quiet", "update", "--init", "--recursive" }, {}, true); - } - - filter = isNotDotGitDirectory; - - auto storePath = store->addToStore(name, tmpDir, FileIngestionMethod::Recursive, htSHA256, filter); - - return makeResult(infoAttrs, std::move(storePath)); } + + return makeResult(infoAttrs, accessor); } std::pair, Input> getAccessorFromWorkdir( diff --git a/src/libfetchers/union-input-accessor.cc b/src/libfetchers/union-input-accessor.cc new file mode 100644 index 00000000000..940c0e06cc8 --- /dev/null +++ b/src/libfetchers/union-input-accessor.cc @@ -0,0 +1,80 @@ +#include "union-input-accessor.hh" + +namespace nix { + +struct UnionInputAccessor : InputAccessor +{ + std::map> mounts; + + UnionInputAccessor(std::map> _mounts) + : mounts(std::move(_mounts)) + { + // Currently we require a root filesystem. This could be relaxed. + assert(mounts.contains(CanonPath::root)); + + // FIXME: should check that every mount point exists. Or we + // could return dummy parent directories automatically. + } + + std::string readFile(const CanonPath & path) override + { + auto [accessor, subpath] = resolve(path); + return accessor->readFile(subpath); + } + + bool pathExists(const CanonPath & path) override + { + auto [accessor, subpath] = resolve(path); + return accessor->pathExists(subpath); + } + + Stat lstat(const CanonPath & path) override + { + auto [accessor, subpath] = resolve(path); + return accessor->lstat(subpath); + } + + DirEntries readDirectory(const CanonPath & path) override + { + auto [accessor, subpath] = resolve(path); + return accessor->readDirectory(subpath); + } + + std::string readLink(const CanonPath & path) override + { + auto [accessor, subpath] = resolve(path); + return accessor->readLink(subpath); + } + + std::string showPath(const CanonPath & path) override + { + auto [accessor, subpath] = resolve(path); + return accessor->showPath(subpath); + } + + std::pair, CanonPath> resolve(CanonPath path) + { + // Find the nearest parent of `path` that is a mount point. + std::vector ss; + while (true) { + auto i = mounts.find(path); + if (i != mounts.end()) { + auto subpath = CanonPath::root; + for (auto j = ss.rbegin(); j != ss.rend(); ++j) + subpath.push(*j); + return {i->second, std::move(subpath)}; + } + + assert(!path.isRoot()); + ss.push_back(std::string(*path.baseName())); + path.pop(); + } + } +}; + +ref makeUnionInputAccessor(std::map> mounts) +{ + return make_ref(std::move(mounts)); +} + +} diff --git a/src/libfetchers/union-input-accessor.hh b/src/libfetchers/union-input-accessor.hh new file mode 100644 index 00000000000..6a1649c1dea --- /dev/null +++ b/src/libfetchers/union-input-accessor.hh @@ -0,0 +1,9 @@ +#pragma once + +#include "input-accessor.hh" + +namespace nix { + +ref makeUnionInputAccessor(std::map> mounts); + +} diff --git a/tests/functional/fetchGitSubmodules.sh b/tests/functional/fetchGitSubmodules.sh index df81232e599..369cdc5db43 100644 --- a/tests/functional/fetchGitSubmodules.sh +++ b/tests/functional/fetchGitSubmodules.sh @@ -118,11 +118,3 @@ cloneRepo=$TEST_ROOT/a/b/gitSubmodulesClone # NB /a/b to make the relative path git clone $rootRepo $cloneRepo pathIndirect=$(nix eval --raw --expr "(builtins.fetchGit { url = file://$cloneRepo; rev = \"$rev2\"; submodules = true; }).outPath") [[ $pathIndirect = $pathWithRelative ]] - -# Test that if the clone has the submodule already, we're not fetching -# it again. -git -C $cloneRepo submodule update --init -rm $TEST_HOME/.cache/nix/fetcher-cache* -rm -rf $subRepo -pathSubmoduleGone=$(nix eval --raw --expr "(builtins.fetchGit { url = file://$cloneRepo; rev = \"$rev2\"; submodules = true; }).outPath") -[[ $pathSubmoduleGone = $pathWithRelative ]]