From 3b44d2d90b3e7875d5f0220a87c1aea0b914392c Mon Sep 17 00:00:00 2001 From: John Ericson Date: Mon, 4 Sep 2023 09:51:23 -0400 Subject: [PATCH] Git object hashing Part of RFC 133 Extracted from our old IPFS branches. Co-Authored-By: Matthew Bauer Co-Authored-By: Carlo Nucera --- src/libexpr/primops/fetchTree.cc | 15 +- src/libfetchers/fetchers.cc | 34 ++- src/libfetchers/fetchers.hh | 1 + src/libfetchers/git.cc | 191 ++++++++++++---- src/libfetchers/github.cc | 2 +- src/libfetchers/mercurial.cc | 3 +- src/libfetchers/path.cc | 2 +- src/libstore/binary-cache-store.cc | 9 +- src/libstore/build/local-derivation-goal.cc | 20 +- src/libstore/content-address.cc | 6 + src/libstore/content-address.hh | 18 +- src/libstore/daemon.cc | 7 +- src/libstore/local-store.cc | 47 +++- src/libstore/nar-accessor.cc | 3 +- src/libstore/remote-store.cc | 1 + src/libstore/store-api.cc | 51 ++++- src/libutil/archive.cc | 65 ------ src/libutil/archive.hh | 18 +- src/libutil/experimental-features.cc | 10 +- src/libutil/experimental-features.hh | 1 + src/libutil/fs-sink.cc | 104 +++++++++ src/libutil/fs-sink.hh | 48 ++++ src/libutil/git.cc | 233 +++++++++++++++++++- src/libutil/git.hh | 28 +++ src/libutil/hash.cc | 8 + src/libutil/hash.hh | 16 +- src/libutil/serialise.cc | 4 + src/libutil/serialise.hh | 1 + src/nix/add-to-store.cc | 37 +++- src/nix/hash.cc | 18 +- tests/git.sh | 83 +++++++ tests/local.mk | 1 + 32 files changed, 908 insertions(+), 177 deletions(-) create mode 100644 src/libutil/fs-sink.cc create mode 100644 src/libutil/fs-sink.hh create mode 100644 tests/git.sh diff --git a/src/libexpr/primops/fetchTree.cc b/src/libexpr/primops/fetchTree.cc index f040a35109a8..93656ca043e1 100644 --- a/src/libexpr/primops/fetchTree.cc +++ b/src/libexpr/primops/fetchTree.cc @@ -30,9 +30,13 @@ void emitTreeAttrs( // FIXME: support arbitrary input attributes. 
- auto narHash = input.getNarHash(); - assert(narHash); - attrs.alloc("narHash").mkString(narHash->to_string(SRI, true)); + if (auto narHash = input.getNarHash()) { + attrs.alloc("narHash").mkString(narHash->to_string(SRI, true)); + } else if (auto treeHash = input.getTreeHash()) { + attrs.alloc("treeHash").mkString(treeHash->to_string(SRI, true)); + } else + /* Must have either tree hash or NAR hash */ + assert(false); if (input.getType() == "git") attrs.alloc("submodules").mkBool( @@ -50,6 +54,11 @@ void emitTreeAttrs( attrs.alloc("shortRev").mkString(emptyHash.gitShortRev()); } + if (auto treeHash = input.getTreeHash()) { + attrs.alloc("treeHash").mkString(treeHash->gitRev()); + attrs.alloc("shortTreeHash").mkString(treeHash->gitRev()); + } + if (auto revCount = input.getRevCount()) attrs.alloc("revCount").mkInt(*revCount); else if (emptyRevFallback) diff --git a/src/libfetchers/fetchers.cc b/src/libfetchers/fetchers.cc index e683b9f804a6..a3963640d839 100644 --- a/src/libfetchers/fetchers.cc +++ b/src/libfetchers/fetchers.cc @@ -25,6 +25,8 @@ static void fixupInput(Input & input) input.getRef(); if (input.getRev()) input.locked = true; + if (input.getTreeHash()) + input.locked = true; input.getRevCount(); input.getLastModified(); if (input.getNarHash()) @@ -89,7 +91,7 @@ Attrs Input::toAttrs() const bool Input::hasAllInfo() const { - return getNarHash() && scheme && scheme->hasAllInfo(*this); + return scheme && scheme->hasAllInfo(*this); } bool Input::operator ==(const Input & other) const @@ -213,14 +215,19 @@ std::string Input::getName() const StorePath Input::computeStorePath(Store & store) const { - auto narHash = getNarHash(); - if (!narHash) - throw Error("cannot compute store path for unlocked input '%s'", to_string()); - return store.makeFixedOutputPath(getName(), FixedOutputInfo { - .method = FileIngestionMethod::Recursive, - .hash = *narHash, - .references = {}, - }); + if (auto treeHash = getTreeHash()) + return store.makeFixedOutputPath(getName(), FixedOutputInfo { + .method = FileIngestionMethod::Git, + .hash = *treeHash, + .references = {}, + }); + if (auto narHash = getNarHash()) + return store.makeFixedOutputPath(getName(), FixedOutputInfo { + .method = FileIngestionMethod::Recursive, + .hash = *narHash, + .references = {}, + }); + throw Error("cannot compute store path for unlocked input '%s'", to_string()); } std::string Input::getType() const @@ -262,6 +269,15 @@ std::optional Input::getRev() const return hash; } +std::optional Input::getTreeHash() const +{ + if (auto s = maybeGetStrAttr(attrs, "treeHash")) { + experimentalFeatureSettings.require(Xp::GitHashing); + return Hash::parseAny(*s, htSHA1); + } + return {}; +} + std::optional Input::getRevCount() const { if (auto n = maybeGetIntAttr(attrs, "revCount")) diff --git a/src/libfetchers/fetchers.hh b/src/libfetchers/fetchers.hh index 6e10e95134fd..e7af5f9a8d17 100644 --- a/src/libfetchers/fetchers.hh +++ b/src/libfetchers/fetchers.hh @@ -110,6 +110,7 @@ public: std::optional getNarHash() const; std::optional getRef() const; std::optional getRev() const; + std::optional getTreeHash() const; std::optional getRevCount() const; std::optional getLastModified() const; }; diff --git a/src/libfetchers/git.cc b/src/libfetchers/git.cc index f8d89ab2fcdd..5036bf511e71 100644 --- a/src/libfetchers/git.cc +++ b/src/libfetchers/git.cc @@ -3,6 +3,7 @@ #include "globals.hh" #include "tarfile.hh" #include "store-api.hh" +#include "git.hh" #include "url-parts.hh" #include "pathlocks.hh" #include "util.hh" @@ -200,7 +201,7 @@ 
WorkdirInfo getWorkdirInfo(const Input & input, const Path & workdir) return WorkdirInfo { .clean = clean, .hasHead = hasHead }; } -std::pair fetchFromWorkdir(ref store, Input & input, const Path & workdir, const WorkdirInfo & workdirInfo) +std::pair fetchFromWorkdir(ref store, Input & input, const Path & workdir, const WorkdirInfo & workdirInfo, FileIngestionMethod ingestionMethod) { const bool submodules = maybeGetBoolAttr(input.attrs, "submodules").value_or(false); auto gitDir = ".git"; @@ -272,7 +273,7 @@ struct GitInputScheme : InputScheme attrs.emplace("type", "git"); for (auto & [name, value] : url.query) { - if (name == "rev" || name == "ref") + if (name == "rev" || name == "ref" || name == "treeHash" || name == "gitIngestion") attrs.emplace(name, value); else if (name == "shallow" || name == "submodules" || name == "allRefs") attrs.emplace(name, Explicit { value == "1" }); @@ -290,7 +291,7 @@ struct GitInputScheme : InputScheme if (maybeGetStrAttr(attrs, "type") != "git") return {}; for (auto & [name, value] : attrs) - if (name != "type" && name != "url" && name != "ref" && name != "rev" && name != "shallow" && name != "submodules" && name != "lastModified" && name != "revCount" && name != "narHash" && name != "allRefs" && name != "name" && name != "dirtyRev" && name != "dirtyShortRev") + if (name != "type" && name != "url" && name != "ref" && name != "rev" && name != "shallow" && name != "submodules" && name != "gitIngestion" && name != "treeHash" && name != "lastModified" && name != "revCount" && name != "narHash" && name != "allRefs" && name != "name" && name != "dirtyRev" && name != "dirtyShortRev") throw Error("unsupported Git input attribute '%s'", name); parseURL(getStrAttr(attrs, "url")); @@ -313,6 +314,9 @@ struct GitInputScheme : InputScheme auto url = parseURL(getStrAttr(input.attrs, "url")); if (url.scheme != "git") url.scheme = "git+" + url.scheme; if (auto rev = input.getRev()) url.query.insert_or_assign("rev", rev->gitRev()); + if (auto treeHash = input.getTreeHash()) url.query.insert_or_assign("treeHash", treeHash->gitRev()); + if (maybeGetBoolAttr(input.attrs, "gitIngestion").value_or((bool) input.getTreeHash())) + url.query.insert_or_assign("gitIngestion", "1"); if (auto ref = input.getRef()) url.query.insert_or_assign("ref", *ref); if (maybeGetBoolAttr(input.attrs, "shallow").value_or(false)) url.query.insert_or_assign("shallow", "1"); @@ -323,11 +327,18 @@ struct GitInputScheme : InputScheme { bool maybeDirty = !input.getRef(); bool shallow = maybeGetBoolAttr(input.attrs, "shallow").value_or(false); - return + bool submodules = maybeGetBoolAttr(input.attrs, "submodules").value_or(false); + /* FIXME just requiring tree hash is necessary for substitutions to + work for now, but breaks eval purity. Need a better solution before + upstreaming. */ + return (input.getTreeHash() && !submodules) || ( maybeGetIntAttr(input.attrs, "lastModified") - && (shallow || maybeDirty || maybeGetIntAttr(input.attrs, "revCount")); + && (shallow || maybeDirty || maybeGetIntAttr(input.attrs, "revCount")) + && input.getNarHash()); } + /* FIXME no overriding the tree hash / flake registry support for tree + hashes, for now. 
*/ Input applyOverrides( const Input & input, std::optional ref, @@ -364,7 +375,7 @@ struct GitInputScheme : InputScheme std::optional getSourcePath(const Input & input) override { auto url = parseURL(getStrAttr(input.attrs, "url")); - if (url.scheme == "file" && !input.getRef() && !input.getRev()) + if (url.scheme == "file" && !input.getRef() && !input.getRev() && !input.getTreeHash()) return url.path; return {}; } @@ -412,6 +423,11 @@ struct GitInputScheme : InputScheme if (submodules) cacheType += "-submodules"; if (allRefs) cacheType += "-all-refs"; + auto ingestionMethod = + maybeGetBoolAttr(input.attrs, "gitIngestion").value_or((bool) input.getTreeHash()) + ? FileIngestionMethod::Git + : FileIngestionMethod::Recursive; + auto checkHashType = [&](const std::optional & hash) { if (hash.has_value() && !(hash->type == htSHA1 || hash->type == htSHA256)) @@ -420,20 +436,32 @@ struct GitInputScheme : InputScheme auto getLockedAttrs = [&]() { - checkHashType(input.getRev()); - - return Attrs({ + Attrs attrs({ {"type", cacheType}, {"name", name}, - {"rev", input.getRev()->gitRev()}, }); + if (auto optH = input.getTreeHash()) { + auto h = *std::move(optH); + checkHashType(h); + attrs.insert_or_assign("treeHash", h.gitRev()); + } + if (auto optH = input.getRev()) { + auto h = *std::move(optH); + checkHashType(h); + attrs.insert_or_assign("rev", h.gitRev()); + } + if (maybeGetBoolAttr(input.attrs, "gitIngestion").value_or((bool) input.getTreeHash())) + attrs.insert_or_assign("gitIngestion", true); + return attrs; }; auto makeResult = [&](const Attrs & infoAttrs, StorePath && storePath) -> std::pair { - assert(input.getRev()); + assert(input.getRev() || input.getTreeHash()); + /* If a rev or tree hash was originally set, that original value must be preserved. */ assert(!_input.getRev() || _input.getRev() == input.getRev()); + assert(!_input.getTreeHash() || _input.getTreeHash() == input.getTreeHash()); if (!shallow) input.attrs.insert_or_assign("revCount", getIntAttr(infoAttrs, "revCount")); input.attrs.insert_or_assign("lastModified", getIntAttr(infoAttrs, "lastModified")); @@ -450,10 +478,10 @@ struct GitInputScheme : InputScheme /* If this is a local directory and no ref or revision is given, allow fetching directly from a dirty workdir.
*/ - if (!input.getRef() && !input.getRev() && isLocal) { + if (!input.getRef() && !input.getRev() && !input.getTreeHash() && isLocal) { auto workdirInfo = getWorkdirInfo(input, actualUrl); if (!workdirInfo.clean) { - return fetchFromWorkdir(store, input, actualUrl, workdirInfo); + return fetchFromWorkdir(store, input, actualUrl, workdirInfo, ingestionMethod); } } @@ -462,6 +490,8 @@ struct GitInputScheme : InputScheme {"name", name}, {"url", actualUrl}, }); + if (ingestionMethod == FileIngestionMethod::Git) + unlockedAttrs.insert_or_assign("gitIngestion", true); Path repoDir; @@ -476,9 +506,14 @@ struct GitInputScheme : InputScheme unlockedAttrs.insert_or_assign("ref", *head); } - if (!input.getRev()) - input.attrs.insert_or_assign("rev", - Hash::parseAny(chomp(runProgram("git", true, { "-C", actualUrl, "--git-dir", gitDir, "rev-parse", *input.getRef() })), htSHA1).gitRev()); + if (!input.getRev() && !input.getTreeHash()) { + auto getHash = [&](std::string rev) { + return Hash::parseAny(chomp(runProgram("git", true, { "-C", actualUrl, "--git-dir", gitDir, "rev-parse", rev })), htSHA1).gitRev(); + }; + input.attrs.insert_or_assign("rev", getHash(*input.getRef())); + if (experimentalFeatureSettings.isEnabled(Xp::GitHashing)) + input.attrs.insert_or_assign("treeHash", getHash(*input.getRef() + ":")); + } repoDir = actualUrl; } else { @@ -498,11 +533,33 @@ struct GitInputScheme : InputScheme } if (auto res = getCache()->lookup(store, unlockedAttrs)) { - auto rev2 = Hash::parseAny(getStrAttr(res->first, "rev"), htSHA1); - if (!input.getRev() || input.getRev() == rev2) { - input.attrs.insert_or_assign("rev", rev2.gitRev()); - return makeResult(res->first, std::move(res->second)); + bool found = false; + + if (std::optional revS = maybeGetStrAttr(res->first, "rev")) { + auto rev2 = Hash::parseAny(*revS, htSHA1); + if (!input.getRev() || input.getRev() == rev2) { + input.attrs.insert_or_assign("rev", rev2.gitRev()); + found = true; + } } + + if (experimentalFeatureSettings.isEnabled(Xp::GitHashing)) { + if (std::optional treeHashS = maybeGetStrAttr(res->first, "treeHash")) { + auto treeHash2 = Hash::parseNonSRIUnprefixed(*treeHashS, htSHA1); + if (!input.getTreeHash() || input.getTreeHash() == treeHash2) { + input.attrs.insert_or_assign("treeHash", treeHash2.gitRev()); + found = true; + } + } + } + + bool correctIngestion = + maybeGetBoolAttr(input.attrs, "gitIngestion").value_or(false) + ? ingestionMethod == FileIngestionMethod::Git + : ingestionMethod == FileIngestionMethod::Recursive; + + if (correctIngestion && found) + return makeResult(res->first, std::move(res->second)); } Path cacheDir = getCachePath(actualUrl); @@ -524,11 +581,12 @@ struct GitInputScheme : InputScheme bool doFetch; time_t now = time(0); - /* If a rev was specified, we need to fetch if it's not in the - repo. */ - if (input.getRev()) { + /* If a rev or treeHash is specified, we need to fetch if + it's not in the repo. */ + if (input.getRev() || input.getTreeHash()) { try { - runProgram("git", true, { "-C", repoDir, "--git-dir", gitDir, "cat-file", "-e", input.getRev()->gitRev() }); + auto fetchHash = input.getTreeHash() ? 
input.getTreeHash() : input.getRev(); + runProgram("git", true, { "-C", repoDir, "--git-dir", gitDir, "cat-file", "-e", fetchHash->gitRev() }); doFetch = false; } catch (ExecError & e) { if (WIFEXITED(e.status)) { @@ -575,12 +633,23 @@ struct GitInputScheme : InputScheme warn("could not update cached head '%s' for '%s'", *input.getRef(), actualUrl); } - if (!input.getRev()) - input.attrs.insert_or_assign("rev", Hash::parseAny(chomp(readFile(localRefFile)), htSHA1).gitRev()); + if (!input.getRev() && !input.getTreeHash()) { + auto rev = Hash::parseAny(chomp(readFile(localRefFile)), htSHA1).gitRev(); + input.attrs.insert_or_assign("rev", rev); + if (experimentalFeatureSettings.isEnabled(Xp::GitHashing)) + input.attrs.insert_or_assign("treeHash", + Hash::parseAny(chomp(runProgram("git", true, { "-C", repoDir, "rev-parse", rev + ":" })), htSHA1).gitRev()); + } // cache dir lock is removed at scope end; we will only use read-only operations on specific revisions in the remainder } + if (input.getTreeHash()) { + auto type = chomp(runProgram("git", true, { "-C", repoDir, "--git-dir", gitDir, "cat-file", "-t", input.getTreeHash()->gitRev() })); + if (type != "tree") + throw Error("Need a tree object, found '%s' object in %s", type, input.getTreeHash()->gitRev()); + } + bool isShallow = chomp(runProgram("git", true, { "-C", repoDir, "--git-dir", gitDir, "rev-parse", "--is-shallow-repository" })) == "true"; if (isShallow && !shallow) @@ -588,7 +657,10 @@ struct GitInputScheme : InputScheme // FIXME: check whether rev is an ancestor of ref. - printTalkative("using revision %s of repo '%s'", input.getRev()->gitRev(), actualUrl); + if (auto rev = input.getRev()) + printTalkative("using revision %s of repo '%s'", rev->gitRev(), actualUrl); + if (auto treeHash = input.getTreeHash()) + printTalkative("using tree %s of repo '%s'", treeHash->gitRev(), actualUrl); /* Now that we know the ref, check again whether we have it in the store. */ @@ -599,10 +671,20 @@ struct GitInputScheme : InputScheme AutoDelete delTmpDir(tmpDir, true); PathFilter filter = defaultPathFilter; + auto [fetchHash, fetchHashType] = input.getTreeHash() + ? (std::pair { input.getTreeHash().value(), true }) + : (std::pair { input.getRev().value(), false }); + auto result = runProgram(RunOptions { .program = "git", - .args = { "-C", repoDir, "--git-dir", gitDir, "cat-file", "commit", input.getRev()->gitRev() }, - .mergeStderrToStdout = true + .args = { + "-C", repoDir, + "--git-dir", gitDir, + "cat-file", + fetchHashType ? 
"tree" : "commit", + fetchHash.gitRev(), + }, + .mergeStderrToStdout = true, }); if (WEXITSTATUS(result.first) == 128 && result.second.find("bad file") != std::string::npos) @@ -612,13 +694,18 @@ struct GitInputScheme : InputScheme "Please make sure that the " ANSI_BOLD "rev" ANSI_NORMAL " exists on the " ANSI_BOLD "ref" ANSI_NORMAL " you've specified or add " ANSI_BOLD "allRefs = true;" ANSI_NORMAL " to " ANSI_BOLD "fetchGit" ANSI_NORMAL ".", - input.getRev()->gitRev(), + fetchHash.gitRev(), *input.getRef(), actualUrl ); } if (submodules) { + if (input.getTreeHash()) + throw Error("Cannot fetch specific tree hashes if there are submodules"); + if (ingestionMethod == FileIngestionMethod::Git) + warn("Nix's computed git tree hash will be different when submodules are converted to regular directories"); + Path tmpGitDir = createTempDir(); AutoDelete delTmpGitDir(tmpGitDir, true); @@ -633,7 +720,7 @@ struct GitInputScheme : InputScheme "--update-head-ok", "--", repoDir, "refs/*:refs/*" }, {}, true); } - runProgram("git", true, { "-C", tmpDir, "checkout", "--quiet", input.getRev()->gitRev() }); + runProgram("git", true, { "-C", tmpDir, "checkout", "--quiet", input.getTreeHash() ? input.getTreeHash()->gitRev() : input.getRev()->gitRev() }); /* Ensure that we use the correct origin for fetching submodules. This matters for submodules with relative @@ -662,12 +749,13 @@ struct GitInputScheme : InputScheme filter = isNotDotGitDirectory; } else { + auto & fetchHash_ = fetchHash; // Work-around clang restriction. // FIXME: should pipe this, or find some better way to extract a // revision. auto source = sinkToSource([&](Sink & sink) { runProgram2({ .program = "git", - .args = { "-C", repoDir, "--git-dir", gitDir, "archive", input.getRev()->gitRev() }, + .args = { "-C", repoDir, "--git-dir", gitDir, "archive", fetchHash_.gitRev() }, .standardOut = &sink }); }); @@ -675,20 +763,39 @@ struct GitInputScheme : InputScheme unpackTarfile(*source, tmpDir); } - auto storePath = store->addToStore(name, tmpDir, FileIngestionMethod::Recursive, htSHA256, filter); + auto storePath = store->addToStore(name, tmpDir, ingestionMethod, ingestionMethod == FileIngestionMethod::Git ? 
htSHA1 : htSHA256, filter); - auto lastModified = std::stoull(runProgram("git", true, { "-C", repoDir, "--git-dir", gitDir, "log", "-1", "--format=%ct", "--no-show-signature", input.getRev()->gitRev() })); + // verify treeHash is what we actually obtained in the nix store + if (auto treeHash = input.getTreeHash()) { + auto path = store->toRealPath(store->printStorePath(storePath)); + auto gotHash = dumpGitHash(htSHA1, path); + if (gotHash != input.getTreeHash()) + throw Error("Git hash mismatch in input '%s' (%s), expected '%s', got '%s'", + input.to_string(), path, treeHash->gitRev(), gotHash.gitRev()); + } - Attrs infoAttrs({ - {"rev", input.getRev()->gitRev()}, - {"lastModified", lastModified}, - }); + Attrs infoAttrs({}); + + if (auto rev = input.getRev()) { + infoAttrs.insert_or_assign("rev", rev->gitRev()); + auto lastModified = std::stoull(runProgram("git", true, { "-C", repoDir, "--git-dir", gitDir, "log", "-1", "--format=%ct", "--no-show-signature", rev->gitRev() })); + infoAttrs.insert_or_assign("lastModified", lastModified); + } else + infoAttrs.insert_or_assign("lastModified", (uint64_t) 0); - if (!shallow) - infoAttrs.insert_or_assign("revCount", - std::stoull(runProgram("git", true, { "-C", repoDir, "--git-dir", gitDir, "rev-list", "--count", input.getRev()->gitRev() }))); + if (experimentalFeatureSettings.isEnabled(Xp::GitHashing)) + if (auto treeHash = input.getTreeHash()) + infoAttrs.insert_or_assign("treeHash", treeHash->gitRev()); + + if (!shallow) { + if (auto rev = input.getRev()) + infoAttrs.insert_or_assign("revCount", + std::stoull(runProgram("git", true, { "-C", repoDir, "--git-dir", gitDir, "rev-list", "--count", rev->gitRev() }))); + else + infoAttrs.insert_or_assign("revCount", (uint64_t) 0); + } - if (!_input.getRev()) + if (!_input.getRev() && !_input.getTreeHash()) getCache()->add( store, unlockedAttrs, diff --git a/src/libfetchers/github.cc b/src/libfetchers/github.cc index 291f457f0d35..4fa4f3e07b29 100644 --- a/src/libfetchers/github.cc +++ b/src/libfetchers/github.cc @@ -134,7 +134,7 @@ struct GitArchiveInputScheme : InputScheme bool hasAllInfo(const Input & input) const override { - return input.getRev() && maybeGetIntAttr(input.attrs, "lastModified"); + return input.getNarHash() && input.getRev() && maybeGetIntAttr(input.attrs, "lastModified"); } Input applyOverrides( diff --git a/src/libfetchers/mercurial.cc b/src/libfetchers/mercurial.cc index 51fd1ed428b0..74cb9c36380a 100644 --- a/src/libfetchers/mercurial.cc +++ b/src/libfetchers/mercurial.cc @@ -102,7 +102,8 @@ struct MercurialInputScheme : InputScheme { // FIXME: ugly, need to distinguish between dirty and clean // default trees. 
- return input.getRef() == "default" || maybeGetIntAttr(input.attrs, "revCount"); + return input.getNarHash() + && (input.getRef() == "default" || maybeGetIntAttr(input.attrs, "revCount")); } Input applyOverrides( diff --git a/src/libfetchers/path.cc b/src/libfetchers/path.cc index 01f1be97822c..10670bb4ac5e 100644 --- a/src/libfetchers/path.cc +++ b/src/libfetchers/path.cc @@ -68,7 +68,7 @@ struct PathInputScheme : InputScheme bool hasAllInfo(const Input & input) const override { - return true; + return (bool) input.getNarHash(); } std::optional getSourcePath(const Input & input) override diff --git a/src/libstore/binary-cache-store.cc b/src/libstore/binary-cache-store.cc index b4fea693f5b8..df7f63fbd9f8 100644 --- a/src/libstore/binary-cache-store.cc +++ b/src/libstore/binary-cache-store.cc @@ -411,10 +411,15 @@ StorePath BinaryCacheStore::addToStore( implementation of this method in terms of addToStoreFromDump. */ HashSink sink { hashAlgo }; - if (method == FileIngestionMethod::Recursive) { + switch (method) { + case FileIngestionMethod::Recursive: dumpPath(srcPath, sink, filter); - } else { + break; + case FileIngestionMethod::Flat: readFile(srcPath, sink); + break; + case FileIngestionMethod::Git: + throw Error("cannot add to binary cache store using the git file ingestion method"); } auto h = sink.finish().first; diff --git a/src/libstore/build/local-derivation-goal.cc b/src/libstore/build/local-derivation-goal.cc index 64b55ca6ac2d..9883321efeb6 100644 --- a/src/libstore/build/local-derivation-goal.cc +++ b/src/libstore/build/local-derivation-goal.cc @@ -8,6 +8,7 @@ #include "finally.hh" #include "util.hh" #include "archive.hh" +#include "git.hh" #include "compression.hh" #include "daemon.hh" #include "topo-sort.hh" @@ -2480,23 +2481,34 @@ SingleDrvOutputs LocalDerivationGoal::registerOutputs() rewriteOutput(outputRewrites); /* FIXME optimize and deduplicate with addToStore */ std::string oldHashPart { scratchPath->hashPart() }; - HashModuloSink caSink { outputHash.hashType, oldHashPart }; + Hash got { outputHash.hashType }; // Dummy value std::visit(overloaded { [&](const TextIngestionMethod &) { + HashModuloSink caSink { outputHash.hashType, oldHashPart }; readFile(actualPath, caSink); + got = caSink.finish().first; }, [&](const FileIngestionMethod & m2) { switch (m2) { - case FileIngestionMethod::Recursive: + case FileIngestionMethod::Recursive: { + HashModuloSink caSink { outputHash.hashType, oldHashPart }; dumpPath(actualPath, caSink); + got = caSink.finish().first; break; - case FileIngestionMethod::Flat: + } + case FileIngestionMethod::Flat: { + HashModuloSink caSink { outputHash.hashType, oldHashPart }; readFile(actualPath, caSink); + got = caSink.finish().first; + break; + } + case FileIngestionMethod::Git: { + got = dumpGitHash(outputHash.hashType, (Path) tmpDir + "/tmp"); break; } + } }, }, outputHash.method.raw); - auto got = caSink.finish().first; auto optCA = ContentAddressWithReferences::fromPartsOpt( outputHash.method, diff --git a/src/libstore/content-address.cc b/src/libstore/content-address.cc index e290a8d387eb..0a5bc8424029 100644 --- a/src/libstore/content-address.cc +++ b/src/libstore/content-address.cc @@ -11,6 +11,8 @@ std::string makeFileIngestionPrefix(FileIngestionMethod m) return ""; case FileIngestionMethod::Recursive: return "r:"; + case FileIngestionMethod::Git: + return "git:"; default: throw Error("impossible, caught both cases"); } @@ -32,6 +34,8 @@ ContentAddressMethod ContentAddressMethod::parsePrefix(std::string_view & m) ContentAddressMethod 
method = FileIngestionMethod::Flat; if (splitPrefix(m, "r:")) method = FileIngestionMethod::Recursive; + if (splitPrefix(m, "git:")) + method = FileIngestionMethod::Git; else if (splitPrefix(m, "text:")) method = TextIngestionMethod {}; return method; @@ -99,6 +103,8 @@ static std::pair parseContentAddressMethodPrefix auto method = FileIngestionMethod::Flat; if (splitPrefix(rest, "r:")) method = FileIngestionMethod::Recursive; + if (splitPrefix(rest, "git:")) + method = FileIngestionMethod::Git; HashType hashType = parseHashType_(); return { std::move(method), diff --git a/src/libstore/content-address.hh b/src/libstore/content-address.hh index c4d619bdc63f..5d92cd575f9d 100644 --- a/src/libstore/content-address.hh +++ b/src/libstore/content-address.hh @@ -39,12 +39,26 @@ enum struct FileIngestionMethod : uint8_t { /** * Flat-file hashing. Directly ingest the contents of a single file */ - Flat = false, + Flat, + /** * Recursive (or NAR) hashing. Serializes the file-system object in Nix * Archive format and ingest that */ - Recursive = true + Recursive, + + /** + * Git hashing. In particular files are hashed as git "blobs", and + * directories are hashed as git "trees". + * + * @note Git's data model is slightly different, in that a plain + * file doesn't have an executable bit, directory entries do + * instead. We decide to treat a bare file as non-executable by fiat, + * as we do with `FileIngestionMethod::Flat` which also lacks this + * information. Thus, Git can encode some but not all of Nix's "File + * System Objects", and this sort of hashing is likewise partial. + */ + Git, }; /** diff --git a/src/libstore/daemon.cc b/src/libstore/daemon.cc index 8cbf6f044faf..af6d6d69b68e 100644 --- a/src/libstore/daemon.cc +++ b/src/libstore/daemon.cc @@ -13,6 +13,7 @@ #include "archive.hh" #include "derivations.hh" #include "args.hh" +#include "git.hh" namespace nix::daemon { @@ -462,13 +463,17 @@ static void performOp(TunnelLogger * logger, ref store, TeeSource savedNARSource(from, saved); ParseSink sink; /* null sink; just parse the NAR */ parseDump(sink, savedNARSource); - } else { + } else if (method == FileIngestionMethod::Flat) { /* Incrementally parse the NAR file, stripping the metadata, and streaming the sole file we expect into `saved`. */ RetrieveRegularNARSink savedRegular { saved }; parseDump(savedRegular, from); if (!savedRegular.regular) throw Error("regular file expected"); + } else { + /* Should have validated above that no other file ingestion + method was used.
*/ + assert(false); } }); logger->startWork(); diff --git a/src/libstore/local-store.cc b/src/libstore/local-store.cc index 17b4ecc73125..7cafa8882104 100644 --- a/src/libstore/local-store.cc +++ b/src/libstore/local-store.cc @@ -1,5 +1,6 @@ #include "local-store.hh" #include "globals.hh" +#include "git.hh" #include "archive.hh" #include "pathlocks.hh" #include "worker-protocol.hh" @@ -1322,10 +1323,17 @@ StorePath LocalStore::addToStoreFromDump(Source & source0, std::string_view name delTempDir = std::make_unique(tempDir); tempPath = tempDir + "/x"; - if (method == FileIngestionMethod::Recursive) - restorePath(tempPath, bothSource); - else + switch (method) { + case FileIngestionMethod::Flat: writeFile(tempPath, bothSource); + break; + case FileIngestionMethod::Recursive: + restorePath(tempPath, bothSource); + break; + case FileIngestionMethod::Git: + restoreGit(tempPath, bothSource, realStoreDir, storeDir); + break; + } dump.clear(); } @@ -1364,10 +1372,17 @@ StorePath LocalStore::addToStoreFromDump(Source & source0, std::string_view name if (inMemory) { StringSource dumpSource { dump }; /* Restore from the NAR in memory. */ - if (method == FileIngestionMethod::Recursive) - restorePath(realPath, dumpSource); - else + switch (method) { + case FileIngestionMethod::Flat: writeFile(realPath, dumpSource); + break; + case FileIngestionMethod::Recursive: + restorePath(realPath, dumpSource); + break; + case FileIngestionMethod::Git: + restoreGit(realPath, dumpSource, realStoreDir, storeDir); + break; + } } else { /* Move the temporary path we restored above. */ moveFile(tempPath, realPath); @@ -1866,25 +1881,37 @@ ContentAddress LocalStore::hashCAPath( const std::string_view pathHash ) { - HashModuloSink caSink ( hashType, std::string(pathHash) ); + Hash hash { htSHA256 }; // throwaway def to appease C++ std::visit(overloaded { [&](const TextIngestionMethod &) { + HashModuloSink caSink ( hashType, std::string(pathHash) ); readFile(path, caSink); + hash = caSink.finish().first; }, [&](const FileIngestionMethod & m2) { switch (m2) { - case FileIngestionMethod::Recursive: + case FileIngestionMethod::Recursive: { + HashModuloSink caSink ( hashType, std::string(pathHash) ); dumpPath(path, caSink); + hash = caSink.finish().first; break; - case FileIngestionMethod::Flat: + } + case FileIngestionMethod::Flat: { + HashModuloSink caSink ( hashType, std::string(pathHash) ); readFile(path, caSink); + hash = caSink.finish().first; break; } + case FileIngestionMethod::Git: { + hash = dumpGitHash(hashType, path); + break; + } + } }, }, method.raw); return ContentAddress { .method = method, - .hash = caSink.finish().first, + .hash = std::move(hash), }; } diff --git a/src/libstore/nar-accessor.cc b/src/libstore/nar-accessor.cc index f0dfcb19b77e..3cdd6b645136 100644 --- a/src/libstore/nar-accessor.cc +++ b/src/libstore/nar-accessor.cc @@ -69,9 +69,10 @@ struct NarAccessor : public FSAccessor createMember(path, {FSAccessor::Type::tDirectory, false, 0, 0}); } - void createRegularFile(const Path & path) override + void createRegularFile(const Path & path, bool executable = false) override { createMember(path, {FSAccessor::Type::tRegular, false, 0, 0}); + if (executable) isExecutable(); } void closeRegularFile() override diff --git a/src/libstore/remote-store.cc b/src/libstore/remote-store.cc index 58f72beb90c1..de16dfe920e9 100644 --- a/src/libstore/remote-store.cc +++ b/src/libstore/remote-store.cc @@ -13,6 +13,7 @@ #include "derivations.hh" #include "pool.hh" #include "finally.hh" +#include "git.hh" #include 
"logging.hh" #include "callback.hh" #include "filetransfer.hh" diff --git a/src/libstore/store-api.cc b/src/libstore/store-api.cc index 28689e100e24..035588c1a10a 100644 --- a/src/libstore/store-api.cc +++ b/src/libstore/store-api.cc @@ -10,6 +10,7 @@ #include "references.hh" #include "archive.hh" #include "callback.hh" +#include "git.hh" #include "remote-store.hh" #include @@ -114,8 +115,8 @@ StorePath Store::followLinksToStorePath(std::string_view path) const for paths copied by addToStore() or produced by fixed-output derivations: the string "fixed:out:::", where - = "r:" for recursive (path) hashes, or "" for flat - (file) hashes + = "r:" for recursive (path) hashes, "git:" for git + paths, or "" for flat (file) hashes = "md5", "sha1" or "sha256" = base-16 representation of the path or flat hash of the contents of the path (or expected contents of the @@ -184,6 +185,9 @@ static std::string makeType( StorePath Store::makeFixedOutputPath(std::string_view name, const FixedOutputInfo & info) const { + if (info.method == FileIngestionMethod::Git && info.hash.type != htSHA1) + throw Error("Git file ingestion must use sha1 hash"); + if (info.hash.type == htSHA256 && info.method == FileIngestionMethod::Recursive) { return makeStorePath(makeType(*this, "source", info.references), info.hash, name); } else { @@ -228,9 +232,22 @@ StorePath Store::makeFixedOutputPathFromCA(std::string_view name, const ContentA std::pair Store::computeStorePathForPath(std::string_view name, const Path & srcPath, FileIngestionMethod method, HashType hashAlgo, PathFilter & filter) const { - Hash h = method == FileIngestionMethod::Recursive - ? hashPath(hashAlgo, srcPath, filter).first - : hashFile(hashAlgo, srcPath); + Hash h { htSHA256 }; // throwaway def to appease C++ + switch (method) { + case FileIngestionMethod::Recursive: { + h = hashPath(hashAlgo, srcPath, filter).first; + break; + } + case FileIngestionMethod::Git: { + h = hashGit(hashAlgo, srcPath, filter).first; + break; + } + case FileIngestionMethod::Flat: { + h = hashFile(hashAlgo, srcPath); + break; + } + } + FixedOutputInfo caInfo { .method = method, .hash = h, @@ -263,10 +280,28 @@ StorePath Store::addToStore( { Path srcPath(absPath(_srcPath)); auto source = sinkToSource([&](Sink & sink) { - if (method == FileIngestionMethod::Recursive) + switch (method) { + case FileIngestionMethod::Recursive: { dumpPath(srcPath, sink, filter); - else + break; + } + case FileIngestionMethod::Git: { + // recursively add to store if path is a directory + struct stat st; + if (lstat(srcPath.c_str(), &st)) + throw SysError("getting attributes of path '%1%'", srcPath); + if (S_ISDIR(st.st_mode)) + for (auto & i : readDirectory(srcPath)) + addToStore("git", srcPath + "/" + i.name, method, hashAlgo, filter, repair); + + dumpGit(hashAlgo, srcPath, sink, filter); + break; + } + case FileIngestionMethod::Flat: { readFile(srcPath, sink); + break; + } + } }); return addToStoreFromDump(*source, name, method, hashAlgo, repair, references); } @@ -430,6 +465,8 @@ ValidPathInfo Store::addToStoreSlow(std::string_view name, const Path & srcPath, auto hash = method == FileIngestionMethod::Recursive && hashAlgo == htSHA256 ? narHash + : method == FileIngestionMethod::Git + ? 
hashGit(hashAlgo, srcPath).first : caHashSink.finish().first; if (expectedCAHash && expectedCAHash != hash) diff --git a/src/libutil/archive.cc b/src/libutil/archive.cc index 268a798d900d..4e3ed4a7534e 100644 --- a/src/libutil/archive.cc +++ b/src/libutil/archive.cc @@ -302,71 +302,6 @@ void parseDump(ParseSink & sink, Source & source) } -struct RestoreSink : ParseSink -{ - Path dstPath; - AutoCloseFD fd; - - void createDirectory(const Path & path) override - { - Path p = dstPath + path; - if (mkdir(p.c_str(), 0777) == -1) - throw SysError("creating directory '%1%'", p); - }; - - void createRegularFile(const Path & path) override - { - Path p = dstPath + path; - fd = open(p.c_str(), O_CREAT | O_EXCL | O_WRONLY | O_CLOEXEC, 0666); - if (!fd) throw SysError("creating file '%1%'", p); - } - - void closeRegularFile() override - { - /* Call close explicitly to make sure the error is checked */ - fd.close(); - } - - void isExecutable() override - { - struct stat st; - if (fstat(fd.get(), &st) == -1) - throw SysError("fstat"); - if (fchmod(fd.get(), st.st_mode | (S_IXUSR | S_IXGRP | S_IXOTH)) == -1) - throw SysError("fchmod"); - } - - void preallocateContents(uint64_t len) override - { - if (!archiveSettings.preallocateContents) - return; - -#if HAVE_POSIX_FALLOCATE - if (len) { - errno = posix_fallocate(fd.get(), 0, len); - /* Note that EINVAL may indicate that the underlying - filesystem doesn't support preallocation (e.g. on - OpenSolaris). Since preallocation is just an - optimisation, ignore it. */ - if (errno && errno != EINVAL && errno != EOPNOTSUPP && errno != ENOSYS) - throw SysError("preallocating file of %1% bytes", len); - } -#endif - } - - void receiveContents(std::string_view data) override - { - writeFull(fd.get(), data); - } - - void createSymlink(const Path & path, const std::string & target) override - { - Path p = dstPath + path; - nix::createSymlink(target, p); - } -}; - - void restorePath(const Path & path, Source & source) { RestoreSink sink; diff --git a/src/libutil/archive.hh b/src/libutil/archive.hh index 2cf164a417db..36e6fc41fb0a 100644 --- a/src/libutil/archive.hh +++ b/src/libutil/archive.hh @@ -3,7 +3,7 @@ #include "types.hh" #include "serialise.hh" - +#include "fs-sink.hh" namespace nix { @@ -72,22 +72,6 @@ time_t dumpPathAndGetMtime(const Path & path, Sink & sink, */ void dumpString(std::string_view s, Sink & sink); -/** - * \todo Fix this API, it sucks. - */ -struct ParseSink -{ - virtual void createDirectory(const Path & path) { }; - - virtual void createRegularFile(const Path & path) { }; - virtual void closeRegularFile() { }; - virtual void isExecutable() { }; - virtual void preallocateContents(uint64_t size) { }; - virtual void receiveContents(std::string_view data) { }; - - virtual void createSymlink(const Path & path, const std::string & target) { }; -}; - /** * If the NAR archive contains a single file at top-level, then save * the contents of the file to `s`. Otherwise barf. diff --git a/src/libutil/experimental-features.cc b/src/libutil/experimental-features.cc index 782331283cad..4f2ae03c46cb 100644 --- a/src/libutil/experimental-features.cc +++ b/src/libutil/experimental-features.cc @@ -12,7 +12,7 @@ struct ExperimentalFeatureDetails std::string_view description; }; -constexpr std::array xpFeatureDetails = {{ +constexpr std::array xpFeatureDetails = {{ { .tag = Xp::CaDerivations, .name = "ca-derivations", @@ -70,6 +70,14 @@ constexpr std::array xpFeatureDetails = {{ [`nix`](@docroot@/command-ref/new-cli/nix.md) for details. 
)", }, + { + .tag = Xp::GitHashing, + .name = "git-hashing", + .description = R"( + Allow creating (content-addressed) store objects which are hashed via Git's hashing algorithm. + These store objects will not be understandable by older versions of Nix. + )", + }, { .tag = Xp::RecursiveNix, .name = "recursive-nix", diff --git a/src/libutil/experimental-features.hh b/src/libutil/experimental-features.hh index add592ae6245..f0e09a41f121 100644 --- a/src/libutil/experimental-features.hh +++ b/src/libutil/experimental-features.hh @@ -21,6 +21,7 @@ enum struct ExperimentalFeature ImpureDerivations, Flakes, NixCommand, + GitHashing, RecursiveNix, NoUrlLiterals, FetchClosure, diff --git a/src/libutil/fs-sink.cc b/src/libutil/fs-sink.cc new file mode 100644 index 000000000000..978b676ba4eb --- /dev/null +++ b/src/libutil/fs-sink.cc @@ -0,0 +1,104 @@ +#include + +#include "config.hh" +#include "fs-sink.hh" + +namespace nix { + + +struct RestoreSinkSettings : Config +{ + Setting preallocateContents{this, true, "preallocate-contents", + "Whether to preallocate files when writing objects with known size."}; +}; + +static RestoreSinkSettings restoreSinkSettings; + +static GlobalConfig::Register r1(&restoreSinkSettings); + + +void RestoreSink::createDirectory(const Path & path) +{ + Path p = dstPath + path; + if (mkdir(p.c_str(), 0777) == -1) + throw SysError("creating directory '%1%'", p); +}; + +void RestoreSink::createRegularFile(const Path & path, bool executable) +{ + Path p = dstPath + path; + fd = open(p.c_str(), O_CREAT | O_EXCL | O_WRONLY | O_CLOEXEC, executable ? 0777 : 0666); + if (!fd) throw SysError("creating file '%1%'", p); +} + +void RestoreSink::closeRegularFile() +{ + /* Call close explicitly to make sure the error is checked */ + fd.close(); +} + +void RestoreSink::isExecutable() +{ + struct stat st; + if (fstat(fd.get(), &st) == -1) + throw SysError("fstat"); + if (fchmod(fd.get(), st.st_mode | (S_IXUSR | S_IXGRP | S_IXOTH)) == -1) + throw SysError("fchmod"); +} + +void RestoreSink::preallocateContents(uint64_t len) +{ + if (!restoreSinkSettings.preallocateContents) + return; + +#ifdef HAVE_POSIX_FALLOCATE + if (len) { + errno = posix_fallocate(fd.get(), 0, len); + /* Note that EINVAL may indicate that the underlying + filesystem doesn't support preallocation (e.g. on + OpenSolaris). Since preallocation is just an + optimisation, ignore it. 
*/ + if (errno && errno != EINVAL && errno != EOPNOTSUPP && errno != ENOSYS) + throw SysError("preallocating file of %1% bytes", len); + } +#endif +} + +void RestoreSink::receiveContents(std::string_view data) +{ + writeFull(fd.get(), data); +} + +void RestoreSink::createSymlink(const Path & path, const std::string & target) +{ + Path p = dstPath + path; + nix::createSymlink(target, p); +} + +void RestoreSink::copyFile(const Path & source) +{ + FdSink sink(fd.get()); + readFile(source, sink); +} + +void RestoreSink::copyDirectory(const Path & source, const Path & destination) +{ + Path p = dstPath + destination; + createDirectory(destination); + for (auto & i : readDirectory(source)) { + struct stat st; + Path entry = source + "/" + i.name; + if (lstat(entry.c_str(), &st)) + throw SysError("getting attributes of path '%1%'", entry); + if (S_ISREG(st.st_mode)) { + createRegularFile(destination + "/" + i.name, st.st_mode & S_IXUSR); + copyFile(entry); + } else if (S_ISDIR(st.st_mode)) + copyDirectory(entry, destination + "/" + i.name); + else + throw Error("Unknown file: %s", entry); + } +} + + +} diff --git a/src/libutil/fs-sink.hh b/src/libutil/fs-sink.hh new file mode 100644 index 000000000000..dfb9c67c1934 --- /dev/null +++ b/src/libutil/fs-sink.hh @@ -0,0 +1,48 @@ +#pragma once + +#include "types.hh" +#include "serialise.hh" + +namespace nix { + +/** + * \todo Fix this API, it sucks. + */ +struct ParseSink +{ + virtual void createDirectory(const Path & path) { }; + + virtual void createRegularFile(const Path & path, bool executable = false) { }; + virtual void closeRegularFile() { }; + virtual void isExecutable() { }; + virtual void preallocateContents(uint64_t size) { }; + virtual void receiveContents(std::string_view data) { }; + + virtual void createSymlink(const Path & path, const std::string & target) { }; + + virtual void copyFile(const Path & source) { }; + virtual void copyDirectory(const Path & source, const Path & destination) { }; +}; + +struct RestoreSink : ParseSink +{ + Path dstPath; + AutoCloseFD fd; + + + void createDirectory(const Path & path) override; + + void createRegularFile(const Path & path, bool executable = false) override; + void closeRegularFile() override; + void isExecutable() override; + void preallocateContents(uint64_t size) override; + void receiveContents(std::string_view data) override; + + void createSymlink(const Path & path, const std::string & target) override; + + void copyFile(const Path & source) override; + void copyDirectory(const Path & source, const Path & destination) override; +}; + + +} diff --git a/src/libutil/git.cc b/src/libutil/git.cc index f35c2fdb75cf..9bfdd48ac5e9 100644 --- a/src/libutil/git.cc +++ b/src/libutil/git.cc @@ -1,8 +1,238 @@ -#include "git.hh" +#include +#include +#include +#include #include +#include // for strcasecmp + +#include +#include +#include +#include +#include + +#include "util.hh" +#include "config.hh" +#include "hash.hh" + +#include "git.hh" +#include "serialise.hh" + +using namespace std::string_literals; + namespace nix { + +static void parse(ParseSink & sink, Source & source, const Path & path, const Path & realStoreDir, const Path & storeDir); + +// Converts a Path to a ParseSink +void restoreGit(const Path & path, Source & source, const Path & realStoreDir, const Path & storeDir) { + RestoreSink sink; + sink.dstPath = path; + parseGit(sink, source, realStoreDir, storeDir); +} + +void parseGit(ParseSink & sink, Source & source, const Path & realStoreDir, const Path & storeDir) +{ + parse(sink, source, 
"", realStoreDir, storeDir); +} + +static std::string getStringUntil(Source & source, char byte) +{ + std::string s; + char n[1]; + source(std::string_view { n, 1 }); + while (*n != byte) { + s += *n; + source(std::string_view { n, 1 }); + } + return s; +} + +static std::string getString(Source & source, int n) +{ + std::string v; + v.resize(n); + source(v); + return v; +} + +// Unfortunately, no access to libstore headers here. +static std::string getStoreEntry(const Path & storeDir, Hash hash, std::string name) +{ + Hash hash1 = hashString(htSHA256, "fixed:out:git:" + hash.to_string(Base::Base16, true) + ":"); + Hash hash2 = hashString(htSHA256, "output:out:" + hash1.to_string(Base::Base16, true) + ":" + storeDir + ":" + name); + Hash hash3 = compressHash(hash2, 20); + + return hash3.to_string(Base::Base32, false) + "-" + name; +} + +static void parse(ParseSink & sink, Source & source, const Path & path, const Path & realStoreDir, const Path & storeDir) +{ + auto type = getString(source, 5); + + if (type == "blob ") { + sink.createRegularFile(path); + + unsigned long long size = std::stoi(getStringUntil(source, 0)); + + sink.preallocateContents(size); + + unsigned long long left = size; + std::string buf; + buf.reserve(65536); + + while (left) { + checkInterrupt(); + buf.resize(std::min((unsigned long long)buf.capacity(), left)); + source(buf); + sink.receiveContents(buf); + left -= buf.size(); + } + } else if (type == "tree ") { + unsigned long long size = std::stoi(getStringUntil(source, 0)); + unsigned long long left = size; + + sink.createDirectory(path); + + while (left) { + std::string perms = getStringUntil(source, ' '); + left -= perms.size(); + left -= 1; + + int perm = std::stoi(perms); + if (perm != 100644 && perm != 100755 && perm != 644 && perm != 755 && perm != 40000) + throw Error("Unknown Git permission: %d", perm); + + std::string name = getStringUntil(source, 0); + left -= name.size(); + left -= 1; + + std::string hashs = getString(source, 20); + left -= 20; + + Hash hash(htSHA1); + std::copy(hashs.begin(), hashs.end(), hash.hash); + + std::string entryName = getStoreEntry(storeDir, hash, "git"); + Path entry = absPath(realStoreDir + "/" + entryName); + + struct stat st; + if (lstat(entry.c_str(), &st)) + throw SysError("getting attributes of path '%1%'", entry); + + if (S_ISREG(st.st_mode)) { + if (perm == 40000) + throw SysError("file is a file but expected to be a directory '%1%'", entry); + + sink.createRegularFile(path + "/" + name, perm == 100755 || perm == 755); + + sink.copyFile(entry); + } else if (S_ISDIR(st.st_mode)) { + if (perm != 40000) + throw SysError("file is a directory but expected to be a file '%1%'", entry); + + sink.copyDirectory(realStoreDir + "/" + entryName, path + "/" + name); + } else throw Error("file '%1%' has an unsupported type", entry); + } + } else throw Error("input doesn't look like a Git object"); +} + +// TODO stream file into sink, rather than reading into vector +GitMode dumpGitBlob(const Path & path, const struct stat st, Sink & sink) +{ + auto s = fmt("blob %d\0%s"s, std::to_string(st.st_size), readFile(path)); + + std::string v; + std::copy(s.begin(), s.end(), std::back_inserter(v)); + sink(v); + return st.st_mode & S_IXUSR + ? 
GitMode::Executable + : GitMode::Regular; +} + +GitMode dumpGitTree(const GitTree & entries, Sink & sink) +{ + std::vector v1; + + for (auto & i : entries) { + unsigned int mode; + switch (i.second.first) { + case GitMode::Directory: mode = 40000; break; + case GitMode::Executable: mode = 100755; break; + case GitMode::Regular: mode = 100644; break; + } + auto name = i.first; + if (i.second.first == GitMode::Directory) + name.pop_back(); + auto s1 = fmt("%d %s", mode, name); + std::copy(s1.begin(), s1.end(), std::back_inserter(v1)); + v1.push_back(0); + std::copy(i.second.second.hash, i.second.second.hash + 20, std::back_inserter(v1)); + } + + std::string v2; + auto s2 = fmt("tree %d"s, v1.size()); + std::copy(s2.begin(), s2.end(), std::back_inserter(v2)); + v2.push_back(0); + std::copy(v1.begin(), v1.end(), std::back_inserter(v2)); + + sink(v2); + + return GitMode::Directory; +} + +static std::pair dumpGitHashInternal(HashType ht, const Path & path, PathFilter & filter); + +static GitMode dumpGitInternal(HashType ht, const Path & path, Sink & sink, PathFilter & filter) +{ + struct stat st; + GitMode perm; + if (lstat(path.c_str(), &st)) + throw SysError("getting attributes of path '%1%'", path); + + if (S_ISREG(st.st_mode)) + perm = dumpGitBlob(path, st, sink); + else if (S_ISDIR(st.st_mode)) { + GitTree entries; + for (auto & i : readDirectory(path)) + if (filter(path + "/" + i.name)) { + auto result = dumpGitHashInternal(ht, path + "/" + i.name, filter); + + // correctly observe git order, see + // https://github.com/mirage/irmin/issues/352 + auto name = i.name; + if (result.first == GitMode::Directory) + name += "/"; + + entries.insert_or_assign(name, result); + } + perm = dumpGitTree(entries, sink); + } else throw Error("file '%1%' has an unsupported type", path); + + return perm; +} + + +static std::pair dumpGitHashInternal(HashType ht, const Path & path, PathFilter & filter) +{ + auto hashSink = new HashSink(ht); + auto perm = dumpGitInternal(ht, path, *hashSink, filter); + auto hash = hashSink->finish().first; + return std::pair { perm, hash }; +} + +Hash dumpGitHash(HashType ht, const Path & path, PathFilter & filter) +{ + return dumpGitHashInternal(ht, path, filter).second; +} + +void dumpGit(HashType ht, const Path & path, Sink & sink, PathFilter & filter) +{ + dumpGitInternal(ht, path, sink, filter); +} + namespace git { std::optional parseLsRemoteLine(std::string_view line) @@ -22,4 +252,5 @@ std::optional parseLsRemoteLine(std::string_view line) } } + } diff --git a/src/libutil/git.hh b/src/libutil/git.hh index bf2b9a2869ab..e39dfae163be 100644 --- a/src/libutil/git.hh +++ b/src/libutil/git.hh @@ -5,8 +5,36 @@ #include #include +#include "types.hh" +#include "serialise.hh" +#include "hash.hh" +#include "fs-sink.hh" + namespace nix { +enum struct GitMode { + Directory, + Executable, + Regular, +}; + +void restoreGit(const Path & path, Source & source, const Path & realStoreDir, const Path & storeDir); + +void parseGit(ParseSink & sink, Source & source, const Path & realStoreDir, const Path & storeDir); + +// Dumps a single file to a sink +GitMode dumpGitBlob(const Path & path, const struct stat st, Sink & sink); + +typedef std::map> GitTree; + +// Dumps a representation of a git tree to a sink +GitMode dumpGitTree(const GitTree & entries, Sink & sink); + +// Recursively dumps path, hashing as we go +Hash dumpGitHash(HashType ht, const Path & path, PathFilter & filter = defaultPathFilter); + +void dumpGit(HashType ht, const Path & path, Sink & sink, PathFilter & filter = 
defaultPathFilter); + namespace git { /** diff --git a/src/libutil/hash.cc b/src/libutil/hash.cc index 2c36d9d9498e..f04c6b9041df 100644 --- a/src/libutil/hash.cc +++ b/src/libutil/hash.cc @@ -8,6 +8,7 @@ #include "args.hh" #include "hash.hh" #include "archive.hh" +#include "git.hh" #include "split.hh" #include "util.hh" @@ -375,6 +376,13 @@ HashResult hashPath( return sink.finish(); } +HashResult hashGit( + HashType ht, const Path & path, PathFilter & filter) +{ + HashSink sink(ht); + dumpGit(ht, path, sink, filter); + return sink.finish(); +} Hash compressHash(const Hash & hash, unsigned int newSize) { diff --git a/src/libutil/hash.hh b/src/libutil/hash.hh index ae3ee40f4e34..0d4abd72b8f1 100644 --- a/src/libutil/hash.hh +++ b/src/libutil/hash.hh @@ -145,18 +145,28 @@ std::string printHash16or32(const Hash & hash); Hash hashString(HashType ht, std::string_view s); /** - * Compute the hash of the given file. + * Compute the hash of the given file, hashing its contents directly. + * + * (Metadata, such as the executable permission bit, is ignored.) */ Hash hashFile(HashType ht, const Path & path); /** - * Compute the hash of the given path. The hash is defined as - * (essentially) hashString(ht, dumpPath(path)). + * Compute the hash of the given path, serializing as a Nix Archive and + * then hashing that. + * + * The hash is defined as (essentially) hashString(ht, dumpPath(path)). */ typedef std::pair HashResult; HashResult hashPath(HashType ht, const Path & path, PathFilter & filter = defaultPathFilter); +/** + * Compute the git blob/tree hash of the given path. + */ +HashResult hashGit(HashType ht, const Path & path, + PathFilter & filter = defaultPathFilter); + /** * Compress a hash to the specified number of bytes by cyclically * XORing bytes together. diff --git a/src/libutil/serialise.cc b/src/libutil/serialise.cc index 3d5121a19fa6..e5205ce79f00 100644 --- a/src/libutil/serialise.cc +++ b/src/libutil/serialise.cc @@ -74,6 +74,10 @@ void Source::operator () (char * data, size_t len) } } +void Source::operator () (std::string_view data) +{ + (*this)((char *)data.data(), data.size()); +} void Source::drainInto(Sink & sink) { diff --git a/src/libutil/serialise.hh b/src/libutil/serialise.hh index 333c254ea8e3..71c40c83a7d3 100644 --- a/src/libutil/serialise.hh +++ b/src/libutil/serialise.hh @@ -72,6 +72,7 @@ struct Source * an error if it is not going to be available. 
*/ void operator () (char * data, size_t len); + void operator () (std::string_view data); /** * Store up to ‘len’ in the buffer pointed to by ‘data’, and diff --git a/src/nix/add-to-store.cc b/src/nix/add-to-store.cc index 39e5cc99dd2f..f3e9249d2bbd 100644 --- a/src/nix/add-to-store.cc +++ b/src/nix/add-to-store.cc @@ -2,6 +2,7 @@ #include "common-args.hh" #include "store-api.hh" #include "archive.hh" +#include "git.hh" using namespace nix; @@ -34,11 +35,22 @@ struct CmdAddToStore : MixDryRun, StoreCommand auto narHash = hashString(htSHA256, sink.s); - Hash hash = narHash; - if (ingestionMethod == FileIngestionMethod::Flat) { + Hash hash { htSHA256 }; // throwaway def to appease C++ + switch (ingestionMethod) { + case FileIngestionMethod::Recursive: { + hash = narHash; + break; + } + case FileIngestionMethod::Flat: { HashSink hsink(htSHA256); readFile(path, hsink); hash = hsink.finish().first; + break; + } + case FileIngestionMethod::Git: { + hash = dumpGitHash(htSHA1, path); + break; + } } ValidPathInfo info { @@ -102,5 +114,26 @@ struct CmdAddPath : CmdAddToStore } }; +struct CmdAddGit : CmdAddToStore +{ + CmdAddGit() + { + ingestionMethod = FileIngestionMethod::Git; + } + + std::string description() override + { + return "add a path to the Nix store"; + } + + std::string doc() override + { + return + #include "add-path.md" + ; + } +}; + static auto rCmdAddFile = registerCommand2({"store", "add-file"}); static auto rCmdAddPath = registerCommand2({"store", "add-path"}); +static auto rCmdAddGit = registerCommand2({"store", "add-git"}); diff --git a/src/nix/hash.cc b/src/nix/hash.cc index 9feca934557c..368227ab6288 100644 --- a/src/nix/hash.cc +++ b/src/nix/hash.cc @@ -5,6 +5,7 @@ #include "shared.hh" #include "references.hh" #include "archive.hh" +#include "git.hh" using namespace nix; @@ -65,9 +66,11 @@ struct CmdHashBase : Command { switch (mode) { case FileIngestionMethod::Flat: - return "print cryptographic hash of a regular file"; + return "print cryptographic hash of a regular file"; case FileIngestionMethod::Recursive: return "print cryptographic hash of the NAR serialisation of a path"; + case FileIngestionMethod::Git: + return "print cryptographic hash of the Git serialisation of a path"; default: assert(false); }; @@ -84,15 +87,21 @@ struct CmdHashBase : Command hashSink = std::make_unique(ht); switch (mode) { - case FileIngestionMethod::Flat: + case FileIngestionMethod::Flat: { readFile(path, *hashSink); break; - case FileIngestionMethod::Recursive: + } + case FileIngestionMethod::Recursive: { dumpPath(path, *hashSink); break; } + case FileIngestionMethod::Git: + dumpGit(ht, path, *hashSink); + break; + } + + auto h = hashSink->finish().first; - Hash h = hashSink->finish().first; if (truncate && h.hashSize > 20) h = compressHash(h, 20); logger->cout(h.to_string(base, base == SRI)); } @@ -133,6 +142,7 @@ struct CmdHash : NixMultiCommand : MultiCommand({ {"file", []() { return make_ref(FileIngestionMethod::Flat);; }}, {"path", []() { return make_ref(FileIngestionMethod::Recursive); }}, + {"git", []() { return make_ref(FileIngestionMethod::Git); }}, {"to-base16", []() { return make_ref(Base16); }}, {"to-base32", []() { return make_ref(Base32); }}, {"to-base64", []() { return make_ref(Base64); }}, diff --git a/tests/git.sh b/tests/git.sh new file mode 100644 index 000000000000..983033e6a28d --- /dev/null +++ b/tests/git.sh @@ -0,0 +1,83 @@ +source common.sh + +clearStore +clearCache + +enableFeatures "git-hashing" + +try () { + hash=$(nix hash git --base16 --type sha1 
$TEST_ROOT/hash-path) + if test "$hash" != "$1"; then + echo "git hash, expected $1, got $hash" + exit 1 + fi +} + +rm -rf $TEST_ROOT/hash-path +mkdir $TEST_ROOT/hash-path +echo "Hello World" > $TEST_ROOT/hash-path/hello + +try "117c62a8c5e01758bd284126a6af69deab9dbbe2" + +rm -rf $TEST_ROOT/dummy1 +echo Hello World! > $TEST_ROOT/dummy1 +path1=$(nix store add-git $TEST_ROOT/dummy1) +hash1=$(nix-store -q --hash $path1) +test "$hash1" = "sha256:1brffhvj2c0z6x8qismd43m0iy8dsgfmy10bgg9w11szway2wp9v" + +rm -rf $TEST_ROOT/dummy2 +mkdir -p $TEST_ROOT/dummy2 +echo Hello World! > $TEST_ROOT/dummy2/hello +path2=$(nix store add-git $TEST_ROOT/dummy2) +hash2=$(nix-store -q --hash $path2) +test "$hash2" = "sha256:1vhv7zxam7x277q0y0jcypm7hwhccbzss81vkdgf0ww5sm2am4y0" + +rm -rf $TEST_ROOT/dummy3 +mkdir -p $TEST_ROOT/dummy3 +mkdir -p $TEST_ROOT/dummy3/hello +echo Hello World! > $TEST_ROOT/dummy3/hello/hello +path3=$(nix store add-git $TEST_ROOT/dummy3) +hash3=$(nix-store -q --hash $path3) +test "$hash3" = "sha256:1i2x80840igikhbyy7nqf08ymx3a6n83x1fzyrxvddf0sdl5nqvp" + +if [[ -n $(type -p git) ]]; then + repo=$TEST_ROOT/git + + rm -rf $repo $TEST_HOME/.cache/nix + + git init $repo + git -C $repo config user.email "foobar@example.com" + git -C $repo config user.name "Foobar" + + echo utrecht > $repo/hello + touch $repo/.gitignore + git -C $repo add hello .gitignore + git -C $repo commit -m 'Bla1' + + echo world > $repo/hello + git -C $repo commit -m 'Bla2' -a + + treeHash=$(git -C $repo rev-parse HEAD:) + + # Fetch the default branch. + path=$(nix eval --raw --expr "(builtins.fetchTree { type = \"git\"; url = file://$repo; treeHash = \"$treeHash\"; }).outPath") + [[ $(cat $path/hello) = world ]] + + # Submodules cause error. + (! nix eval --raw --expr "(builtins.fetchTree { type = \"git\"; url = file://$repo; treeHash = \"$treeHash\"; submodules = true; }).outPath") + + # Check that we can substitute it from other places. + nix copy --to file://$cacheDir $path + nix-store --delete $path + path2=$(nix eval --raw --expr "(builtins.fetchTree { type = \"git\"; url = file:///no-such-repo; treeHash = \"$treeHash\"; }).outPath" --substituters file://$cacheDir --option substitute true) + [ $path2 = $path ] + + # HEAD should be the same path and tree hash as tree + nix eval --impure --expr "(builtins.fetchTree { type = \"git\"; url = file://$repo; ref = \"HEAD\"; gitIngestion = true; })" + treeHash2=$(nix eval --impure --raw --expr "(builtins.fetchTree { type = \"git\"; url = file://$repo; ref = \"HEAD\"; gitIngestion = true; }).treeHash") + [ $treeHash = $treeHash2 ] + path3=$(nix eval --impure --raw --expr "(builtins.fetchTree { type = \"git\"; url = file://$repo; ref = \"HEAD\"; gitIngestion = true; }).outPath") + [ $path3 = $path ] +else + echo "Git not installed; skipping Git tests" +fi diff --git a/tests/local.mk b/tests/local.mk index 4edf31303a93..f49f1a3ad26f 100644 --- a/tests/local.mk +++ b/tests/local.mk @@ -23,6 +23,7 @@ nix_tests = \ experimental-features.sh \ fetchMercurial.sh \ gc-auto.sh \ + git.sh \ user-envs.sh \ user-envs-migration.sh \ binary-cache.sh \
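The test script above doubles as a usage sketch for the new feature. For reference, the commands below mirror tests/git.sh outside the test harness; they are illustrative only, assume a Nix build with this patch applied and the `git-hashing` experimental feature enabled (for example via `experimental-features = nix-command flakes git-hashing` in nix.conf), and use placeholder paths and hashes.

    # Print the SHA-1 git tree hash of a directory (comparable to `git rev-parse HEAD:` on a clean checkout).
    nix hash git --base16 --type sha1 ./some-directory

    # Add a path to the store using git ingestion via the new `nix store add-git` subcommand.
    nix store add-git ./some-directory

    # Fetch a git tree by its tree hash rather than a commit rev; the treeHash value is a placeholder.
    nix eval --raw --expr \
      '(builtins.fetchTree { type = "git"; url = "file:///path/to/repo"; treeHash = "<sha1-tree-hash>"; }).outPath'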