Skip to content

Commit

Permalink
Add checkout command
Browse files Browse the repository at this point in the history
Add "tog checkout" command to restore workdir contents to
previously committed state. Add resolution mechanism in repository.h
to (recursively) expand merkle tree. Also add helper methods to
traverse the expanded tree and restore dir/file contents.
  • Loading branch information
iamlucaswolf committed Nov 15, 2021
1 parent bd3f726 commit 9066f46
Show file tree
Hide file tree
Showing 8 changed files with 261 additions and 10 deletions.
4 changes: 4 additions & 0 deletions src/blob.h
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,10 @@ struct Blob : public TogObject {
Blob(const std::filesystem::path& path);
const std::vector<unsigned char>& serialize();

// Returns a read-only reference to the bytes held by this blob (_data).
// The reference is to a member, so it is only valid while the blob is alive.
const std::vector<unsigned char>& data() const {
return _data;
}

private:
std::vector<unsigned char> _data;
};
Expand Down
2 changes: 1 addition & 1 deletion src/commit.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@ Commit::Commit(Handle<Tree> tree, std::optional<Handle<Commit>> parent,
std::string message)
: _tree{std::move(tree)}, _parent{parent}, _message{std::move(message)} {}

const std::vector<unsigned char> &Commit::serialize() {
const std::vector<unsigned char>& Commit::serialize() {
// serialize lazily
if (_serialized) {
return *_serialized;
Expand Down
5 changes: 5 additions & 0 deletions src/commit.h
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
#ifndef TOG_COMMIT_H
#define TOG_COMMIT_H

#include <filesystem>
#include <optional>
#include <string>

Expand All @@ -18,6 +19,10 @@ class Commit : public TogObject {

const std::vector<unsigned char>& serialize();

// Returns a mutable reference to the handle of this commit's top-level tree,
// so that callers can resolve the handle in place.
Handle<Tree>& tree() {
return _tree;
}

private:
// the top-level tree of the commit (i.e. the worktree)
Handle<Tree> _tree;
Expand Down
9 changes: 9 additions & 0 deletions src/handle.h
Original file line number Diff line number Diff line change
Expand Up @@ -24,10 +24,19 @@ class Handle {
// Constructs a handle from a hash, a (shared) pointer to the object and a
// dirty flag. `hash` and `object` are taken by value and moved into the
// handle.
Handle(std::string hash, std::shared_ptr<T> object, bool dirty)
: _hash{std::move(hash)}, _dirty{dirty}, _object{std::move(object)} {};

// Attaches `object` to this handle and clears the dirty flag.
// Any previously attached object pointer is replaced.
void resolve(std::shared_ptr<T> object) {
    _dirty = false;
    _object = std::move(object);
}

// Returns the current value of the handle's dirty flag.
bool dirty() const {
return _dirty;
}

// Returns true if an object is attached to this handle, i.e. the object
// pointer is non-null.
bool resolved() const {
    return static_cast<bool>(_object);
}

// Returns a read-only reference to the hash stored in this handle.
// The reference is to a member, so it is only valid while the handle is
// alive and not assigned to.
const std::string& hash() const {
return _hash;
}
Expand Down
191 changes: 187 additions & 4 deletions src/repository.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -70,7 +70,7 @@ std::string Repository::commit(const std::string& message) {
// (which is the only branch for now). In git, this is known as "detached
// head".
if (_head && (_head->hash() != _main->hash())) {
throw TogException{"cannot commit when not at latest commit"};
throw TogException{"not at latest commit of current branch"};
}

auto tree_handle = add_directory(_worktree_path);
Expand Down Expand Up @@ -119,10 +119,193 @@ std::string Repository::commit(const std::string& message) {
return commit.hash();
}

Handle<Blob>& Repository::add_file(const fs::path& file_path) {
Handle<Blob>& h = register_object(std::make_unique<Blob>(file_path));
void Repository::checkout(const std::string& hash) {
auto commit = Handle<Commit>{hash};
resolve(commit);

// clear working directory (except .tog)
for (const auto& entry : fs::directory_iterator(_worktree_path)) {
if (entry.path().filename() != ".tog") {
fs::remove_all(entry.path());
}
}

auto tree = commit.object()->tree();
resolve(tree);
restoreTree(tree, _worktree_path);

_head = commit;
persist_ref(_togdir_path / "refs" / "head", _head);
}

// Resolves a blob handle in place: loads the blob identified by the handle's
// hash from .tog/objects (unless it is already cached) and overwrites `blob`
// with the repository's cached handle for that hash.
//
// Does nothing if the handle is already resolved. Throws TogException if no
// object file exists for the hash.
void Repository::resolve(Handle<Blob>& blob) {
    if (blob.resolved()) {
        return;
    }

    // Work on a copy of the hash: `blob` is overwritten at the end.
    const std::string hash = blob.hash();
    const auto object_path = _togdir_path / "objects" / hash;

    if (!fs::exists(object_path)) {
        throw TogException{"object not found"};
    }

    // Cache for future resolutions. try_emplace only copies the hash and the
    // handle into the cache when no entry for the hash exists yet.
    auto& cached = _blobs.try_emplace(hash, blob).first->second;

    // TODO error management; what if object is not a blob?
    //
    // TODO using the blob constructor works, because blobs are not yet
    // compressed. Revisit this when we have compression.
    if (!cached.resolved()) {
        cached.resolve(std::make_shared<Blob>(object_path));
    }

    // hand the cached (resolved) handle back to the caller
    blob = cached;
}

// Resolves a tree handle in place: loads the tree identified by the handle's
// hash from .tog/objects (unless it is already cached) and overwrites `tree`
// with the repository's cached handle for that hash.
//
// Only this tree is loaded; the blob and sub-tree handles it contains are
// created unresolved (hash only).
//
// Does nothing if the handle is already resolved. Throws TogException if no
// object file exists for the hash or if the object is not a well-formed tree
// (a "blobs"/"trees" entry that is not a table, or an entry without a hash).
// A missing "blobs" or "trees" section is treated as empty.
void Repository::resolve(Handle<Tree>& tree) {
    if (tree.resolved()) {
        return;
    }

    auto hash = tree.hash();
    auto path = _togdir_path / "objects" / hash;

    if (!fs::exists(path)) {
        throw TogException{"object not found"};
    }

    // cache for future resolutions
    if (!_trees.contains(hash)) {
        // Explicitly copy hash/object to avoid dangling references
        _trees.emplace(std::string{hash}, Handle<Tree>{tree});
    }

    auto& cached = _trees.at(hash);

    if (!cached.resolved()) {
        // TODO error management; what if object is not a tree?

        // load tree from disk
        auto deserialized = toml::parse_file(path.string());

        // parse blobs
        std::unordered_map<std::string, Handle<Blob>> blobs;

        if (auto blob_section = deserialized["blobs"]; blob_section) {
            // as_table() yields nullptr when the node is not a table; never
            // dereference it unchecked.
            const auto* blob_table = blob_section.as_table();

            if (blob_table == nullptr) {
                throw TogException{"corrupt tree object " + hash};
            }

            for (const auto& [key, value] : *blob_table) {
                std::string blob_hash{value.value_or("")};

                if (blob_hash.empty()) {
                    throw TogException{"corrupt tree object " + hash};
                }

                blobs.emplace(key, Handle<Blob>{blob_hash});
            }
        }

        // parse trees
        std::unordered_map<std::string, Handle<Tree>> trees;

        if (auto tree_section = deserialized["trees"]; tree_section) {
            const auto* tree_table = tree_section.as_table();

            if (tree_table == nullptr) {
                throw TogException{"corrupt tree object " + hash};
            }

            for (const auto& [key, value] : *tree_table) {
                std::string tree_hash{value.value_or("")};

                if (tree_hash.empty()) {
                    throw TogException{"corrupt tree object " + hash};
                }

                trees.emplace(key, Handle<Tree>{tree_hash});
            }
        }

        cached.resolve(
            std::make_shared<Tree>(std::move(blobs), std::move(trees)));
    }

    // copy assignment operator
    tree = cached;
}

// Resolves a commit handle in place: loads the commit identified by the
// handle's hash from .tog/objects (unless it is already cached) and overwrites
// `commit` with the repository's cached handle for that hash.
//
// Only the commit itself is loaded; its tree and parent handles are created
// unresolved (hash only).
//
// Does nothing if the handle is already resolved. Throws TogException if no
// object file exists for the hash or if the object has no "tree" entry.
// A missing or empty "parent" means the commit has no parent; a missing
// "message" is read as the empty string.
void Repository::resolve(Handle<Commit>& commit) {
    if (commit.resolved()) {
        return;
    }

    auto hash = commit.hash();
    auto path = _togdir_path / "objects" / hash;

    if (!fs::exists(path)) {
        throw TogException{"commit does not exist"};
    }

    // cache for future resolutions
    if (!_commits.contains(hash)) {
        // Explicitly copy hash/object to avoid dangling references
        _commits.emplace(std::string{hash}, Handle<Commit>{commit});
    }

    auto& cached = _commits.at(hash);

    if (!cached.resolved()) {
        // TODO error management; what if object is not a commit?

        // load commit from disk
        auto deserialized = toml::parse_file(path.string());

        // parse tree; the optional is empty when the key is missing or not a
        // string, so it must be checked before it is dereferenced
        auto tree_hash = deserialized["tree"].value<std::string>();

        if (!tree_hash || tree_hash->empty()) {
            throw TogException{"corrupt commit object " + hash};
        }

        auto tree = Handle<Tree>{*tree_hash};

        // parse parent
        auto parent_hash = deserialized["parent"].value<std::string>();
        std::optional<Handle<Commit>> parent;

        if (parent_hash && !parent_hash->empty()) {
            parent = Handle<Commit>{*parent_hash};
        }

        // parse message
        auto message = deserialized["message"].value<std::string>();

        cached.resolve(std::make_shared<Commit>(
            std::move(tree), std::move(parent), message.value_or("")));
    }

    // copy assignment operator
    commit = cached;
}

// Recursively writes the contents of `tree` into the directory `path`.
//
// The tree handle is resolved first (resolve() may throw). The directory is
// created if necessary, then every blob of the tree is written as a file and
// every sub-tree is restored as a sub-directory of `path`.
void Repository::restoreTree(Handle<Tree>& tree, const fs::path& path) {
    resolve(tree);

    // make sure the target directory exists before writing into it
    fs::create_directories(path);

    const auto node = tree.object();

    // files first ...
    for (auto& [file_name, file_handle] : node->blobs()) {
        restoreBlob(file_handle, path / file_name);
    }

    // ... then descend into the sub-directories
    for (auto& [dir_name, dir_handle] : node->trees()) {
        restoreTree(dir_handle, path / dir_name);
    }
}

// Writes the contents of `blob` to the file at `path`, replacing any existing
// file contents.
//
// The blob handle is resolved first (resolve() may throw). Throws
// TogException if the file cannot be opened or written; a failed write would
// otherwise go unnoticed and leave a missing or truncated file in the
// worktree.
void Repository::restoreBlob(Handle<Blob>& blob, const fs::path& path) {
    resolve(blob);
    const auto& data = blob.object()->data();

    std::ofstream stream{path, std::ios::out | std::ios::binary};

    if (!stream) {
        throw TogException{"could not open file for writing: " +
                           path.string()};
    }

    stream.write(reinterpret_cast<const char*>(data.data()),
                 static_cast<std::streamsize>(data.size()));

    // flush explicitly so that errors surface here and not in the destructor,
    // where they would be swallowed
    stream.flush();

    if (!stream) {
        throw TogException{"could not write file: " + path.string()};
    }
}

// Creates a blob from the file at `file_path`, registers it with the
// repository and returns the handle that register_object() hands back.
Handle<Blob>& Repository::add_file(const fs::path& file_path) {
    auto blob = std::make_unique<Blob>(file_path);
    return register_object(std::move(blob));
}

Handle<Tree>& Repository::add_directory(const fs::path& directory_path) {
Expand Down
20 changes: 17 additions & 3 deletions src/repository.h
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,9 @@ class Repository {
// returns the commit object's hash
std::string commit(const std::string& message);

// restores the worktree to the state captured by the given commit
void checkout(const std::string& hash);

// Initializes a new repository in the given directory.
static void init(const std::filesystem::path& path);

Expand All @@ -43,14 +46,25 @@ class Repository {
Handle<Tree>& add_directory(const std::filesystem::path& directory_path);

// register_object will move the given object into the repository's object
// store and return a handle to it. Note that this handle may not refer to
// the same (as in "identical") object as the one passed in.
// store and return a (resolved) handle to it. Note that this handle may not
// refer to the same (as in "identical") object as the one passed in.
// TODO: Unify these methods once I have better understanding of templates
Handle<Blob>& register_object(std::unique_ptr<Blob> blob);
Handle<Tree>& register_object(std::unique_ptr<Tree> tree);
Handle<Commit>& register_object(std::unique_ptr<Commit> commit);

// helper methods to load/store refs from .tog/refs
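// Resolves the given handle in place, i.e. loads the object identified by
// its hash and attaches it to the handle. If the hash is not found in the
// repository, an exception is thrown. Note that the handle is overwritten
// with the repository's cached handle for that hash.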
void resolve(Handle<Blob>& blob);
void resolve(Handle<Tree>& tree);
void resolve(Handle<Commit>& commit);

// recursively restores the contents of the given tree at the given path.
void restoreTree(Handle<Tree>& tree, const std::filesystem::path& path);
void restoreBlob(Handle<Blob>& blob, const std::filesystem::path& path);

// load/store refs from .tog/refs
std::optional<Handle<Commit>> load_ref(const std::filesystem::path& path);
void persist_ref(const std::filesystem::path& path,
const std::optional<Handle<Commit>>& commit);
Expand Down
28 changes: 28 additions & 0 deletions src/tog.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -38,6 +38,25 @@ void commit(const std::string &message) {
}
}

void checkout(const std::string &hash) {
auto togdir_path = fs::current_path() / ".tog";

if (!fs::exists(togdir_path)) {
std::cout << "Not a tog repository" << std::endl;
return;
}

try {
auto repo = tog::Repository{togdir_path};
repo.checkout(hash);

std::cout << "Checked out commit " << hash << std::endl;

} catch (const std::exception &e) {
std::cout << "Error: " << e.what() << std::endl;
}
}

int main(int argc, char **argv) {
CLI::App app{"The simple version control system", "tog"};

Expand All @@ -55,6 +74,15 @@ int main(int argc, char **argv) {
commit_cmd->add_option("-m,--message", commit_message, "Commit message");
commit_cmd->callback([&commit_message]() { commit(commit_message); });

// tog checkout <commit>
auto checkout_cmd = app.add_subcommand("checkout", "Checkout a commit");
std::string checkout_hash;

// TODO fix formatting in .clang-format
checkout_cmd->add_option("commit", checkout_hash, "Commit hash")
->required();
checkout_cmd->callback([&checkout_hash]() { checkout(checkout_hash); });

try {
CLI11_PARSE(app, argc, argv);
} catch (const CLI::ParseError &e) {
Expand Down
12 changes: 10 additions & 2 deletions src/tree.h
Original file line number Diff line number Diff line change
Expand Up @@ -17,12 +17,20 @@ class Tree : public TogObject {

const std::vector<unsigned char> &serialize();

// Returns a mutable reference to the map of blob handles (_blobs), keyed by
// string. Mutable so that callers can resolve the contained handles in place.
std::unordered_map<std::string, Handle<Blob>> &blobs() {
return _blobs;
}

// Returns a mutable reference to the map of sub-tree handles (_trees), keyed
// by string. Mutable so that callers can resolve the contained handles in
// place.
std::unordered_map<std::string, Handle<Tree>> &trees() {
return _trees;
}

private:
// cached serialized tree, for lazy serialization
std::optional<std::vector<unsigned char>> _serialized;

const std::unordered_map<std::string, Handle<Blob>> _blobs;
const std::unordered_map<std::string, Handle<Tree>> _trees;
std::unordered_map<std::string, Handle<Blob>> _blobs;
std::unordered_map<std::string, Handle<Tree>> _trees;
};

} // namespace tog
Expand Down

0 comments on commit 9066f46

Please sign in to comment.