Skip to content

Commit

Permalink
Add commit command
Browse files Browse the repository at this point in the history
Add "tog commit" command. Add CryptoPP as a dependency for SHA256
hashing. Commits are represented as a merkle tree, implemented by
class hierarchy (object.h, commit.h, tree.h, blob.h). Add handle
class to support lazy loading of objects.
  • Loading branch information
iamlucaswolf committed Nov 15, 2021
1 parent 0a17448 commit bd3f726
Show file tree
Hide file tree
Showing 15 changed files with 696 additions and 27 deletions.
8 changes: 7 additions & 1 deletion CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,12 @@ set(CMAKE_CXX_STANDARD 20)
set(CXX_FLAGS "-Wall")
set(CMAKE_CXX_FLAGS "${CXX_FLAGS}")

find_package(CryptoPP REQUIRED)

# TODO there's probably a better way to do this with CMake
include_directories("libs/")
add_executable(${PROJECT_NAME} src/tog.cpp)
add_executable(
${PROJECT_NAME} src/tog.cpp src/repository.cpp src/blob.cpp src/crypto.cpp
src/tree.cpp src/commit.cpp
)
target_link_libraries(tog PRIVATE CryptoPP::CryptoPP)
27 changes: 27 additions & 0 deletions cmake/FindCryptoPP.cmake
Original file line number Diff line number Diff line change
@@ -0,0 +1,27 @@
# Taken from https://github.com/mateuszzz88/FindCryptoPP.cmake
#
# Find module for the Crypto++ library. On success it provides:
#   CryptoPP_FOUND           - set by find_package_handle_standard_args
#   CryptoPP_INCLUDE_DIRS    - directory containing cryptopp/config.h
#   CryptoPP_LIBRARIES       - path of the cryptopp library
#   CryptoPP_VERSION_STRING  - "major.minor.revision", when it can be detected
#   CryptoPP::CryptoPP       - imported target carrying the two paths above

find_path(CryptoPP_INCLUDE_DIR NAMES cryptopp/config.h DOC "CryptoPP include directory")
find_library(CryptoPP_LIBRARY NAMES cryptopp DOC "CryptoPP library")

if(CryptoPP_INCLUDE_DIR)
  # Look for the three-digit "#define CRYPTOPP_VERSION xyz" line. Only the
  # define itself is matched, so comments mentioning the macro are ignored.
  # NOTE(review): newer Crypto++ releases appear to keep this define in
  # config_ver.h rather than config.h, so both headers are probed - confirm
  # against the installed headers.
  set(_config_version "")
  foreach(_version_header config.h config_ver.h)
    set(_version_header_path "${CryptoPP_INCLUDE_DIR}/cryptopp/${_version_header}")
    if("${_config_version}" STREQUAL "" AND EXISTS "${_version_header_path}")
      file(STRINGS "${_version_header_path}" _config_version
           REGEX "^[ \t]*#[ \t]*define[ \t]+CRYPTOPP_VERSION[ \t]+[0-9]+")
    endif()
  endforeach()

  # The value is quoted on purpose: an empty result expanded unquoted would
  # drop the input argument entirely and abort configuration. When no version
  # is found, CryptoPP_VERSION_STRING is simply left unset.
  if("${_config_version}" MATCHES "CRYPTOPP_VERSION[ \t]+([0-9])([0-9])([0-9])")
    set(CryptoPP_VERSION_STRING "${CMAKE_MATCH_1}.${CMAKE_MATCH_2}.${CMAKE_MATCH_3}")
  endif()

  unset(_version_header)
  unset(_version_header_path)
  unset(_config_version)
endif()

include(FindPackageHandleStandardArgs)
find_package_handle_standard_args(CryptoPP
  REQUIRED_VARS CryptoPP_INCLUDE_DIR CryptoPP_LIBRARY
  FOUND_VAR CryptoPP_FOUND
  VERSION_VAR CryptoPP_VERSION_STRING)

# Expose the library as an imported target, unless a previous find_package
# call already created it.
if(CryptoPP_FOUND AND NOT TARGET CryptoPP::CryptoPP)
  add_library(CryptoPP::CryptoPP UNKNOWN IMPORTED)
  set_target_properties(CryptoPP::CryptoPP PROPERTIES
    IMPORTED_LOCATION "${CryptoPP_LIBRARY}"
    INTERFACE_INCLUDE_DIRECTORIES "${CryptoPP_INCLUDE_DIR}")
endif()

mark_as_advanced(CryptoPP_INCLUDE_DIR CryptoPP_LIBRARY)
set(CryptoPP_INCLUDE_DIRS ${CryptoPP_INCLUDE_DIR})
set(CryptoPP_LIBRARIES ${CryptoPP_LIBRARY})
35 changes: 35 additions & 0 deletions src/blob.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,35 @@
#include "blob.h"

#include <filesystem>
#include <fstream>
#include <iterator>

namespace fs = std::filesystem;

namespace tog {

// Creates a blob holding the complete contents of the file at `path`.
//
// Throws std::ios_base::failure if the file cannot be opened, its size cannot
// be determined, or its contents cannot be read completely.
Blob::Blob(const fs::path& path) {
    // Read the file in binary mode so that no bytes are translated.
    std::ifstream file(path, std::ios::binary);
    if (!file) {
        throw std::ios_base::failure("failed to open file: " + path.string());
    }

    // Get file size by seeking to the end of the stream and getting the
    // cursor position. tellg() reports -1 on failure; that value must never
    // be used as a buffer size.
    file.seekg(0, std::ios::end);
    const std::streamoff file_size = file.tellg();
    if (file_size < 0) {
        throw std::ios_base::failure("failed to determine size of file: " +
                                     path.string());
    }

    // Seek back to the beginning of the stream
    file.seekg(0, std::ios::beg);

    // Read everything with a single unformatted read instead of extracting
    // one character at a time.
    _data.resize(static_cast<std::size_t>(file_size));
    if (file_size > 0 &&
        !file.read(reinterpret_cast<char*>(_data.data()), file_size)) {
        throw std::ios_base::failure("failed to read file: " + path.string());
    }
}

// Returns the serialized form of this blob: a reference to the bytes stored
// in _data. The reference points at a member, so it is only usable while this
// Blob is alive.
const std::vector<unsigned char>& Blob::serialize() {
// as of now, the serialization is just the raw data (i.e. no custom binary
// layout is used, like in git). this may change in the future
return _data;
}

} // namespace tog
25 changes: 25 additions & 0 deletions src/blob.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,25 @@
#ifndef TOG_BLOB_H
#define TOG_BLOB_H

#include <filesystem>
#include <string>
#include <vector>

#include "object.h"

namespace tog {

// A blob is a sequence of bytes that corresponds to a file in the worktree
struct Blob : public TogObject {
public:
// Create a blob from file
Blob(const std::filesystem::path& path);
const std::vector<unsigned char>& serialize();

private:
std::vector<unsigned char> _data;
};

} // namespace tog

#endif
34 changes: 34 additions & 0 deletions src/commit.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,34 @@
#include "commit.h"

#include <tomlplusplus/toml.h>

namespace tog {

// TODO fix format in .clang-format
// Creates a commit for the given top-level tree, an optional parent commit
// and a commit message. All three arguments are taken by value and moved into
// the corresponding members, so none of them is copied a second time.
Commit::Commit(Handle<Tree> tree, std::optional<Handle<Commit>> parent,
               std::string message)
    : _tree{std::move(tree)},
      _parent{std::move(parent)},
      _message{std::move(message)} {}

// Returns the serialized commit. The commit is encoded as a TOML table with
// the keys "message", "tree" and "parent"; the bytes are produced on the first
// call and cached in _serialized for all later calls.
const std::vector<unsigned char> &Commit::serialize() {
    if (!_serialized) {
        // A commit without a parent stores an empty string as parent hash.
        auto encoded = toml::table{{
            {"message", _message},
            {"tree", _tree.hash()},
            {"parent", _parent ? _parent->hash() : ""},
        }};

        // Render the table as text and keep the characters as raw bytes.
        std::stringstream out{};
        out << encoded;
        const std::string text = out.str();

        _serialized.emplace(text.begin(), text.end());
    }

    return *_serialized;
}

} // namespace tog
37 changes: 37 additions & 0 deletions src/commit.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,37 @@
#ifndef TOG_COMMIT_H
#define TOG_COMMIT_H

#include <optional>
#include <string>

#include "handle.h"
#include "object.h"
#include "tree.h"

namespace tog {

// A commit represents a single commit in a repository
class Commit : public TogObject {
public:
Commit(Handle<Tree> tree, std::optional<Handle<Commit>> parent,
std::string message);

const std::vector<unsigned char>& serialize();

private:
// the top-level tree of the commit (i.e. the worktree)
Handle<Tree> _tree;

// the preceding commit to this commit (if any)
std::optional<Handle<Commit>> _parent;

// the user-specified commit message
std::string _message;

// cached serialized commit, for lazy serialization
std::optional<std::vector<unsigned char>> _serialized;
};

} // namespace tog

#endif // TOG_COMMIT_H
24 changes: 24 additions & 0 deletions src/crypto.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,24 @@
#include "crypto.h"

#include <cryptopp/cryptlib.h>
#include <cryptopp/filters.h>
#include <cryptopp/hex.h>
#include <cryptopp/sha.h>

namespace tog {

// Computes the SHA-256 hash of `data` and returns it as a hex-encoded string.
std::string sha256(const std::vector<unsigned char> &data) {
    std::string digest;
    CryptoPP::SHA256 hash;

    // Pipeline: bytes -> hash filter -> hex encoder -> digest string.
    // CryptoPP assumes ownership of the raw pointers handed to each stage, so
    // no need to delete them.
    CryptoPP::ArraySource(
        data.data(), data.size(), true,
        new CryptoPP::HashFilter(
            hash,
            new CryptoPP::HexEncoder(new CryptoPP::StringSink(digest))));

    return digest;
}

} // namespace tog
20 changes: 20 additions & 0 deletions src/crypto.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,20 @@
#ifndef TOG_CRYPTO_H
#define TOG_CRYPTO_H

#include <string>
#include <vector>

namespace tog {

// computes the SHA-256 hash of the given data and returns it as a hex-encoded
// string
std::string sha256(const std::vector<unsigned char>& data);

// Checks `hash` against the SHA-256 hash of `data`: the hash is recomputed
// with sha256() and compared to `hash` for exact string equality.
inline bool verify_sha256(const std::vector<unsigned char>& data,
                          const std::string& hash) {
    const std::string computed = sha256(data);
    return computed == hash;
}

} // namespace tog

#endif
53 changes: 53 additions & 0 deletions src/handle.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,53 @@
#ifndef TOG_HANDLE_H
#define TOG_HANDLE_H

#include <memory>
#include <string>

namespace tog {

// A handle represents a TogObject that may have not yet been loaded into
// memory. It pairs the object's hash with an optional pointer to the loaded
// object and a flag telling whether the object still has to be persisted.
//
// TODO: use C++20 concepts to enforce that T is a TogObject
template <class T>
class Handle {
public:
    // Constructs an empty handle: empty hash, not dirty, no object.
    Handle() = default;
    virtual ~Handle() = default;

    // Declaring the destructor suppresses the implicit move operations, which
    // would silently turn every move of a handle into a copy. Default all
    // copy/move members explicitly so handles stay cheaply movable.
    Handle(const Handle&) = default;
    Handle(Handle&&) = default;
    Handle& operator=(const Handle&) = default;
    Handle& operator=(Handle&&) = default;

    // Constructs an unresolved handle to an existing object with a given hash.
    Handle(std::string hash) : _hash{std::move(hash)} {}

    // Constructs a resolved handle to an existing object with a given hash.
    Handle(std::string hash, std::shared_ptr<T> object, bool dirty)
        : _hash{std::move(hash)}, _dirty{dirty}, _object{std::move(object)} {}

    // Returns whether the referenced object needs to be persisted on disk.
    bool dirty() const {
        return _dirty;
    }

    // Returns the hash of the referenced object.
    const std::string& hash() const {
        return _hash;
    }

    // Returns the referenced object, or nullptr if the handle is unresolved.
    const std::shared_ptr<T>& object() const {
        return _object;
    }

private:
    // The hash of the object referenced by this handle.
    std::string _hash;

    // Indicates whether the object referenced by this handle needs to be
    // persisted on disk. Initialized here so that a default-constructed handle
    // never exposes an indeterminate value through dirty().
    bool _dirty{false};

    // The object referenced by this handle. This may be nullptr if the handle
    // is not resolved yet.
    std::shared_ptr<T> _object;
};
} // namespace tog

#endif // TOG_HANDLE_H
16 changes: 16 additions & 0 deletions src/object.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,16 @@
#ifndef TOG_OBJECT_H
#define TOG_OBJECT_H

#include <vector>

namespace tog {

// Abstract base class of the objects tog stores (e.g. Blob and Commit).
class TogObject {
public:
    // Virtual so that objects can be destroyed through a TogObject pointer.
    virtual ~TogObject() = default;

    // Returns a reference to the serialized byte representation of the object.
    virtual const std::vector<unsigned char>& serialize() = 0;
};

} // namespace tog

#endif // TOG_OBJECT_H
Loading

0 comments on commit bd3f726

Please sign in to comment.