Skip to content

Commit

Permalink
Located triples with graph information (#1572)
Browse files Browse the repository at this point in the history
Triples from a SPARQL 1.1 Update operation also belong to a graph, but so far, our located triples did not contain graph information. Now each located triple holds an array of *four* `Id`s, where the first three denote the subject, predicate, and object (in the order of the permutation in which the triple is located), and the fourth denotes the graph.
  • Loading branch information
joka921 authored Oct 22, 2024
1 parent c70d5e9 commit f856919
Show file tree
Hide file tree
Showing 12 changed files with 414 additions and 180 deletions.
22 changes: 13 additions & 9 deletions src/global/IdTriple.h
Original file line number Diff line number Diff line change
Expand Up @@ -14,15 +14,17 @@

template <size_t N = 0>
struct IdTriple {
// A triple has four components: subject, predicate, object, and graph.
static constexpr size_t NumCols = 4;
// The IDs that define the triple: subject, predicate, and object (in the
// order of the permutation), followed by the graph ID.
std::array<Id, 3> ids_;
std::array<Id, NumCols> ids_;
// Some additional payload of the triple. Note that the graph the triple
// belongs to is not part of the payload; it is stored in `ids_`.
std::array<Id, N> payload_;

explicit IdTriple(const std::array<Id, 3>& ids) requires(N == 0)
explicit IdTriple(const std::array<Id, NumCols>& ids) requires(N == 0)
: ids_(ids), payload_(){};

explicit IdTriple(const std::array<Id, 3>& ids,
explicit IdTriple(const std::array<Id, NumCols>& ids,
const std::array<Id, N>& payload) requires(N != 0)
: ids_(ids), payload_(payload){};

Expand All @@ -34,10 +36,11 @@ struct IdTriple {
return os;
}

// TODO: default once we drop clang16 with libc++16
// TODO: use `= default` once we drop Clang 16 with `libc++16`.
std::strong_ordering operator<=>(const IdTriple& other) const {
return std::tie(ids_[0], ids_[1], ids_[2]) <=>
std::tie(other.ids_[0], other.ids_[1], other.ids_[2]);
static_assert(NumCols == 4);
return std::tie(ids_[0], ids_[1], ids_[2], ids_[3]) <=>
std::tie(other.ids_[0], other.ids_[1], other.ids_[2], other.ids_[3]);
}
bool operator==(const IdTriple& other) const = default;

Expand All @@ -49,8 +52,8 @@ struct IdTriple {
// Permute the first three IDs of this triple according to the permutation
// given by `keyOrder`. The graph ID (at index 3) is not permuted.
IdTriple<N> permute(const std::array<size_t, 3>& keyOrder) const {
std::array<Id, 3> newIds{ids_[keyOrder[0]], ids_[keyOrder[1]],
ids_[keyOrder[2]]};
std::array<Id, NumCols> newIds{ids_[keyOrder[0]], ids_[keyOrder[1]],
ids_[keyOrder[2]], ids_[3]};
if constexpr (N == 0) {
return IdTriple<N>(newIds);
} else {
Expand All @@ -60,6 +63,7 @@ struct IdTriple {

CompressedBlockMetadata::PermutedTriple toPermutedTriple() const
requires(N == 0) {
return {ids_[0], ids_[1], ids_[2]};
static_assert(NumCols == 4);
return {ids_[0], ids_[1], ids_[2], ids_[3]};
}
};
34 changes: 14 additions & 20 deletions src/index/CompressedRelation.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -816,12 +816,7 @@ CompressedRelationWriter::compressAndWriteColumn(std::span<const Id> column) {
// in the block metadata.
static std::pair<bool, std::optional<std::vector<Id>>> getGraphInfo(
const std::shared_ptr<IdTable>& block) {
// Early bailout if the graph column doesn't exist (should only happen in
// unit tests).
if (block->numColumns() <= ADDITIONAL_COLUMN_GRAPH_ID) {
return {false, std::nullopt};
}

AD_CORRECTNESS_CHECK(block->numColumns() > ADDITIONAL_COLUMN_GRAPH_ID);
// Return true iff the block contains duplicates when only considering the
// actual triple of S, P, and O.
auto hasDuplicates = [&block]() {
Expand Down Expand Up @@ -874,18 +869,13 @@ void CompressedRelationWriter::compressAndWriteBlock(
AD_CORRECTNESS_CHECK(lastCol0Id == last[0]);

auto [hasDuplicates, graphInfo] = getGraphInfo(block);
// The blocks are written in parallel and possibly out of order. We thus
// can't set the proper block indices here. The proper block indices are set
// in the `getFinishedBlocks` function.
static constexpr size_t blockIndexNotYetSet = 111333555;
blockBuffer_.wlock()->push_back(
CompressedBlockMetadata{std::move(offsets),
numRows,
{first[0], first[1], first[2]},
{last[0], last[1], last[2]},
std::move(graphInfo),
hasDuplicates,
blockIndexNotYetSet});
blockBuffer_.wlock()->emplace_back(CompressedBlockMetadataNoBlockIndex{
std::move(offsets),
numRows,
{first[0], first[1], first[2], first[3]},
{last[0], last[1], last[2], last[3]},
std::move(graphInfo),
hasDuplicates});
if (invokeCallback && smallBlocksCallback_) {
std::invoke(smallBlocksCallback_, std::move(block));
}
Expand Down Expand Up @@ -916,6 +906,10 @@ CompressedRelationReader::getRelevantBlocks(
setKey(&PermutedTriple::col1Id_, &ScanSpecification::col1Id);
setKey(&PermutedTriple::col2Id_, &ScanSpecification::col2Id);

// We currently don't filter by the graph ID here.
key.firstTriple_.graphId_ = Id::min();
key.lastTriple_.graphId_ = Id::max();

// This comparator only returns true if a block stands completely before
// another block without any overlap. In other words, the last triple of `a`
// must be smaller than the first triple of `b` to return true.
Expand Down Expand Up @@ -947,14 +941,14 @@ auto CompressedRelationReader::getFirstAndLastTriple(
// Note: the following call only returns the part of the block that
// actually matches the col0 and col1.
return readPossiblyIncompleteBlock(scanSpec, block, std::nullopt,
{{0, 1, 2}});
{{0, 1, 2, ADDITIONAL_COLUMN_GRAPH_ID}});
};

auto rowToTriple =
[&](const auto& row) -> CompressedBlockMetadata::PermutedTriple {
AD_CORRECTNESS_CHECK(!scanSpec.col0Id().has_value() ||
row[0] == scanSpec.col0Id().value());
return {row[0], row[1], row[2]};
return {row[0], row[1], row[2], row[ADDITIONAL_COLUMN_GRAPH_ID]};
};

auto firstBlock = scanBlock(relevantBlocks.front());
Expand Down
36 changes: 22 additions & 14 deletions src/index/CompressedRelation.h
Original file line number Diff line number Diff line change
Expand Up @@ -49,7 +49,7 @@ struct DecompressedBlockSizeGetter {
using CompressedBlock = std::vector<std::vector<char>>;

// The metadata of a compressed block of ID triples in an index permutation.
struct CompressedBlockMetadata {
struct CompressedBlockMetadataNoBlockIndex {
// Since we have column-based indices, the two columns of each block are
// stored separately (but adjacently).
struct OffsetAndCompressedSize {
Expand Down Expand Up @@ -79,13 +79,14 @@ struct CompressedBlockMetadata {
Id col0Id_;
Id col1Id_;
Id col2Id_;
Id graphId_;
auto operator<=>(const PermutedTriple&) const = default;

// Formatted output for debugging.
friend std::ostream& operator<<(std::ostream& str,
const PermutedTriple& trip) {
str << "Triple: " << trip.col0Id_ << ' ' << trip.col1Id_ << ' '
<< trip.col2Id_ << std::endl;
<< trip.col2Id_ << ' ' << trip.graphId_ << std::endl;
return str;
}

Expand All @@ -103,13 +104,16 @@ struct CompressedBlockMetadata {
// blocks.
bool containsDuplicatesWithDifferentGraphs_;

// Two of these are equal if all members are equal.
bool operator==(const CompressedBlockMetadataNoBlockIndex&) const = default;
};

// The same as the above struct, but this block additionally knows its index.
struct CompressedBlockMetadata : CompressedBlockMetadataNoBlockIndex {
// The index of this block in the permutation. This is required to find
// the corresponding block from the `LocatedTriples` when only a subset of
// blocks is being used.
size_t blockIndex_;

// Two of these are equal if all members are equal.
bool operator==(const CompressedBlockMetadata&) const = default;
};

// Serialization of the nested `OffsetAndCompressedSize` struct.
Expand Down Expand Up @@ -173,7 +177,8 @@ AD_SERIALIZE_FUNCTION(CompressedRelationMetadata) {
class CompressedRelationWriter {
private:
ad_utility::Synchronized<ad_utility::File> outfile_;
ad_utility::Synchronized<std::vector<CompressedBlockMetadata>> blockBuffer_;
ad_utility::Synchronized<std::vector<CompressedBlockMetadataNoBlockIndex>>
blockBuffer_;
// If multiple small relations are stored in the same block, keep track of the
// first and last `col0Id`.
Id currentBlockFirstCol0_ = Id::makeUndefined();
Expand Down Expand Up @@ -259,15 +264,19 @@ class CompressedRelationWriter {
std::vector<CompressedBlockMetadata> getFinishedBlocks() && {
finish();
auto blocks = std::move(*(blockBuffer_.wlock()));
std::ranges::sort(blocks, {}, [](const CompressedBlockMetadata& bl) {
return std::tie(bl.firstTriple_.col0Id_, bl.firstTriple_.col1Id_,
bl.firstTriple_.col2Id_);
});
std::ranges::sort(
blocks, {}, [](const CompressedBlockMetadataNoBlockIndex& bl) {
return std::tie(bl.firstTriple_.col0Id_, bl.firstTriple_.col1Id_,
bl.firstTriple_.col2Id_);
});

std::vector<CompressedBlockMetadata> result;
result.reserve(blocks.size());
// Write the correct block indices
for (size_t i : ad_utility::integerRange(blocks.size())) {
blocks.at(i).blockIndex_ = i;
result.emplace_back(std::move(blocks.at(i)), i);
}
return blocks;
return result;
}

// Compute the multiplicity given the number of elements and the number of
Expand Down Expand Up @@ -356,8 +365,7 @@ class CompressedRelationWriter {
friend std::pair<std::vector<CompressedBlockMetadata>,
std::vector<CompressedRelationMetadata>>
compressedRelationTestWriteCompressedRelations(
const auto& inputs, std::string filename,
ad_utility::MemorySize blocksize);
auto inputs, std::string filename, ad_utility::MemorySize blocksize);
};

using namespace std::string_view_literals;
Expand Down
2 changes: 1 addition & 1 deletion src/index/IndexFormatVersion.h
Original file line number Diff line number Diff line change
Expand Up @@ -36,5 +36,5 @@ struct IndexFormatVersion {
// The actual index version. Change it once the binary format of the index
// changes.
inline const IndexFormatVersion& indexFormatVersion{
1571, DateYearOrDuration{Date{2024, 10, 22}}};
1572, DateYearOrDuration{Date{2024, 10, 22}}};
} // namespace qlever
98 changes: 69 additions & 29 deletions src/index/LocatedTriples.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -57,36 +57,56 @@ NumAddedAndDeleted LocatedTriplesPerBlock::numTriples(size_t blockIndex) const {
return {countInserts, blockUpdateTriples.size() - countInserts};
}

// ____________________________________________________________________________
// Collect the relevant entries of a LocatedTriple into a triple.
template <size_t numIndexColumns>
// Return a `std::tie` of the relevant entries of a row, according to
// `numIndexColumns` and `includeGraphColumn`. For example, if `numIndexColumns`
// is `2` and `includeGraphColumn` is `true`, the function returns
// `std::tie(row[0], row[1], row[2])`.
template <size_t numIndexColumns, bool includeGraphColumn>
requires(numIndexColumns >= 1 && numIndexColumns <= 3)
auto tieIdTableRow(auto& row) {
return [&row]<size_t... I>(std::index_sequence<I...>) {
return std::tie(row[I]...);
}(std::make_index_sequence<numIndexColumns>{});
}(std::make_index_sequence<numIndexColumns +
static_cast<size_t>(includeGraphColumn)>{});
}

// ____________________________________________________________________________
// Collect the relevant entries of a LocatedTriple into a triple.
template <size_t numIndexColumns>
// Return a `std::tie` of the relevant entries of a located triple,
// according to `numIndexColumns` and `includeGraphColumn`. For example, if
// `numIndexColumns` is `2` and `includeGraphColumn` is `true`, the function
// returns `std::tie(ids_[1], ids_[2], ids_[3])`, where `ids_` is from
// `lt->triple_`.
template <size_t numIndexColumns, bool includeGraphColumn>
requires(numIndexColumns >= 1 && numIndexColumns <= 3)
auto tieLocatedTriple(auto& lt) {
constexpr auto indices = []() {
std::array<size_t,
numIndexColumns + static_cast<size_t>(includeGraphColumn)>
a;
for (size_t i = 0; i < numIndexColumns; ++i) {
a[i] = 3 - numIndexColumns + i;
}
if (includeGraphColumn) {
// The graph column resides at index `3` of the located triple.
a.back() = 3;
}
return a;
}();
auto& ids = lt->triple_.ids_;
return [&ids]<size_t... I>(std::index_sequence<I...>) {
return std::tie(ids[3 - numIndexColumns + I]...);
}(std::make_index_sequence<numIndexColumns>{});
return [&ids]<size_t... I>(ad_utility::ValueSequence<size_t, I...>) {
return std::tie(ids[I]...);
}(ad_utility::toIntegerSequence<indices>());
}

// ____________________________________________________________________________
template <size_t numIndexColumns>
template <size_t numIndexColumns, bool includeGraphColumn>
IdTable LocatedTriplesPerBlock::mergeTriplesImpl(size_t blockIndex,
const IdTable& block) const {
// This method should only be called if there are located triples in the
// specified block.
AD_CONTRACT_CHECK(map_.contains(blockIndex));

AD_CONTRACT_CHECK(numIndexColumns <= block.numColumns());
AD_CONTRACT_CHECK(numIndexColumns + static_cast<size_t>(includeGraphColumn) <=
block.numColumns());

auto numInsertsAndDeletes = numTriples(blockIndex);
IdTable result{block.numColumns(), block.getAllocator()};
Expand All @@ -95,24 +115,33 @@ IdTable LocatedTriplesPerBlock::mergeTriplesImpl(size_t blockIndex,
const auto& locatedTriples = map_.at(blockIndex);

auto lessThan = [](const auto& lt, const auto& row) {
return tieLocatedTriple<numIndexColumns>(lt) <
tieIdTableRow<numIndexColumns>(row);
return tieLocatedTriple<numIndexColumns, includeGraphColumn>(lt) <
tieIdTableRow<numIndexColumns, includeGraphColumn>(row);
};
auto equal = [](const auto& lt, const auto& row) {
return tieLocatedTriple<numIndexColumns>(lt) ==
tieIdTableRow<numIndexColumns>(row);
return tieLocatedTriple<numIndexColumns, includeGraphColumn>(lt) ==
tieIdTableRow<numIndexColumns, includeGraphColumn>(row);
};

auto rowIt = block.begin();
auto locatedTripleIt = locatedTriples.begin();
auto resultIt = result.begin();

auto writeTripleToResult = [&result, &resultIt](auto& locatedTriple) {
for (size_t i = 0; i < numIndexColumns; i++) {
// Write the given `locatedTriple` to `result` at position `resultIt` and
// advance `resultIt` by one. See the example in the comment of the
// declaration of `mergeTriples` to understand the behavior of this function.
auto writeLocatedTripleToResult = [&result, &resultIt](auto& locatedTriple) {
// Write part from `locatedTriple` that also occurs in the input `block` to
// the result.
static constexpr auto plusOneIfGraph =
static_cast<size_t>(includeGraphColumn);
for (size_t i = 0; i < numIndexColumns + plusOneIfGraph; i++) {
(*resultIt)[i] = locatedTriple.triple_.ids_[3 - numIndexColumns + i];
}
// Write UNDEF to any additional columns.
for (size_t i = numIndexColumns; i < result.numColumns(); i++) {
// If the input `block` has payload columns (which located triples don't
// have), set their values to UNDEF.
for (size_t i = numIndexColumns + plusOneIfGraph; i < result.numColumns();
i++) {
(*resultIt)[i] = ValueId::makeUndefined();
}
resultIt++;
Expand All @@ -122,7 +151,7 @@ IdTable LocatedTriplesPerBlock::mergeTriplesImpl(size_t blockIndex,
if (lessThan(locatedTripleIt, *rowIt)) {
if (locatedTripleIt->shouldTripleExist_) {
// Insertion of a non-existent triple.
writeTripleToResult(*locatedTripleIt);
writeLocatedTripleToResult(*locatedTripleIt);
}
locatedTripleIt++;
} else if (equal(locatedTripleIt, *rowIt)) {
Expand All @@ -142,7 +171,7 @@ IdTable LocatedTriplesPerBlock::mergeTriplesImpl(size_t blockIndex,
std::ranges::for_each(
std::ranges::subrange(locatedTripleIt, locatedTriples.end()) |
std::views::filter(&LocatedTriple::shouldTripleExist_),
writeTripleToResult);
writeLocatedTripleToResult);
}
if (rowIt != block.end()) {
AD_CORRECTNESS_CHECK(locatedTripleIt == locatedTriples.end());
Expand All @@ -158,14 +187,25 @@ IdTable LocatedTriplesPerBlock::mergeTriplesImpl(size_t blockIndex,
// ____________________________________________________________________________
IdTable LocatedTriplesPerBlock::mergeTriples(size_t blockIndex,
const IdTable& block,
size_t numIndexColumns) const {
if (numIndexColumns == 3) {
return mergeTriplesImpl<3>(blockIndex, block);
} else if (numIndexColumns == 2) {
return mergeTriplesImpl<2>(blockIndex, block);
size_t numIndexColumns,
bool includeGraphColumn) const {
// The following code does nothing more than turn `numIndexColumns` and
// `includeGraphColumn` into template parameters of `mergeTriplesImpl`.
auto mergeTriplesImplHelper = [numIndexColumns, blockIndex, &block,
this]<bool hasGraphColumn>() {
if (numIndexColumns == 3) {
return mergeTriplesImpl<3, hasGraphColumn>(blockIndex, block);
} else if (numIndexColumns == 2) {
return mergeTriplesImpl<2, hasGraphColumn>(blockIndex, block);
} else {
AD_CORRECTNESS_CHECK(numIndexColumns == 1);
return mergeTriplesImpl<1, hasGraphColumn>(blockIndex, block);
}
};
if (includeGraphColumn) {
return mergeTriplesImplHelper.template operator()<true>();
} else {
AD_CORRECTNESS_CHECK(numIndexColumns == 1);
return mergeTriplesImpl<1>(blockIndex, block);
return mergeTriplesImplHelper.template operator()<false>();
}
}

Expand Down
Loading

0 comments on commit f856919

Please sign in to comment.