Skip to content

Commit

Permalink
Some minor fixes before merging this
Browse files Browse the repository at this point in the history
  • Loading branch information
Hannah Bast committed Oct 16, 2024
1 parent 78a0aa7 commit d14cd8e
Show file tree
Hide file tree
Showing 4 changed files with 39 additions and 40 deletions.
22 changes: 11 additions & 11 deletions src/index/IndexBuilderMain.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -49,14 +49,14 @@ void writeStxxlConfigFile(const string& location, const string& tail) {
<< STXXL_DISK_SIZE_INDEX_BUILDER << ",syscall\n";
}

// Check, that the `values` has exactly one or `numFiles` many entries. If
// `allowZero` is true, then an empty vector will also be accepted. If this
// Check that `values` has exactly one or `numFiles` many entries. If
// `allowEmpty` is true, then an empty vector will also be accepted. If this
// condition is violated, throw an exception. This is used to validate the
// parameters for file types and default graphs.
static void checkNumParameterValues(const auto& values, size_t numFiles,
bool allowZero,
bool allowEmpty,
std::string_view parameterName) {
if (allowZero && values.empty()) {
if (allowEmpty && values.empty()) {
return;
}
if (values.size() == 1 || values.size() == numFiles) {
Expand All @@ -67,7 +67,7 @@ static void checkNumParameterValues(const auto& values, size_t numFiles,
"\" must be specified either exactly once (in which case it is "
"used for all input files) or exactly as many times as there are "
"input files, in which case each input file has its own value.");
if (allowZero) {
if (allowEmpty) {
absl::StrAppend(&error,
" The parameter can also be omitted entirely, in which "
" case a default value is used for all input files.");
Expand Down Expand Up @@ -95,7 +95,7 @@ qlever::Filetype getFiletype(std::optional<std::string_view> filetype,
return result.value();
} else {
throw std::runtime_error{
absl::StrCat("The parameter --file-format, -F must be one of "
absl::StrCat("The value of --file-format or -F must be one of "
"`ttl`, `nt`, or `nq`, but is `",
filetype.value(), "`")};
}
Expand Down Expand Up @@ -123,8 +123,8 @@ qlever::Filetype getFiletype(std::optional<std::string_view> filetype,
AD_FAIL();
}

// Get parameter values at the given index. If the vector is empty, return the
// given `defaultValue`. If the vector has exactly one element, return that
// Get the parameter value at the given index. If the vector is empty, return
// the given `defaultValue`. If the vector has exactly one element, return that
// element, no matter what the index is.
template <typename T>
T getParameterValue(size_t idx, const auto& values, const T& defaultValue) {
Expand Down Expand Up @@ -186,9 +186,9 @@ int main(int argc, char** argv) {
"or once per file, or not at all (in that case, the format is deduced "
"from the filename suffix if possible).");
add("default-graph,g", po::value(&defaultGraphs),
"The graph IRI without angle brackets. If set to `-`, the default graph "
"will be used. Can be omitted (then all files use the default graph), "
"specified once (then all files use that graph), or once per file.");
"The graph IRI without angle brackets. Write `-` for the default graph. "
"Can be omitted (then all files use the default graph), specified once "
"(then all files use that graph), or once per file.");
add("parse-parallel,p", po::value(&parseParallel),
"Enable or disable the parallel parser for all files (if specified once) "
"or once per input file. Parallel parsing works for all input files "
Expand Down
30 changes: 15 additions & 15 deletions src/index/IndexImpl.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -287,28 +287,28 @@ std::pair<size_t, size_t> IndexImpl::createInternalPSOandPOS(
}

// _____________________________________________________________________________
void IndexImpl::prepareInputFileSpecificationsAndLog(
std::vector<Index::InputFileSpecification>& files,
void IndexImpl::updateInputFileSpecificationsAndLog(
std::vector<Index::InputFileSpecification>& spec,
bool parallelParsingSpecifiedViaJson) {
if (files.size() == 1) {
LOG(INFO) << "Processing input triples from " << files.at(0).filename_
<< " ..." << std::endl;
if (spec.size() == 1) {
LOG(INFO) << "Processing triples from " << spec.at(0).filename_ << " ..."
<< std::endl;
} else {
LOG(INFO) << "Processing input triples from " << files.size()
<< " distinct input files" << std::endl;
LOG(INFO) << "Processing triples from " << spec.size()
<< " input streams ..." << std::endl;
}
if (parallelParsingSpecifiedViaJson) {
if (files.size() == 1) {
LOG(WARN) << "Parallel parsing was set to `true` in the settings JSON. "
"This is deprecated, please use the command-line option "
"--parse-parallel or -p instead"
if (spec.size() == 1) {
LOG(WARN) << "Parallel parsing set to `true` in the `.settings.json` "
"file; this is deprecated, please use the command-line "
" option --parse-parallel or -p instead"
<< std::endl;
files.at(0).parseInParallel_ = true;
spec.at(0).parseInParallel_ = true;
} else {
throw std::runtime_error{
"For more than one input file, the parallel parsing must not be "
"specified via the settings JSON file, but has to be specified via "
"the command-line option --parse-parallel or -p"};
"specified via the `.settings.json` file, but has to be specified "
" via the command-line option --parse-parallel or -p"};
}
}
}
Expand All @@ -323,7 +323,7 @@ void IndexImpl::createFromFiles(

readIndexBuilderSettingsFromFile();

prepareInputFileSpecificationsAndLog(files, useParallelParser_);
updateInputFileSpecificationsAndLog(files, useParallelParser_);
IndexBuilderDataAsFirstPermutationSorter indexBuilderData =
createIdTriplesAndVocab(makeRdfParser(files));

Expand Down
8 changes: 4 additions & 4 deletions src/index/IndexImpl.h
Original file line number Diff line number Diff line change
Expand Up @@ -762,9 +762,9 @@ class IndexImpl {
void addInternalStatisticsToConfiguration(size_t numTriplesInternal,
size_t numPredicatesInternal);

// Write information from the given `InputFileSpecification` to the log and
// make a few checks.
static void prepareInputFileSpecificationsAndLog(
std::vector<Index::InputFileSpecification>& files,
// Update `InputFileSpecification` based on `parallelParsingSpecifiedViaJson`
// and write a summary to the log.
static void updateInputFileSpecificationsAndLog(
std::vector<Index::InputFileSpecification>& spec,
bool parallelParsingSpecifiedViaJson);
};
19 changes: 9 additions & 10 deletions test/IndexTest.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -494,7 +494,7 @@ TEST(IndexTest, loggingAndSettingOfParallelParsing) {
testing::internal::CaptureStdout();
using namespace ::testing;
{
IndexImpl::prepareInputFileSpecificationsAndLog(files, false);
IndexImpl::updateInputFileSpecificationsAndLog(files, false);
EXPECT_THAT(
testing::internal::GetCapturedStdout(),
AllOf(HasSubstr("from singleFile.ttl"), Not(HasSubstr("parallel"))));
Expand All @@ -503,28 +503,27 @@ TEST(IndexTest, loggingAndSettingOfParallelParsing) {

{
testing::internal::CaptureStdout();
IndexImpl::prepareInputFileSpecificationsAndLog(files, true);
EXPECT_THAT(
testing::internal::GetCapturedStdout(),
AllOf(HasSubstr("from singleFile.ttl"), HasSubstr("This is deprecated"),
HasSubstr("--parse-parallel")));
IndexImpl::updateInputFileSpecificationsAndLog(files, true);
EXPECT_THAT(testing::internal::GetCapturedStdout(),
AllOf(HasSubstr("from singleFile.ttl"), HasSubstr("deprecated"),
HasSubstr("--parse-parallel")));
EXPECT_TRUE(files.at(0).parseInParallel_);
}

{
files.emplace_back("secondFile.ttl", Turtle, std::nullopt, false);
auto filesCopy = files;
testing::internal::CaptureStdout();
IndexImpl::prepareInputFileSpecificationsAndLog(files, false);
IndexImpl::updateInputFileSpecificationsAndLog(files, false);
EXPECT_THAT(testing::internal::GetCapturedStdout(),
AllOf(HasSubstr("from 2 distinct input files"),
Not(HasSubstr("This is deprecated"))));
AllOf(HasSubstr("from 2 input streams"),
Not(HasSubstr("is deprecated"))));
EXPECT_EQ(files, filesCopy);
}

{
AD_EXPECT_THROW_WITH_MESSAGE(
IndexImpl::prepareInputFileSpecificationsAndLog(files, true),
IndexImpl::updateInputFileSpecificationsAndLog(files, true),
HasSubstr("but has to be specified"));
}
}

0 comments on commit d14cd8e

Please sign in to comment.