Skip to content

Commit

Permalink
feat: use glob-to-regex to simplify and enhance rule_based_entry_filter
Browse files Browse the repository at this point in the history
  • Loading branch information
mhx committed Nov 16, 2024
1 parent 480de27 commit 39e8f57
Show file tree
Hide file tree
Showing 2 changed files with 37 additions and 63 deletions.
81 changes: 18 additions & 63 deletions src/writer/rule_based_entry_filter.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,8 @@
#include <dwarfs/writer/entry_interface.h>
#include <dwarfs/writer/rule_based_entry_filter.h>

#include <dwarfs/internal/glob_to_regex.h>

namespace dwarfs::writer {

namespace internal {
Expand All @@ -51,7 +53,7 @@ struct filter_rule {
};

filter_rule(rule_type type, bool floating, std::string const& re,
std::string const& rule)
std::string_view rule)
: type{type}
, floating{floating}
, re{re}
Expand Down Expand Up @@ -89,14 +91,15 @@ class rule_based_entry_filter_ : public rule_based_entry_filter::impl {

template <typename LoggerPolicy>
auto rule_based_entry_filter_<LoggerPolicy>::compile_filter_rule(
std::string_view rule_sv) -> filter_rule {
std::string rule{rule_sv};
std::string_view rule) -> filter_rule {
std::string re;
filter_rule::rule_type type;

auto* p = rule.c_str();
if (rule.empty()) {
throw std::runtime_error("empty filter rule");
}

switch (*p) {
switch (rule[0]) {
case '+':
type = filter_rule::rule_type::include;
break;
Expand All @@ -107,71 +110,23 @@ auto rule_based_entry_filter_<LoggerPolicy>::compile_filter_rule(
throw std::runtime_error("rules must start with + or -");
}

while (*++p == ' ')
;
auto pattern_start = rule.find_first_not_of(' ', 1);

if (pattern_start == std::string::npos) {
throw std::runtime_error("no pattern specified in filter rule: " +
std::string(rule));
}

auto pattern = rule.substr(pattern_start);

// If the start of the pattern is not explicitly anchored, make it floating.
bool floating = *p && *p != '/';
bool floating = pattern[0] != '/';

if (floating) {
re += ".*/";
}

while (*p) {
switch (*p) {
case '\\':
re += *p++;
if (p) {
re += *p++;
}
continue;

case '*': {
int nstar = 1;
while (*++p == '*') {
++nstar;
}
switch (nstar) {
case 1:
if (re.ends_with('/') and (*p == '/' or *p == '\0')) {
re += "[^/]+";
} else {
re += "[^/]*";
}
break;
case 2:
re += ".*";
break;
default:
throw std::runtime_error("too many *s");
}
}
continue;

case '?':
re += "[^/]";
break;

case '.':
case '+':
case '^':
case '$':
case '(':
case ')':
case '{':
case '}':
case '|':
re += '\\';
re += *p;
break;

default:
re += *p;
break;
}

++p;
}
re += dwarfs::internal::glob_to_regex_string(pattern) + "$";

LOG_DEBUG << "'" << rule << "' -> '" << re << "' [floating=" << floating
<< "]";
Expand Down
19 changes: 19 additions & 0 deletions test/tool_main_test.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1370,6 +1370,25 @@ TEST(mkdwarfs_test, cannot_combine_input_list_and_filter) {
::testing::HasSubstr("cannot combine --input-list and --filter"));
}

// A filter rule passed via -F whose first character is neither '+' nor '-'
// (here "% *") must make the run fail with a descriptive error message.
TEST(mkdwarfs_test, rules_must_start_with_plus_or_minus) {
  auto tester = mkdwarfs_tester::create_empty();

  auto const exit_code = tester.run({"-i", "/", "-o", "-", "-F", "% *"});

  // The run must report failure through a non-zero exit code ...
  EXPECT_NE(0, exit_code);
  // ... and the error output must contain the expected message.
  EXPECT_THAT(tester.err(),
              ::testing::HasSubstr("rules must start with + or -"));
}

// An empty string passed as the -F filter rule must make the run fail with
// an "empty filter rule" error.
TEST(mkdwarfs_test, empty_filter_rule) {
  auto tester = mkdwarfs_tester::create_empty();

  auto const exit_code = tester.run({"-i", "/", "-o", "-", "-F", ""});

  // Non-zero exit code signals the failure ...
  EXPECT_NE(0, exit_code);
  // ... and the error output must contain the expected message.
  EXPECT_THAT(tester.err(), ::testing::HasSubstr("empty filter rule"));
}

// A -F filter rule consisting only of the type character followed by spaces
// (here "+ ") carries no pattern and must make the run fail accordingly.
TEST(mkdwarfs_test, no_pattern_specified_in_filter_rule) {
  auto tester = mkdwarfs_tester::create_empty();

  auto const exit_code = tester.run({"-i", "/", "-o", "-", "-F", "+ "});

  // Non-zero exit code signals the failure ...
  EXPECT_NE(0, exit_code);
  // ... and the error output must contain the expected message.
  EXPECT_THAT(tester.err(),
              ::testing::HasSubstr("no pattern specified in filter rule"));
}

TEST(mkdwarfs_test, cannot_open_input_list_file) {
mkdwarfs_tester t;
EXPECT_NE(0, t.run({"--input-list", "missing.list", "-o", "-"}));
Expand Down

0 comments on commit 39e8f57

Please sign in to comment.