diff --git a/cpp/libc.cc b/cpp/libc.cc index c4d0f98705..67ed8f6935 100644 --- a/cpp/libc.cc +++ b/cpp/libc.cc @@ -6,7 +6,6 @@ #include #include #include -#include #include // getenv() #include #include // gethostname() @@ -17,110 +16,92 @@ namespace libc { -class RegexCache { - public: - static const int kDefaultSize = 100; - - struct CacheEntry { - CacheEntry() = delete; - CacheEntry(const CacheEntry&) = delete; - - CacheEntry(BigStr* pat, int cflags) : pat_() { - int status = ::regcomp(&compiled_, pat->data_, cflags); - if (status != 0) { - char error_desc[50]; - regerror(status, &compiled_, error_desc, 50); +RegexCache::CacheEntry::CacheEntry(BigStr* pat, int cflags) : pat_() { + int status = ::regcomp(&compiled_, pat->data_, cflags); + if (status != 0) { + char error_desc[50]; + regerror(status, &compiled_, error_desc, 50); - char error_message[80]; - snprintf(error_message, 80, "Invalid regex %s (%s)", pat->data_, - error_desc); + char error_message[80]; + snprintf(error_message, 80, "Invalid regex %s (%s)", pat->data_, + error_desc); - throw Alloc(StrFromC(error_message)); - } + throw Alloc(StrFromC(error_message)); + } - pat_ = static_cast(malloc(len(pat) + 1)); - memcpy(pat_, pat->data_, len(pat) + 1); - pat_hash_ = hash(pat); - } + pat_ = static_cast(malloc(len(pat) + 1)); + memcpy(pat_, pat->data_, len(pat) + 1); + pat_hash_ = hash(pat); +} - ~CacheEntry() { - DCHECK(pat_ != nullptr); - free(pat_); - regfree(&compiled_); - } +RegexCache::CacheEntry::~CacheEntry() { + DCHECK(pat_ != nullptr); + free(pat_); + regfree(&compiled_); +} - char* pat_; - int pat_hash_; - regex_t compiled_; - }; - - RegexCache(int capacity) : capacity_(capacity), access_list_() { - // Override if env var is set. - char* e = getenv("OILS_REGEX_CACHE_SIZE"); - if (e) { - int result; - if (StringToInteger(e, strlen(e), 10, &result)) { - capacity_ = result; - } +RegexCache::RegexCache(int capacity) : capacity_(capacity), access_list_() { + // Override if env var is set. + char* e = getenv("OILS_REGEX_CACHE_SIZE"); + if (e) { + int result; + if (StringToInteger(e, strlen(e), 10, &result)) { + capacity_ = result; } } +} - ~RegexCache() { - for (auto& it : access_list_) { - delete it; - } +RegexCache::~RegexCache() { + for (auto& it : access_list_) { + delete it; } +} - regex_t* regcomp(BigStr* pat, int cflags) { - CacheEntry* entry = TakeEntry(pat); - if (entry == nullptr) { - // Dealing with a new entry. Make space and compile. - MaybeEvict(); - entry = new CacheEntry(pat, cflags); - } - - SetMostRecent(entry); - - return &entry->compiled_; +regex_t* RegexCache::regcomp(BigStr* pat, int cflags) { + RegexCache::CacheEntry* entry = TakeEntry(pat); + if (entry == nullptr) { + // Dealing with a new entry. Make space and compile. + MaybeEvict(); + entry = new RegexCache::CacheEntry(pat, cflags); } - private: - CacheEntry* TakeEntry(BigStr* pat) { - auto it = std::find_if(access_list_.begin(), access_list_.end(), - [pat](CacheEntry* entry) { - return hash(pat) == entry->pat_hash_ && - strcmp(pat->data_, entry->pat_) == 0; - }); - if (it == access_list_.end()) { - return nullptr; - } + SetMostRecent(entry); + + return &entry->compiled_; +} - CacheEntry* ret = *it; - access_list_.erase(it); - return ret; +RegexCache::CacheEntry* RegexCache::TakeEntry(BigStr* pat) { + auto it = std::find_if(access_list_.begin(), access_list_.end(), + [pat](RegexCache::CacheEntry* entry) { + return hash(pat) == entry->pat_hash_ && + strcmp(pat->data_, entry->pat_) == 0; + }); + if (it == access_list_.end()) { + return nullptr; } - void MaybeEvict() { - if (access_list_.size() < capacity_) { - return; - } + RegexCache::CacheEntry* ret = *it; + access_list_.erase(it); + return ret; +} - // Evict the least recently used entry. - if (access_list_.size()) { - delete *access_list_.begin(); - access_list_.erase(access_list_.begin()); - } +void RegexCache::MaybeEvict() { + if (access_list_.size() < capacity_) { + return; } - void SetMostRecent(CacheEntry* entry) { - access_list_.push_back(entry); + // Evict the least recently used entry. + if (access_list_.size()) { + delete *access_list_.begin(); + access_list_.erase(access_list_.begin()); } +} - size_t capacity_; - std::vector access_list_; -}; +void RegexCache::SetMostRecent(RegexCache::CacheEntry* entry) { + access_list_.push_back(entry); +} -static RegexCache gRegexCache(RegexCache::kDefaultSize); +RegexCache gRegexCache(RegexCache::kDefaultSize); BigStr* gethostname() { // Note: Fixed issue #1656 - OS X and FreeBSD don't have HOST_NAME_MAX diff --git a/cpp/libc.h b/cpp/libc.h index b209125530..7753830e5d 100644 --- a/cpp/libc.h +++ b/cpp/libc.h @@ -3,6 +3,7 @@ #ifndef LIBC_H #define LIBC_H +#include #include #include "mycpp/runtime.h" @@ -33,6 +34,38 @@ List* regex_search(BigStr* pattern, int cflags, BigStr* str, int eflags, int wcswidth(BigStr* str); int get_terminal_width(); +class RegexCache { + public: + static const int kDefaultSize = 100; + + struct CacheEntry { + CacheEntry() = delete; + CacheEntry(const CacheEntry&) = delete; + + CacheEntry(BigStr* pat, int cflags); + ~CacheEntry(); + + char* pat_; + int pat_hash_; + regex_t compiled_; + }; + + RegexCache(int capacity); + ~RegexCache(); + + regex_t* regcomp(BigStr* pat, int cflags); + + private: + CacheEntry* TakeEntry(BigStr* pat); + void MaybeEvict(); + void SetMostRecent(CacheEntry* entry); + + size_t capacity_; + std::vector access_list_; +}; + +extern RegexCache gRegexCache; + } // namespace libc #endif // LIBC_H diff --git a/cpp/libc_test.cc b/cpp/libc_test.cc index 0d65d24301..fee45fbeed 100644 --- a/cpp/libc_test.cc +++ b/cpp/libc_test.cc @@ -146,13 +146,13 @@ TEST for_test_coverage() { } void FindAll(const char* p, const char* s) { - regex_t pat; + regex_t* pat; int cflags = REG_EXTENDED; - if (regcomp(&pat, p, cflags) != 0) { + if ((pat = libc::gRegexCache.regcomp(StrFromC(p), cflags)) == nullptr) { FAIL(); } - int outlen = pat.re_nsub + 1; // number of captures + int outlen = pat->re_nsub + 1; // number of captures // TODO: Could statically allocate 99, and assert that re_nsub is less than // 99. Would speed up loops. @@ -164,7 +164,7 @@ void FindAll(const char* p, const char* s) { while (true) { // Necessary so ^ doesn't match in the middle! int eflags = cur_pos == 0 ? 0 : REG_NOTBOL; - bool match = regexec(&pat, s + cur_pos, outlen, pmatch, eflags) == 0; + bool match = regexec(pat, s + cur_pos, outlen, pmatch, eflags) == 0; if (!match) { break; @@ -186,7 +186,6 @@ void FindAll(const char* p, const char* s) { } free(pmatch); - regfree(&pat); } // adjacent matches