From bf56bb7ac17ec3c3a3a33b8cf0808aaaad32dcb7 Mon Sep 17 00:00:00 2001
From: Alex Rousskov <rousskov@measurement-factory.com>
Date: Tue, 11 Jun 2024 12:42:47 -0400
Subject: [PATCH 01/13] Upgrade generated parse_line() to accept SBuf

These changes attempt to preserve existing squid.conf grammar
idiosyncrasies. A lot of squid.conf parsing code needs to be upgraded;
these changes focus on parse_line() because upcoming reconfiguration
improvements require that particular function to accept SBuf.

Also fixed ConfigParser SetCfgLine() method and CfgLine data member
descriptions. These members are not about the whole squid.conf line, and
not every squid.conf line goes through these members: These members are
dedicated to tokens located after a directive name on a directive line.
A lot of related code needs to be polished, but we fixed these docs
because this change affects all SetCfgLine() callers and CfgLine users.
---
 src/ConfigParser.cc           |  21 ++-
 src/ConfigParser.h            |  17 ++-
 src/Makefile.am               |   3 +-
 src/base/CharacterSet.cc      |  15 ++
 src/base/CharacterSet.h       |   3 +
 src/cache_cf.cc               | 249 +++++++++++++++++-----------------
 src/cf_gen.cc                 |  19 +--
 src/parser/Tokenizer.cc       |  57 ++++++++
 src/parser/Tokenizer.h        |   8 ++
 src/tests/testACLMaxUserIP.cc |   5 +-
 src/tests/testConfigParser.cc |  14 +-
 src/tests/testRock.cc         |   6 +-
 src/tests/testUfs.cc          |  10 +-
 13 files changed, 258 insertions(+), 169 deletions(-)
diff --git a/src/ConfigParser.cc b/src/ConfigParser.cc
index 0e405eb85ae..ccbea97bd6e 100644
--- a/src/ConfigParser.cc
+++ b/src/ConfigParser.cc
@@ -16,13 +16,14 @@
 #include "fatal.h"
 #include "globals.h"
 #include "neighbors.h"
+#include "parser/Tokenizer.h"
 #include "sbuf/Stream.h"
 
 bool ConfigParser::RecognizeQuotedValues = true;
 bool ConfigParser::StrictMode = true;
 std::stack<ConfigParser::CfgFile *> ConfigParser::CfgFiles;
 ConfigParser::TokenType ConfigParser::LastTokenType = ConfigParser::SimpleToken;
-const char *ConfigParser::CfgLine = nullptr;
+SBuf ConfigParser::CfgLine;
 const char *ConfigParser::CfgPos = nullptr;
 std::queue<char *> ConfigParser::CfgLineTokens_;
 bool ConfigParser::AllowMacros_ = false;
@@ -201,10 +202,10 @@ ConfigParser::UnQuote(const char *token, const char **next)
 }
 
 void
-ConfigParser::SetCfgLine(char *line)
+ConfigParser::SetCfgLine(const SBuf &line)
 {
     CfgLine = line;
-    CfgPos = line;
+    CfgPos = CfgLine.c_str();
     while (!CfgLineTokens_.empty()) {
         char *token = CfgLineTokens_.front();
         CfgLineTokens_.pop();
@@ -561,6 +562,20 @@ ConfigParser::rejectDuplicateDirective()
     throw TextException("duplicate configuration directive", Here());
 }
 
+SBuf
+ConfigParser::openDirective(const SBuf &line)
+{
+    Parser::Tokenizer tk(line);
+    SBuf directiveName; // TODO: Upgrade cfg_directive to SBuf and set it here.
+    static const auto spaceChars = CharacterSet("w_space", w_space);
+    static const auto directiveChars = spaceChars.complement("squid.conf directive name");
+    const auto found = tk.prefix(directiveName, directiveChars);
+    Assure(found); // our callers are expected to fully handle non-directive lines
+    tk.skipAll(spaceChars);
+    SetCfgLine(tk.remaining()); // may be empty
+    return directiveName;
+}
+
 void
 ConfigParser::closeDirective()
 {
diff --git a/src/ConfigParser.h b/src/ConfigParser.h
index f85c20ea3ff..95e02b128b2 100644
--- a/src/ConfigParser.h
+++ b/src/ConfigParser.h
@@ -54,6 +54,11 @@ class ConfigParser
 
     void destruct();
 
+    /// starts parsing the current configuration directive
+    /// \returns directive name
+    /// \sa closeDirective()
+    SBuf openDirective(const SBuf &line);
+
     /// stops parsing the current configuration directive
     void closeDirective();
 
@@ -137,8 +142,8 @@ class ConfigParser
      */
     static char *PeekAtToken();
 
-    /// Set the configuration file line to parse.
-    static void SetCfgLine(char *line);
+    /// Set current directive parameters (i.e. characters after the directive name).
+    static void SetCfgLine(const SBuf &);
 
     /// Allow %macros inside quoted strings
     static void EnableMacros() {AllowMacros_ = true;}
@@ -221,8 +226,12 @@ class ConfigParser
     static char *NextElement(TokenType &type);
     static std::stack<CfgFile *> CfgFiles; ///< The stack of open cfg files
     static TokenType LastTokenType; ///< The type of last parsed element
-    static const char *CfgLine; ///< The current line to parse
-    static const char *CfgPos; ///< Pointer to the next element in cfgLine string
+
+    static SBuf CfgLine; ///< Directive parameters being parsed; \sa SetCfgLine()
+
+    // TODO: Replace with a Tokenizer or a similar iterative parsing class
+    static const char *CfgPos; ///< Pointer to the next element in CfgLine string
+
     static std::queue<char *> CfgLineTokens_; ///< Store the list of tokens for current configuration line
     static bool AllowMacros_;
     static bool ParseQuotedOrToEol_; ///< The next tokens will be handled as quoted or to_eol token
diff --git a/src/Makefile.am b/src/Makefile.am
index 1ce047eed49..bb9b40eaadd 100644
--- a/src/Makefile.am
+++ b/src/Makefile.am
@@ -2719,7 +2719,6 @@ tests_testConfigParser_SOURCES = \
 	tests/testConfigParser.cc
 nodist_tests_testConfigParser_SOURCES = \
 	ConfigParser.cc \
-	tests/stub_SBuf.cc \
 	String.cc \
 	tests/stub_acl.cc \
 	tests/stub_cache_cf.cc \
@@ -2728,6 +2727,8 @@ nodist_tests_testConfigParser_SOURCES = \
 	tests/stub_libmem.cc \
 	tests/stub_neighbors.cc
 tests_testConfigParser_LDADD = \
+	parser/libparser.la \
+	sbuf/libsbuf.la \
 	base/libbase.la \
 	$(LIBCPPUNIT_LIBS) \
 	$(COMPAT_LIB) \
diff --git a/src/base/CharacterSet.cc b/src/base/CharacterSet.cc
index 3793b7c745c..e1bcb20035f 100644
--- a/src/base/CharacterSet.cc
+++ b/src/base/CharacterSet.cc
@@ -168,3 +168,18 @@ CharacterSet::RFC3986_UNRESERVED()
     return *chars;
 }
 
+const CharacterSet &
+CharacterSet::libcSpace()
+{
+    const auto creator = []() -> auto {
+        const auto chars = new CharacterSet("libcSpace");
+        for (size_t idx = 0; idx < 256; ++idx) {
+            if (xisspace(idx))
+                chars->add(idx);
+        }
+        return chars;
+    };
+    static const auto chars = creator();
+    return *chars;
+}
+
diff --git a/src/base/CharacterSet.h b/src/base/CharacterSet.h
index 880072c0306..a6eea43c1f2 100644
--- a/src/base/CharacterSet.h
+++ b/src/base/CharacterSet.h
@@ -121,6 +121,9 @@ class CharacterSet
     /// allowed URI characters that do not have a reserved purpose, RFC 3986
     static const CharacterSet &RFC3986_UNRESERVED();
 
+    /// characters that libc isspace(3) matches, including any locale-specific ones
+    static const CharacterSet &libcSpace();
+
 private:
     /** index of characters in this set
      *
diff --git a/src/cache_cf.cc b/src/cache_cf.cc
index d88fc0dcf13..74cc562b6e6 100644
--- a/src/cache_cf.cc
+++ b/src/cache_cf.cc
@@ -54,6 +54,7 @@
 #include "mgr/Registration.h"
 #include "neighbors.h"
 #include "NeighborTypeDomainList.h"
+#include "parser/Tokenizer.h"
 #include "Parsing.h"
 #include "pconn.h"
 #include "PeerDigest.h"
@@ -180,7 +181,7 @@ static void parse_string(char **);
 static void default_all(void);
 static void defaults_if_none(void);
 static void defaults_postscriptum(void);
-static int parse_line(char *);
+static int parse_line(const SBuf &);
 static void parse_obsolete(const char *);
 static void parseBytesLine(size_t * bptr, const char *units);
 static void parseBytesLineSigned(ssize_t * bptr, const char *units);
@@ -217,7 +218,7 @@ static int check_null_IpAddress_list(const Ip::Address_list *);
 #endif /* USE_WCCPv2 */
 
 static void parsePortCfg(AnyP::PortCfgPointer *, const char *protocol);
-#define parse_PortCfg(l) parsePortCfg((l), token)
+#define parse_PortCfg(l) parsePortCfg((l), cfg_directive)
 static void dump_PortCfg(StoreEntry *, const char *, const AnyP::PortCfgPointer &);
 #define free_PortCfg(h)  *(h)=NULL
 
@@ -298,20 +299,23 @@ skip_ws(const char* s)
     return s;
 }
 
+/// Parsers included configuration files identified by their filenames or glob
+/// patterns and included at the given nesting level (a.k.a. depth).
+/// For example, parses include files in `include /path/to/include/files/*.acl`.
+/// \returns the number of errors (that did not immediately terminate parsing)
 static int
-parseManyConfigFiles(char* files, int depth)
+parseIncludedConfigFiles(const SBuf &paths, const int depth)
 {
     int error_count = 0;
-    char* saveptr = nullptr;
+    Parser::Tokenizer tk(paths);
 #if HAVE_GLOB
-    char *path;
     glob_t globbuf;
     int i;
     memset(&globbuf, 0, sizeof(globbuf));
-    for (path = strwordtok(files, &saveptr); path; path = strwordtok(nullptr, &saveptr)) {
-        if (glob(path, globbuf.gl_pathc ? GLOB_APPEND : 0, nullptr, &globbuf) != 0) {
-            int xerrno = errno;
-            fatalf("Unable to find configuration file: %s: %s", path, xstrerr(xerrno));
+    while (auto path = NextWordWhileRemovingDoubleQuotesAndBackslashesInsideThem(tk)) {
+        if (glob(path->c_str(), globbuf.gl_pathc ? GLOB_APPEND : 0, nullptr, &globbuf) != 0) {
+            const auto xerrno = errno;
+            throw TextException(ToSBuf("Cannot find included configuration file named ", *path, ": ", xstrerr(xerrno)), Here());
         }
     }
     for (i = 0; i < (int)globbuf.gl_pathc; ++i) {
@@ -319,118 +323,120 @@ parseManyConfigFiles(char* files, int depth)
     }
     globfree(&globbuf);
 #else
-    char* file = strwordtok(files, &saveptr);
-    while (file != NULL) {
-        error_count += parseOneConfigFile(file, depth);
-        file = strwordtok(nullptr, &saveptr);
+    while (auto path = NextWordWhileRemovingDoubleQuotesAndBackslashesInsideThem(tk)) {
+        error_count += parseOneConfigFile(path->c_str(), depth);
     }
 #endif /* HAVE_GLOB */
     return error_count;
 }
 
+/// Replaces all occurrences of macroName in buf with macroValue. When looking
+/// for the next macroName occurrence, this one-scan algorithm does not revisit
+/// previously scanned buf areas and does not visit replaced values.
 static void
-ReplaceSubstr(char*& str, int& len, unsigned substrIdx, unsigned substrLen, const char* newSubstr)
+SubstituteMacro(SBuf &buf, const SBuf &macroName, const SBuf &macroValue)
 {
-    assert(str != nullptr);
-    assert(newSubstr != nullptr);
-
-    unsigned newSubstrLen = strlen(newSubstr);
-    if (newSubstrLen > substrLen)
-        str = (char*)realloc(str, len - substrLen + newSubstrLen + 1);
-
-    // move tail part including zero
-    memmove(str + substrIdx + newSubstrLen, str + substrIdx + substrLen, len - substrIdx - substrLen + 1);
-    // copy new substring in place
-    memcpy(str + substrIdx, newSubstr, newSubstrLen);
+    SBuf remainingInput = buf;
+    buf.clear();
+    while (!remainingInput.isEmpty()) {
+        const auto pos = remainingInput.find(macroName);
+        if (pos == SBuf::npos) {
+            buf.append(remainingInput);
+            return;
+        }
 
-    len = strlen(str);
+        buf.append(remainingInput.substr(0, pos));
+        buf.append(macroValue);
+        remainingInput.chop(pos + macroName.length());
+    }
 }
 
 static void
-SubstituteMacro(char*& line, int& len, const char* macroName, const char* substStr)
+ProcessMacros(SBuf &buf)
 {
-    assert(line != nullptr);
-    assert(macroName != nullptr);
-    assert(substStr != nullptr);
-    unsigned macroNameLen = strlen(macroName);
-    while (const char* macroPos = strstr(line, macroName)) // we would replace all occurrences
-        ReplaceSubstr(line, len, macroPos - line, macroNameLen, substStr);
+    static const auto macroServiceName = SBuf("${service_name}");
+    static const auto macroProcessName = SBuf("${process_name}");
+    static const auto macroProcessNumber = SBuf("${process_number}");
+    static const auto kidIdentifier = ToSBuf(KidIdentifier);
+    SubstituteMacro(buf, macroServiceName, service_name);
+    SubstituteMacro(buf, macroProcessName, TheKidName);
+    SubstituteMacro(buf, macroProcessNumber, kidIdentifier);
 }
 
-static void
-ProcessMacros(char*& line, int& len)
+/// extracts the next optional token surrounded by optional space
+static std::optional<SBuf>
+ExtractOptionalToken(Parser::Tokenizer &tk)
 {
-    SubstituteMacro(line, len, "${service_name}", service_name.c_str());
-    SubstituteMacro(line, len, "${process_name}", TheKidName.c_str());
-    SubstituteMacro(line, len, "${process_number}", xitoa(KidIdentifier));
+    const auto &spaceChars = CharacterSet::libcSpace();
+    (void)tk.skipAll(spaceChars);
+
+    SBuf token;
+    static const auto tokenChars = spaceChars.complement("ExtractOptionalToken() token chars");
+    if (!tk.prefix(token, tokenChars))
+        return std::nullopt;
+
+    (void)tk.skipAll(spaceChars);
+    return token;
 }
 
-static void
-trim_trailing_ws(char* str)
+/// extracts the next required token surrounded by optional space
+static SBuf
+ExtractToken(const char * const description, Parser::Tokenizer &tk)
 {
-    assert(str != nullptr);
-    unsigned i = strlen(str);
-    while ((i > 0) && xisspace(str[i - 1]))
-        --i;
-    str[i] = '\0';
+    if (const auto token = ExtractOptionalToken(tk))
+        return *token;
+    throw TextException(ToSBuf("cannot find ", description), Here());
 }
 
-static const char*
-FindStatement(const char* line, const char* statement)
+/// extracts the next token if it matches the given keyword
+/// \returns whether the token was extracted
+static bool
+SkipKeyword(const SBuf &keyword, Parser::Tokenizer &tk)
 {
-    assert(line != nullptr);
-    assert(statement != nullptr);
-
-    const char* str = skip_ws(line);
-    unsigned len = strlen(statement);
-    if (strncmp(str, statement, len) == 0) {
-        str += len;
-        if (*str == '\0')
-            return str;
-        else if (xisspace(*str))
-            return skip_ws(str);
+    const auto savedTk = tk;
+    if (const auto token = ExtractOptionalToken(tk)) {
+        if (token == keyword)
+            return true;
     }
-
-    return nullptr;
+    tk = savedTk;
+    return false;
 }
 
-static bool
-StrToInt(const char* str, long& number)
+/// extracts the next input token (skipping any leading space) and interprets it as
+/// a signed integer (in decimal, hex, or octal base per Parser::Tokenizer::int64())
+static int64_t
+ExtractNumber(const char * const description, Parser::Tokenizer &tk)
 {
-    assert(str != nullptr);
-
-    char* end;
-    number = strtol(str, &end, 0);
-
-    return (end != str) && (*end == '\0'); // returns true if string contains nothing except number
+    auto numberParser = Parser::Tokenizer(ExtractToken(description, tk));
+    int64_t result = 0;
+    if (!numberParser.int64(result, 0, true))
+        throw TextException(ToSBuf("cannot parse ", description), Here());
+    if (!numberParser.remaining().isEmpty())
+        throw TextException(ToSBuf("found trailing garbage after parsing ", description), Here());
+    return result;
 }
 
 static bool
-EvalBoolExpr(const char* expr)
+EvalBoolExpr(Parser::Tokenizer &tk)
 {
-    assert(expr != nullptr);
-    if (strcmp(expr, "true") == 0) {
+    static const auto keywordTrue = SBuf("true");
+    if (tk.remaining() == keywordTrue)
         return true;
-    } else if (strcmp(expr, "false") == 0) {
+
+    static const auto keywordFalse = SBuf("false");
+    if (tk.remaining() == keywordFalse)
         return false;
-    } else if (const char* equation = strchr(expr, '=')) {
-        const char* rvalue = skip_ws(equation + 1);
-        char* lvalue = (char*)xmalloc(equation - expr + 1);
-        xstrncpy(lvalue, expr, equation - expr + 1);
-        trim_trailing_ws(lvalue);
 
-        long number1;
-        if (!StrToInt(lvalue, number1))
-            fatalf("String is not a integer number: '%s'\n", lvalue);
-        long number2;
-        if (!StrToInt(rvalue, number2))
-            fatalf("String is not a integer number: '%s'\n", rvalue);
+    const auto lhs = ExtractNumber("left-hand operand of an equality condition", tk);
 
-        xfree(lvalue);
-        return number1 == number2;
-    }
-    fatalf("Unable to evaluate expression '%s'\n", expr);
-    return false; // this place cannot be reached
+    const auto op = ExtractToken("equality sign in an equality condition", tk);
+    static const auto keywordEqual = SBuf("=");
+    if (op != keywordEqual)
+        throw TextException(ToSBuf("expected equality sign (=) but got ", op), Here());
+
+    const auto rhs = ExtractNumber("right-hand operand of an equality condition", tk);
+
+    return lhs == rhs;
 }
 
 static int
@@ -440,8 +446,6 @@ parseOneConfigFile(const char *file_name, unsigned int depth)
     const char *orig_cfg_filename = cfg_filename;
     const int orig_config_lineno = config_lineno;
     char *token = nullptr;
-    char *tmp_line = nullptr;
-    int tmp_line_len = 0;
     int err_count = 0;
     int is_pipe = 0;
 
@@ -473,6 +477,9 @@ parseOneConfigFile(const char *file_name, unsigned int depth)
 
     config_lineno = 0;
 
+    // sequential raw input lines merged to honor line continuation markers
+    SBuf wholeLine;
+
     std::vector<bool> if_states;
     while (fgets(config_input_line, BUFSIZ, fp)) {
         ++config_lineno;
@@ -522,46 +529,47 @@ parseOneConfigFile(const char *file_name, unsigned int depth)
         if (config_input_line[0] == '\0')
             continue;
 
-        const char* append = tmp_line_len ? skip_ws(config_input_line) : config_input_line;
+        wholeLine.append(config_input_line);
 
-        size_t append_len = strlen(append);
-
-        tmp_line = (char*)xrealloc(tmp_line, tmp_line_len + append_len + 1);
-
-        strcpy(tmp_line + tmp_line_len, append);
-
-        tmp_line_len += append_len;
-
-        if (tmp_line[tmp_line_len-1] == '\\') {
-            debugs(3, 5, "parseConfigFile: tmp_line='" << tmp_line << "'");
-            tmp_line[--tmp_line_len] = '\0';
+        if (!wholeLine.isEmpty() && *wholeLine.rbegin() == '\\') {
+            debugs(3, 5, "expecting line continuation after " << wholeLine);
+            wholeLine.chop(0, wholeLine.length() - 1); // drop trailing backslash
             continue;
         }
 
-        trim_trailing_ws(tmp_line);
-        ProcessMacros(tmp_line, tmp_line_len);
-        debugs(3, (opt_parse_cfg_only?1:5), "Processing: " << tmp_line);
+        ProcessMacros(wholeLine);
+        auto tk = Parser::Tokenizer(wholeLine);
 
-        if (const char* expr = FindStatement(tmp_line, "if")) {
-            if_states.push_back(EvalBoolExpr(expr)); // store last if-statement meaning
-        } else if (FindStatement(tmp_line, "endif")) {
+        const auto &spaces = CharacterSet::libcSpace();
+        // (void)tk.skipAll(spaces) is not necessary due to earlier skip_ws()
+        (void)tk.skipAllTrailing(spaces);
+
+        debugs(3, (opt_parse_cfg_only?1:5), "Processing: " << tk.remaining());
+
+        static const auto keywordIf = SBuf("if");
+        static const auto keywordElse = SBuf("else");
+        static const auto keywordEndif = SBuf("endif");
+        if (SkipKeyword(keywordIf, tk)) {
+            if_states.push_back(EvalBoolExpr(tk)); // store last if-statement meaning
+        } else if (SkipKeyword(keywordEndif, tk)) {
             if (!if_states.empty())
                 if_states.pop_back(); // remove last if-statement meaning
             else
                 fatalf("'endif' without 'if'\n");
-        } else if (FindStatement(tmp_line, "else")) {
+        } else if (SkipKeyword(keywordElse, tk)) {
             if (!if_states.empty())
                 if_states.back() = !if_states.back();
             else
                 fatalf("'else' without 'if'\n");
         } else if (if_states.empty() || if_states.back()) { // test last if-statement meaning if present
             /* Handle includes here */
-            if (tmp_line_len >= 9 && strncmp(tmp_line, "include", 7) == 0 && xisspace(tmp_line[7])) {
-                err_count += parseManyConfigFiles(tmp_line + 8, depth + 1);
+            static const auto keywordInclude = SBuf("include");
+            if (SkipKeyword(keywordInclude, tk)) {
+                err_count += parseIncludedConfigFiles(tk.remaining(), depth + 1);
             } else {
                 try {
-                    if (!parse_line(tmp_line)) {
-                        debugs(3, DBG_CRITICAL, ConfigParser::CurrentLocation() << ": unrecognized: '" << tmp_line << "'");
+                    if (!parse_line(wholeLine)) {
+                        debugs(3, DBG_CRITICAL, ConfigParser::CurrentLocation() << ": unrecognized: '" << wholeLine << "'");
                         ++err_count;
                     }
                 } catch (...) {
@@ -572,9 +580,7 @@ parseOneConfigFile(const char *file_name, unsigned int depth)
             }
         }
 
-        safe_free(tmp_line);
-        tmp_line_len = 0;
-
+        wholeLine.clear();
     }
     if (!if_states.empty())
         fatalf("if-statement without 'endif'\n");
@@ -591,7 +597,6 @@ parseOneConfigFile(const char *file_name, unsigned int depth)
     SetConfigFilename(orig_cfg_filename, false);
     config_lineno = orig_config_lineno;
 
-    xfree(tmp_line);
     return err_count;
 }
 
@@ -671,10 +676,10 @@ ParseDirective(T &raw, ConfigParser &parser)
     if (SawDirective(raw))
         parser.rejectDuplicateDirective();
 
-    // TODO: parser.openDirective(directiveName);
     Must(!raw);
     raw = Configuration::Component<T>::Parse(parser);
     Must(raw);
+    // TODO: Move to parse_line() when ready to reject trailing garbage in all directives.
     parser.closeDirective();
 }
 
@@ -4385,20 +4390,20 @@ sslBumpCfgRr::finalizeConfig()
 {
     if (lastDeprecatedRule != Ssl::bumpEnd) {
         assert( lastDeprecatedRule == Ssl::bumpClientFirst || lastDeprecatedRule == Ssl::bumpNone);
-        static char buf[1024];
+        SBuf conversionRule;
         if (lastDeprecatedRule == Ssl::bumpClientFirst) {
-            strcpy(buf, "ssl_bump deny all");
+            conversionRule = SBuf("ssl_bump deny all");
             debugs(3, DBG_CRITICAL, "WARNING: auto-converting deprecated implicit "
                    "\"ssl_bump deny all\" to \"ssl_bump none all\". New ssl_bump configurations "
                    "must not use implicit rules. Update your ssl_bump rules.");
         } else {
-            strcpy(buf, "ssl_bump allow all");
+            conversionRule = SBuf("ssl_bump allow all");
             debugs(3, DBG_CRITICAL, "SECURITY NOTICE: auto-converting deprecated implicit "
                    "\"ssl_bump allow all\" to \"ssl_bump client-first all\" which is usually "
                    "inferior to the newer server-first bumping mode. New ssl_bump"
                    " configurations must not use implicit rules. Update your ssl_bump rules.");
         }
-        parse_line(buf);
+        parse_line(conversionRule);
     }
 }
 
diff --git a/src/cf_gen.cc b/src/cf_gen.cc
index 3ffe4da46c4..b7b8ad90351 100644
--- a/src/cf_gen.cc
+++ b/src/cf_gen.cc
@@ -466,13 +466,11 @@ gen_default(const EntryList &head, std::ostream &fout)
     fout << "static void" << std::endl <<
          "default_line(const char *s)" << std::endl <<
          "{" << std::endl <<
-         "    char *tmp_line = xstrdup(s);" << std::endl <<
-         "    int len = strlen(tmp_line);" << std::endl <<
-         "    ProcessMacros(tmp_line, len);" << std::endl <<
-         "    xstrncpy(config_input_line, tmp_line, sizeof(config_input_line));" << std::endl <<
+         "    SBuf tmp_line(s);" << std::endl <<
+         "    ProcessMacros(tmp_line);" << std::endl <<
+         "    xstrncpy(config_input_line, tmp_line.c_str(), sizeof(config_input_line));" << std::endl <<
          "    config_lineno++;" << std::endl <<
          "    parse_line(tmp_line);" << std::endl <<
-         "    xfree(tmp_line);" << std::endl <<
          "}" << std::endl << std::endl;
     fout << "static void" << std::endl <<
          "default_all(void)" << std::endl <<
@@ -595,7 +593,7 @@ gen_default_postscriptum(const EntryList &head, std::ostream &fout)
 void
 Entry::genParseAlias(const std::string &aName, std::ostream &fout) const
 {
-    fout << "    if (!strcmp(token, \"" << aName << "\")) {" << std::endl;
+    fout << "    if (directiveName.cmp(\"" << aName << "\") == 0) {" << std::endl;
     if (ifdef.size())
         fout << "#if " << ifdef << std::endl;
     fout << "        cfg_directive = \"" << aName << "\";" << std::endl;
@@ -606,7 +604,7 @@ Entry::genParseAlias(const std::string &aName, std::ostream &fout) const
             // offset line to strip initial whitespace tab byte
             fout << "        debugs(0, DBG_PARSE_NOTE(DBG_IMPORTANT), \"" << aName << " : " << &l[1] << "\");" << std::endl;
         }
-        fout << "        parse_obsolete(token);";
+        fout << "        parse_obsolete(cfg_directive);";
     } else if (!loc.size() || loc.compare("none") == 0) {
         fout << "parse_" << type << "();";
     } else if (type.find("::") != std::string::npos) {
@@ -646,12 +644,9 @@ gen_parse(const EntryList &head, std::ostream &fout)
 {
     fout <<
          "static int\n"
-         "parse_line(char *buff)\n"
+         "parse_line(const SBuf &buf)\n"
          "{\n"
-         "\tchar\t*token;\n"
-         "\tif ((token = strtok(buff, w_space)) == NULL) \n"
-         "\t\treturn 1;\t/* ignore empty lines */\n"
-         "\tConfigParser::SetCfgLine(strtok(nullptr, \"\"));\n";
+         "\tconst auto directiveName = LegacyParser.openDirective(buf);\n";
 
     for (const auto &e : head)
         e.genParse(fout);
diff --git a/src/parser/Tokenizer.cc b/src/parser/Tokenizer.cc
index 51654bae90d..41cf9971fd3 100644
--- a/src/parser/Tokenizer.cc
+++ b/src/parser/Tokenizer.cc
@@ -333,3 +333,60 @@ Parser::Tokenizer::udec64(const char *description, const SBuf::size_type limit)
     return result;
 }
 
+/// NextWordWhileRemovingDoubleQuotesAndBackslashesInsideThem() helper that
+/// parses an optional "quoted string" sequence at the start of its input.
+static std::optional<SBuf>
+UnquoteSequence_(Parser::Tokenizer &tk)
+{
+    if (!tk.skipOne(CharacterSet::DQUOTE))
+        return std::nullopt;
+
+    SBuf decoded;
+    while (!tk.atEnd()) {
+        SBuf prefix;
+        static const auto backslashChar = CharacterSet("backslash", "\\");
+        static const auto ordinaryChars = (CharacterSet::DQUOTE + backslashChar).complement();
+        if (tk.prefix(prefix, ordinaryChars))
+            decoded.append(prefix);
+
+        if (tk.skipOne(CharacterSet::DQUOTE))
+            return decoded; // well-formed "quoted string"
+
+        if (tk.skipOne(backslashChar)) {
+            SBuf escaped;
+            static const auto allChars = CharacterSet().complement("any char");
+            if (tk.prefix(escaped, allChars, 1))
+                decoded.append(escaped);
+            else
+                Assure(tk.atEnd()); // input ends at backslash (which we skip)
+        }
+    }
+
+    return decoded; // truncated "quoted string" (no closing quote); may be empty
+}
+
+std::optional<SBuf>
+NextWordWhileRemovingDoubleQuotesAndBackslashesInsideThem(Parser::Tokenizer &tk)
+{
+    const auto &spaceChars = CharacterSet::libcSpace();
+
+    (void)tk.skipAll(spaceChars);
+    if (tk.atEnd())
+        return std::nullopt;
+
+    SBuf decoded;
+    do {
+        SBuf prefix;
+        static const auto ordinaryChars = (CharacterSet::DQUOTE + spaceChars).complement();
+        if (tk.prefix(prefix, ordinaryChars))
+            decoded.append(prefix);
+
+        if (const auto sequence = UnquoteSequence_(tk))
+            decoded.append(*sequence);
+
+        if (tk.skipOne(spaceChars))
+            break;
+    } while (!tk.atEnd());
+    return decoded; // may be empty (e.g., ` ""` or `"\`)
+}
+
diff --git a/src/parser/Tokenizer.h b/src/parser/Tokenizer.h
index ffa3076a512..d685ae58a79 100644
--- a/src/parser/Tokenizer.h
+++ b/src/parser/Tokenizer.h
@@ -12,6 +12,8 @@
 #include "base/CharacterSet.h"
 #include "sbuf/SBuf.h"
 
+#include <optional>
+
 /// Generic protocol-agnostic parsing tools
 namespace Parser
 {
@@ -179,5 +181,11 @@ class Tokenizer
 
 } /* namespace Parser */
 
+/// Extracts the next "word" from Tokenizer::remaining() input using legacy
+/// strwordtok() decoding conventions. Allows upgrading strwordtok() callers to
+/// SBuf while preserving their "rudimentary" parsing grammar/logic.
+/// \returns std::nullopt when remaining input is zero or more isspace(3) characters
+std::optional<SBuf> NextWordWhileRemovingDoubleQuotesAndBackslashesInsideThem(Parser::Tokenizer &);
+
 #endif /* SQUID_SRC_PARSER_TOKENIZER_H */
 
diff --git a/src/tests/testACLMaxUserIP.cc b/src/tests/testACLMaxUserIP.cc
index fea6a8492b9..b78aa44dba7 100644
--- a/src/tests/testACLMaxUserIP.cc
+++ b/src/tests/testACLMaxUserIP.cc
@@ -70,10 +70,8 @@ MyTestProgram::startup()
 void
 TestACLMaxUserIP::testParseLine()
 {
-    /* a config line to pass with a lead-in token to seed the parser. */
-    char * line = xstrdup("test max_user_ip -s 1");
     /* seed the parser */
-    ConfigParser::SetCfgLine(line);
+    ConfigParser::SetCfgLine(SBuf("test max_user_ip -s 1"));
     Acl::Node *anACL = nullptr;
     ConfigParser LegacyParser;
     Acl::Node::ParseAclLine(LegacyParser, &anACL);
@@ -87,7 +85,6 @@ TestACLMaxUserIP::testParseLine()
         CPPUNIT_ASSERT_EQUAL(true, maxUserIpACL->valid());
     }
     delete anACL;
-    xfree(line);
 }
 
 int
diff --git a/src/tests/testConfigParser.cc b/src/tests/testConfigParser.cc
index 3a8e1a63a67..34085d4cb8e 100644
--- a/src/tests/testConfigParser.cc
+++ b/src/tests/testConfigParser.cc
@@ -9,6 +9,7 @@
 #include "squid.h"
 #include "compat/cppunit.h"
 #include "ConfigParser.h"
+#include "sbuf/SBuf.h"
 #include "SquidString.h"
 #include "unitTestMain.h"
 
@@ -31,18 +32,11 @@ CPPUNIT_TEST_SUITE_REGISTRATION( TestConfigParser );
 
 int shutting_down = 0;
 
-bool TestConfigParser::doParseQuotedTest(const char *s, const char *expectInterp)
+bool
+TestConfigParser::doParseQuotedTest(const char * const cfgparam, const char * const expectInterp)
 {
-    char cfgline[2048];
-    char cfgparam[2048];
-    snprintf(cfgline, 2048, "%s", s);
-
-    // Keep the initial value on cfgparam. The ConfigParser  methods will write on cfgline
-    strncpy(cfgparam, cfgline, sizeof(cfgparam)-1);
-    cfgparam[sizeof(cfgparam)-1] = '\0';
-
     // Initialize parser to point to the start of quoted string
-    ConfigParser::SetCfgLine(cfgline);
+    ConfigParser::SetCfgLine(SBuf(cfgparam));
     String unEscaped = ConfigParser::NextToken();
 
     const bool interpOk = (unEscaped.cmp(expectInterp) == 0);
diff --git a/src/tests/testRock.cc b/src/tests/testRock.cc
index bb89e8c5296..d212baf3393 100644
--- a/src/tests/testRock.cc
+++ b/src/tests/testRock.cc
@@ -91,17 +91,13 @@ TestRock::setUp()
 
     char *path=xstrdup(TESTDIR);
 
-    char *config_line=xstrdup("10 max-size=16384");
-
-    ConfigParser::SetCfgLine(config_line);
+    ConfigParser::SetCfgLine(SBuf("10 max-size=16384"));
 
     store->parse(0, path);
     store_maxobjsize = 1024*1024*2;
 
     safe_free(path);
 
-    safe_free(config_line);
-
     /* ok, ready to create */
     store->create();
 
diff --git a/src/tests/testUfs.cc b/src/tests/testUfs.cc
index 5daec5e5b99..a2b1a3132d9 100644
--- a/src/tests/testUfs.cc
+++ b/src/tests/testUfs.cc
@@ -121,19 +121,15 @@ TestUfs::testUfsSearch()
 
     char *path=xstrdup(TESTDIR);
 
-    char *config_line=xstrdup("100 1 1");
-
     visible_appname_string = xstrdup(PACKAGE "/" VERSION);
 
-    ConfigParser::SetCfgLine(config_line);
+    ConfigParser::SetCfgLine(SBuf("100 1 1"));
 
     aStore->parse(0, path);
     store_maxobjsize = 1024*1024*2;
 
     safe_free(path);
 
-    safe_free(config_line);
-
     /* ok, ready to create */
     aStore->create();
 
@@ -240,11 +236,9 @@ TestUfs::testUfsDefaultEngine()
     mem_policy = createRemovalPolicy(Config.replPolicy);
 
     char *path=xstrdup(TESTDIR);
-    char *config_line=xstrdup("100 1 1");
-    ConfigParser::SetCfgLine(config_line);
+    ConfigParser::SetCfgLine(SBuf("100 1 1"));
     aStore->parse(0, path);
     safe_free(path);
-    safe_free(config_line);
     CPPUNIT_ASSERT(aStore->IO->io != nullptr);
 
     free_cachedir(&Config.cacheSwap);

From f0fe5ba03117a484ca1c36da42e901aefa80ec68 Mon Sep 17 00:00:00 2001
From: Alex Rousskov <rousskov@measurement-factory.com>
Date: Thu, 13 Jun 2024 14:09:50 -0400
Subject: [PATCH 02/13] fixup: Do not insist on space around pp condition
 operator

    if 0=1

Our preprocessor grammar is not based on space-separated tokens!
---
 src/cache_cf.cc | 166 ++++++++++++++++++++++++++++++++++--------------
 1 file changed, 120 insertions(+), 46 deletions(-)

diff --git a/src/cache_cf.cc b/src/cache_cf.cc
index 74cc562b6e6..f9c7c5dc495 100644
--- a/src/cache_cf.cc
+++ b/src/cache_cf.cc
@@ -363,80 +363,156 @@ ProcessMacros(SBuf &buf)
     SubstituteMacro(buf, macroProcessNumber, kidIdentifier);
 }
 
-/// extracts the next optional token surrounded by optional space
-static std::optional<SBuf>
-ExtractOptionalToken(Parser::Tokenizer &tk)
+/// extracts all leading space characters (if any)
+/// \returns whether at least one character was extracted
+static bool
+SkipOptionalSpace_(Parser::Tokenizer &tk)
 {
     const auto &spaceChars = CharacterSet::libcSpace();
-    (void)tk.skipAll(spaceChars);
+    return tk.skipAll(spaceChars);
+}
+
+/// extracts all (and at least one) characters matching tokenChars surrounded by optional space
+static SBuf
+ExtractToken(const char * const description, Parser::Tokenizer &tk, const CharacterSet &tokenChars)
+{
+    const auto savedTk = tk;
+
+    (void)SkipOptionalSpace_(tk);
 
     SBuf token;
-    static const auto tokenChars = spaceChars.complement("ExtractOptionalToken() token chars");
-    if (!tk.prefix(token, tokenChars))
-        return std::nullopt;
+    if (tk.prefix(token, tokenChars)) {
+        (void)SkipOptionalSpace_(tk);
+        return token;
+    }
 
-    (void)tk.skipAll(spaceChars);
-    return token;
+    tk = savedTk;
+    throw TextException(ToSBuf("cannot find ", description, " near ", tk.remaining()), Here());
 }
 
-/// extracts the next required token surrounded by optional space
+/// extracts an operand of a preprocessor condition
 static SBuf
-ExtractToken(const char * const description, Parser::Tokenizer &tk)
+ExtractOperand(const char * const description, Parser::Tokenizer &tk)
 {
-    if (const auto token = ExtractOptionalToken(tk))
-        return *token;
-    throw TextException(ToSBuf("cannot find ", description), Here());
+    static const auto operandChars = (CharacterSet::ALPHA + CharacterSet::DIGIT).add('-').add('+').rename("preprocessor condition operand");
+    return ExtractToken(description, tk, operandChars);
 }
 
-/// extracts the next token if it matches the given keyword
-/// \returns whether the token was extracted
-static bool
-SkipKeyword(const SBuf &keyword, Parser::Tokenizer &tk)
+/// extracts an operator of a preprocessor condition
+static SBuf
+ExtractOperator(const char * const description, Parser::Tokenizer &tk)
 {
-    const auto savedTk = tk;
-    if (const auto token = ExtractOptionalToken(tk)) {
-        if (token == keyword)
-            return true;
-    }
-    tk = savedTk;
-    return false;
+    static const auto operatorChars = CharacterSet("preprocessor condition operator", "<=>%/*!^!");
+    return ExtractToken(description, tk, operatorChars);
 }
 
-/// extracts the next input token (skipping any leading space) and interprets it as
-/// a signed integer (in decimal, hex, or octal base per Parser::Tokenizer::int64())
+/// interprets the given raw string as a signed integer (in decimal, hex, or
+/// octal base per Parser::Tokenizer::int64())
 static int64_t
-ExtractNumber(const char * const description, Parser::Tokenizer &tk)
+EvalNumber(const SBuf &raw)
 {
-    auto numberParser = Parser::Tokenizer(ExtractToken(description, tk));
+    auto numberParser = Parser::Tokenizer(raw);
     int64_t result = 0;
     if (!numberParser.int64(result, 0, true))
-        throw TextException(ToSBuf("cannot parse ", description), Here());
+        throw TextException(ToSBuf("malformed integer near ", raw), Here());
     if (!numberParser.remaining().isEmpty())
-        throw TextException(ToSBuf("found trailing garbage after parsing ", description), Here());
+        throw TextException(ToSBuf("found trailing garbage after parsing an integer near ", raw), Here());
     return result;
 }
 
+/// EvalBoolExpr() helper that interprets input prefix as a preprocessor condition
 static bool
-EvalBoolExpr(Parser::Tokenizer &tk)
+EvalBoolExpr_(Parser::Tokenizer &tk)
 {
+    const auto operand = ExtractOperand("preprocessor condition", tk);
+
     static const auto keywordTrue = SBuf("true");
-    if (tk.remaining() == keywordTrue)
+    if (operand == keywordTrue)
         return true;
 
     static const auto keywordFalse = SBuf("false");
-    if (tk.remaining() == keywordFalse)
+    if (operand == keywordFalse)
         return false;
 
-    const auto lhs = ExtractNumber("left-hand operand of an equality condition", tk);
+    const auto lhs = operand;
 
-    const auto op = ExtractToken("equality sign in an equality condition", tk);
+    const auto op = ExtractOperator("equality sign in an equality condition", tk);
     static const auto keywordEqual = SBuf("=");
     if (op != keywordEqual)
         throw TextException(ToSBuf("expected equality sign (=) but got ", op), Here());
 
-    const auto rhs = ExtractNumber("right-hand operand of an equality condition", tk);
+    const auto rhs = ExtractOperand("right-hand operand of an equality condition", tk);
+    return EvalNumber(lhs) == EvalNumber(rhs);
+}
+
+/// interprets input as the first line of a preprocessor `if` statement
+/// \returns std::nullopt if input does not look like an `if` statement
+/// \returns `if` condition value if input is an `if` statement
+static std::optional<bool>
+FindIfStatementOpening(Parser::Tokenizer &tk)
+{
+    const auto savedTk = tk;
+
+    // grammar: space* "if" space condition space* END
+    (void)SkipOptionalSpace_(tk);
+    const auto keywordIf = SBuf("if");
+    if (tk.skip(keywordIf) && SkipOptionalSpace_(tk)) {
+        const auto result = EvalBoolExpr_(tk);
+        SkipOptionalSpace_(tk);
+        if (tk.atEnd())
+            return result;
+        throw TextException("found trailing garbage after parsing a preprocessor condition", Here());
+    }
 
-    return lhs == rhs;
+    // e.g., "iffy_error_responses on"
+    tk = savedTk;
+    return std::nullopt;
+}
+
+/// interprets input as an `else` or `endif` line of a preprocessor `if` statement
+/// \returns false if input does not look like an `else` or `endif` line
+static bool
+FindIfStatementLine(const SBuf &keyword, Parser::Tokenizer &tk)
+{
+    const auto savedTk = tk;
+
+    // grammar: space* keyword space* END
+    (void)SkipOptionalSpace_(tk);
+    if (tk.skip(keyword)) {
+        if (tk.atEnd())
+            return true;
+
+        if (SkipOptionalSpace_(tk)) {
+            if (tk.atEnd())
+                return true;
+            throw TextException(ToSBuf("found trailing garbage after preprocessor keyword: ", keyword), Here());
+        }
+        // e.g., "elseif"
+    }
+
+    tk = savedTk;
+    return false;
+}
+
+/// interprets input as an `include <files>` preprocessor directive
+/// \returns std::nullopt if input does not look like an `include` statement
+/// \returns `include` parameters if input is an `include` statement
+static std::optional<SBuf>
+FindIncludeLine(Parser::Tokenizer &tk)
+{
+    const auto savedTk = tk;
+
+    // grammar: space* "include" space files space* END
+    (void)SkipOptionalSpace_(tk);
+    const auto keywordInclude = SBuf("include");
+    if (tk.skip(keywordInclude) && SkipOptionalSpace_(tk)) {
+        // for simplicity sake, we leave trailing space, if any, in the result
+        return tk.remaining();
+    }
+
+    // e.g., "include_version_info allow all"
+    tk = savedTk;
+    return std::nullopt;
 }
 
 static int
@@ -546,26 +622,24 @@ parseOneConfigFile(const char *file_name, unsigned int depth)
 
         debugs(3, (opt_parse_cfg_only?1:5), "Processing: " << tk.remaining());
 
-        static const auto keywordIf = SBuf("if");
         static const auto keywordElse = SBuf("else");
         static const auto keywordEndif = SBuf("endif");
-        if (SkipKeyword(keywordIf, tk)) {
-            if_states.push_back(EvalBoolExpr(tk)); // store last if-statement meaning
-        } else if (SkipKeyword(keywordEndif, tk)) {
+        if (const auto condition = FindIfStatementOpening(tk)) {
+            if_states.push_back(*condition); // store last if-statement meaning
+        } else if (FindIfStatementLine(keywordEndif, tk)) {
             if (!if_states.empty())
                 if_states.pop_back(); // remove last if-statement meaning
             else
                 fatalf("'endif' without 'if'\n");
-        } else if (SkipKeyword(keywordElse, tk)) {
+        } else if (FindIfStatementLine(keywordElse, tk)) {
             if (!if_states.empty())
                 if_states.back() = !if_states.back();
             else
                 fatalf("'else' without 'if'\n");
         } else if (if_states.empty() || if_states.back()) { // test last if-statement meaning if present
             /* Handle includes here */
-            static const auto keywordInclude = SBuf("include");
-            if (SkipKeyword(keywordInclude, tk)) {
-                err_count += parseIncludedConfigFiles(tk.remaining(), depth + 1);
+            if (const auto files = FindIncludeLine(tk)) {
+                err_count += parseIncludedConfigFiles(*files, depth + 1);
             } else {
                 try {
                     if (!parse_line(wholeLine)) {

From 49084f94f51950e11ba9d1655e0e30dd6dc6d124 Mon Sep 17 00:00:00 2001
From: Alex Rousskov <rousskov@measurement-factory.com>
Date: Thu, 13 Jun 2024 17:40:36 -0400
Subject: [PATCH 03/13] fixup: Added a preprocessor test

Condition "00=000" will need to be replaced with "0=000"
when int64() bug is fixed.
---
 test-suite/squidconf/pp-ifs.conf              | 24 +++++++++++++++++++
 test-suite/squidconf/pp-ifs.conf.instructions |  1 +
 2 files changed, 25 insertions(+)
 create mode 100644 test-suite/squidconf/pp-ifs.conf
 create mode 100644 test-suite/squidconf/pp-ifs.conf.instructions

diff --git a/test-suite/squidconf/pp-ifs.conf b/test-suite/squidconf/pp-ifs.conf
new file mode 100644
index 00000000000..2afd7e30018
--- /dev/null
+++ b/test-suite/squidconf/pp-ifs.conf
@@ -0,0 +1,24 @@
+## Copyright (C) 1996-2023 The Squid Software Foundation and contributors
+##
+## Squid software is distributed under GPLv2+ license and includes
+## contributions from numerous individuals and organizations.
+## Please see the COPYING and CONTRIBUTORS files for details.
+##
+
+if true
+    if 10 = 0xA
+        if 00=000
+            if  -0= +0
+            # if -1=+1
+                if false
+                    unreachable_directive
+                else
+                    if preprocessor rejects this particular malformed line, then the test passes
+                endif
+            # endif
+            endif
+        endif
+    else
+        if this unreachable statement reached then preprocessor failed
+    endif
+endif
diff --git a/test-suite/squidconf/pp-ifs.conf.instructions b/test-suite/squidconf/pp-ifs.conf.instructions
new file mode 100644
index 00000000000..bf16ed5ad87
--- /dev/null
+++ b/test-suite/squidconf/pp-ifs.conf.instructions
@@ -0,0 +1 @@
+expect-failure FATAL:.*particular.malformed.line

From 6d897ce057b8838ee232fcd1701fba7d2894c11f Mon Sep 17 00:00:00 2001
From: Alex Rousskov <rousskov@measurement-factory.com>
Date: Thu, 13 Jun 2024 18:18:42 -0400
Subject: [PATCH 04/13] fixup: Simplified and polished preprocessor parsing
 code

---
 src/cache_cf.cc | 76 ++++++++++++++++++++++++-------------------------
 1 file changed, 38 insertions(+), 38 deletions(-)

diff --git a/src/cache_cf.cc b/src/cache_cf.cc
index f9c7c5dc495..56c64ac15cc 100644
--- a/src/cache_cf.cc
+++ b/src/cache_cf.cc
@@ -366,7 +366,7 @@ ProcessMacros(SBuf &buf)
 /// extracts all leading space characters (if any)
 /// \returns whether at least one character was extracted
 static bool
-SkipOptionalSpace_(Parser::Tokenizer &tk)
+SkipOptionalSpace(Parser::Tokenizer &tk)
 {
     const auto &spaceChars = CharacterSet::libcSpace();
     return tk.skipAll(spaceChars);
@@ -378,11 +378,11 @@ ExtractToken(const char * const description, Parser::Tokenizer &tk, const Charac
 {
     const auto savedTk = tk;
 
-    (void)SkipOptionalSpace_(tk);
+    (void)SkipOptionalSpace(tk);
 
     SBuf token;
     if (tk.prefix(token, tokenChars)) {
-        (void)SkipOptionalSpace_(tk);
+        (void)SkipOptionalSpace(tk);
         return token;
     }
 
@@ -402,10 +402,21 @@ ExtractOperand(const char * const description, Parser::Tokenizer &tk)
 static SBuf
 ExtractOperator(const char * const description, Parser::Tokenizer &tk)
 {
-    static const auto operatorChars = CharacterSet("preprocessor condition operator", "<=>%/*!^!");
+    static const auto operatorChars = CharacterSet("preprocessor condition operator", "<=>%/*^!");
     return ExtractToken(description, tk, operatorChars);
 }
 
+/// throws on non-empty remaining input
+static void
+RejectTrailingGarbage(const char * const parsedInputDescription, const SBuf &parsedInput, const Parser::Tokenizer &tk)
+{
+    if (!tk.atEnd()) {
+        throw TextException(ToSBuf("found trailing garbage after parsing ",
+                                   parsedInputDescription, ' ', parsedInput, ": ",
+                                   tk.remaining()), Here());
+    }
+}
+
 /// interprets the given raw string as a signed integer (in decimal, hex, or
 /// octal base per Parser::Tokenizer::int64())
 static int64_t
@@ -415,14 +426,13 @@ EvalNumber(const SBuf &raw)
     int64_t result = 0;
     if (!numberParser.int64(result, 0, true))
         throw TextException(ToSBuf("malformed integer near ", raw), Here());
-    if (!numberParser.remaining().isEmpty())
-        throw TextException(ToSBuf("found trailing garbage after parsing an integer near ", raw), Here());
+    RejectTrailingGarbage("integer", raw, numberParser);
     return result;
 }
 
-/// EvalBoolExpr() helper that interprets input prefix as a preprocessor condition
+/// IsIfStatementOpening() helper that interprets input prefix as a preprocessor condition
 static bool
-EvalBoolExpr_(Parser::Tokenizer &tk)
+EvalBoolExpr(Parser::Tokenizer &tk)
 {
     const auto operand = ExtractOperand("preprocessor condition", tk);
 
@@ -449,48 +459,41 @@ EvalBoolExpr_(Parser::Tokenizer &tk)
 /// \returns std::nullopt if input does not look like an `if` statement
 /// \returns `if` condition value if input is an `if` statement
 static std::optional<bool>
-FindIfStatementOpening(Parser::Tokenizer &tk)
+IsIfStatementOpening(Parser::Tokenizer tk)
 {
-    const auto savedTk = tk;
-
     // grammar: space* "if" space condition space* END
-    (void)SkipOptionalSpace_(tk);
+    (void)SkipOptionalSpace(tk);
     const auto keywordIf = SBuf("if");
-    if (tk.skip(keywordIf) && SkipOptionalSpace_(tk)) {
-        const auto result = EvalBoolExpr_(tk);
-        SkipOptionalSpace_(tk);
-        if (tk.atEnd())
-            return result;
-        throw TextException("found trailing garbage after parsing a preprocessor condition", Here());
+    if (tk.skip(keywordIf) && SkipOptionalSpace(tk)) {
+        const auto condition = tk.remaining();
+        const auto result = EvalBoolExpr(tk);
+        (void)SkipOptionalSpace(tk);
+        RejectTrailingGarbage("preprocessor condition", condition, tk);
+        return result;
     }
 
     // e.g., "iffy_error_responses on"
-    tk = savedTk;
     return std::nullopt;
 }
 
 /// interprets input as an `else` or `endif` line of a preprocessor `if` statement
 /// \returns false if input does not look like an `else` or `endif` line
 static bool
-FindIfStatementLine(const SBuf &keyword, Parser::Tokenizer &tk)
+IsIfStatementLine(const SBuf &keyword, Parser::Tokenizer tk)
 {
-    const auto savedTk = tk;
-
     // grammar: space* keyword space* END
-    (void)SkipOptionalSpace_(tk);
+    (void)SkipOptionalSpace(tk);
     if (tk.skip(keyword)) {
         if (tk.atEnd())
             return true;
 
-        if (SkipOptionalSpace_(tk)) {
-            if (tk.atEnd())
-                return true;
-            throw TextException(ToSBuf("found trailing garbage after preprocessor keyword: ", keyword), Here());
+        if (SkipOptionalSpace(tk)) {
+            RejectTrailingGarbage("preprocessor keyword", keyword, tk);
+            return true;
         }
         // e.g., "elseif"
     }
 
-    tk = savedTk;
     return false;
 }
 
@@ -498,20 +501,17 @@ FindIfStatementLine(const SBuf &keyword, Parser::Tokenizer &tk)
 /// \returns std::nullopt if input does not look like an `include` statement
 /// \returns `include` parameters if input is an `include` statement
 static std::optional<SBuf>
-FindIncludeLine(Parser::Tokenizer &tk)
+IsIncludeLine(Parser::Tokenizer tk)
 {
-    const auto savedTk = tk;
-
     // grammar: space* "include" space files space* END
-    (void)SkipOptionalSpace_(tk);
+    (void)SkipOptionalSpace(tk);
     const auto keywordInclude = SBuf("include");
-    if (tk.skip(keywordInclude) && SkipOptionalSpace_(tk)) {
+    if (tk.skip(keywordInclude) && SkipOptionalSpace(tk)) {
         // for simplicity sake, we leave trailing space, if any, in the result
         return tk.remaining();
     }
 
     // e.g., "include_version_info allow all"
-    tk = savedTk;
     return std::nullopt;
 }
 
@@ -624,21 +624,21 @@ parseOneConfigFile(const char *file_name, unsigned int depth)
 
         static const auto keywordElse = SBuf("else");
         static const auto keywordEndif = SBuf("endif");
-        if (const auto condition = FindIfStatementOpening(tk)) {
+        if (const auto condition = IsIfStatementOpening(tk)) {
             if_states.push_back(*condition); // store last if-statement meaning
-        } else if (FindIfStatementLine(keywordEndif, tk)) {
+        } else if (IsIfStatementLine(keywordEndif, tk)) {
             if (!if_states.empty())
                 if_states.pop_back(); // remove last if-statement meaning
             else
                 fatalf("'endif' without 'if'\n");
-        } else if (FindIfStatementLine(keywordElse, tk)) {
+        } else if (IsIfStatementLine(keywordElse, tk)) {
             if (!if_states.empty())
                 if_states.back() = !if_states.back();
             else
                 fatalf("'else' without 'if'\n");
         } else if (if_states.empty() || if_states.back()) { // test last if-statement meaning if present
             /* Handle includes here */
-            if (const auto files = FindIncludeLine(tk)) {
+            if (const auto files = IsIncludeLine(tk)) {
                 err_count += parseIncludedConfigFiles(*files, depth + 1);
             } else {
                 try {

From 02a4059e43c7c3da9f65db20f1aa80bf4e5f8432 Mon Sep 17 00:00:00 2001
From: Alex Rousskov <rousskov@measurement-factory.com>
Date: Fri, 14 Jun 2024 15:17:31 -0400
Subject: [PATCH 05/13] fixup: More temporary workarounds for
 Tokenizer::int64() bugs

---
 test-suite/squidconf/pp-ifs.conf | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/test-suite/squidconf/pp-ifs.conf b/test-suite/squidconf/pp-ifs.conf
index 2afd7e30018..3ba2c174c40 100644
--- a/test-suite/squidconf/pp-ifs.conf
+++ b/test-suite/squidconf/pp-ifs.conf
@@ -8,7 +8,7 @@
 if true
     if 10 = 0xA
         if 00=000
-            if  -0= +0
+            if  -00= +00
             # if -1=+1
                 if false
                     unreachable_directive

From a9eaa360ebc6ef382d152758996e94f3b1c129d5 Mon Sep 17 00:00:00 2001
From: Alex Rousskov <rousskov@measurement-factory.com>
Date: Fri, 14 Jun 2024 15:29:46 -0400
Subject: [PATCH 06/13] Add a weak test of SMP configuration macro expansion

Better than nothing. We could add a stronger test that validates
equality, but it would make this configuration file inapplicable to SMP
Squids. IMO, we should not do that without also developing the
corresponding instruction (and adding that to the instructions file).
---
 test-suite/squidconf/pp-ifs.conf | 19 +++++++++++--------
 1 file changed, 11 insertions(+), 8 deletions(-)

diff --git a/test-suite/squidconf/pp-ifs.conf b/test-suite/squidconf/pp-ifs.conf
index 3ba2c174c40..1fc28d1cc0e 100644
--- a/test-suite/squidconf/pp-ifs.conf
+++ b/test-suite/squidconf/pp-ifs.conf
@@ -7,15 +7,18 @@
 
 if true
     if 10 = 0xA
-        if 00=000
-            if  -00= +00
-            # if -1=+1
-                if false
-                    unreachable_directive
-                else
-                    if preprocessor rejects this particular malformed line, then the test passes
+        if 0${process_number} = 077777777
+        else
+            if 00=000
+                if  -00= +00
+                # if -1=+1
+                    if false
+                        unreachable_directive
+                    else
+                        if preprocessor rejects this particular malformed line, then the test passes
+                    endif
+                # endif
                 endif
-            # endif
             endif
         endif
     else

From 4a565ed3612d204a641aba01f00506f28065554e Mon Sep 17 00:00:00 2001
From: Alex Rousskov <rousskov@measurement-factory.com>
Date: Tue, 18 Jun 2024 09:22:59 -0400
Subject: [PATCH 07/13] fixup: Prefer std::numeric_limits to unnamed constants

Also fix #include order for STL headers.

Context:
https://github.com/squid-cache/squid/pull/1840#pullrequestreview-2123991179
---
 src/base/CharacterSet.cc | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

diff --git a/src/base/CharacterSet.cc b/src/base/CharacterSet.cc
index e1bcb20035f..68fca0d5b30 100644
--- a/src/base/CharacterSet.cc
+++ b/src/base/CharacterSet.cc
@@ -10,8 +10,9 @@
 #include "base/CharacterSet.h"
 
 #include <algorithm>
-#include <iostream>
 #include <functional>
+#include <iostream>
+#include <limits>
 
 CharacterSet &
 CharacterSet::operator +=(const CharacterSet &src)
@@ -173,7 +174,8 @@ CharacterSet::libcSpace()
 {
     const auto creator = []() -> auto {
         const auto chars = new CharacterSet("libcSpace");
-        for (size_t idx = 0; idx < 256; ++idx) {
+        using charLimits = std::numeric_limits<CharacterSet::Storage::value_type>;
+        for (size_t idx = charLimits::min(); idx <= charLimits::max(); ++idx) {
             if (xisspace(idx))
                 chars->add(idx);
         }

From d0e01dd0679a6959155eb0f6045444052a9026a2 Mon Sep 17 00:00:00 2001
From: Alex Rousskov <rousskov@measurement-factory.com>
Date: Fri, 21 Jun 2024 13:30:34 -0400
Subject: [PATCH 08/13] fixup: Undo error text improvement

Context:
https://github.com/squid-cache/squid/pull/1840#discussion_r1649189709
---
 src/cache_cf.cc | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/cache_cf.cc b/src/cache_cf.cc
index 56c64ac15cc..427f758fdcd 100644
--- a/src/cache_cf.cc
+++ b/src/cache_cf.cc
@@ -315,7 +315,7 @@ parseIncludedConfigFiles(const SBuf &paths, const int depth)
     while (auto path = NextWordWhileRemovingDoubleQuotesAndBackslashesInsideThem(tk)) {
         if (glob(path->c_str(), globbuf.gl_pathc ? GLOB_APPEND : 0, nullptr, &globbuf) != 0) {
             const auto xerrno = errno;
-            throw TextException(ToSBuf("Cannot find included configuration file named ", *path, ": ", xstrerr(xerrno)), Here());
+            throw TextException(ToSBuf("Unable to find configuration file: ", *path, ": ", xstrerr(xerrno)), Here());
         }
     }
     for (i = 0; i < (int)globbuf.gl_pathc; ++i) {

From 896e2f52b6c950ddd0e06c9f2d9c84f98d204837 Mon Sep 17 00:00:00 2001
From: Alex Rousskov <rousskov@measurement-factory.com>
Date: Fri, 21 Jun 2024 13:35:39 -0400
Subject: [PATCH 09/13] fixup: Detail a TODO

Context:
https://github.com/squid-cache/squid/pull/1840#discussion_r1649152617
---
 src/ConfigParser.cc | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/ConfigParser.cc b/src/ConfigParser.cc
index ccbea97bd6e..b4f08db2ae9 100644
--- a/src/ConfigParser.cc
+++ b/src/ConfigParser.cc
@@ -566,7 +566,7 @@ SBuf
 ConfigParser::openDirective(const SBuf &line)
 {
     Parser::Tokenizer tk(line);
-    SBuf directiveName; // TODO: Upgrade cfg_directive to SBuf and set it here.
+    SBuf directiveName; // TODO: Upgrade cfg_directive to a ConfigParser member (with SBuf type) and set it here.
     static const auto spaceChars = CharacterSet("w_space", w_space);
     static const auto directiveChars = spaceChars.complement("squid.conf directive name");
     const auto found = tk.prefix(directiveName, directiveChars);

From 2e4d700aff38f285a0766894366ea6d4339b4f7f Mon Sep 17 00:00:00 2001
From: Alex Rousskov <rousskov@measurement-factory.com>
Date: Fri, 5 Jul 2024 11:59:08 -0400
Subject: [PATCH 10/13] Switch to "only SP/HT" grammar when parsing include
 filenames

This change should have been done in master/v7 commit 314e430, but I
missed it because the caller does not use isspace() directly.

This change removes the last use of branch-added
CharacterSet::libcSpace().
---
 src/parser/Tokenizer.cc | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/parser/Tokenizer.cc b/src/parser/Tokenizer.cc
index 41cf9971fd3..ef0824adce7 100644
--- a/src/parser/Tokenizer.cc
+++ b/src/parser/Tokenizer.cc
@@ -368,7 +368,7 @@ UnquoteSequence_(Parser::Tokenizer &tk)
 std::optional<SBuf>
 NextWordWhileRemovingDoubleQuotesAndBackslashesInsideThem(Parser::Tokenizer &tk)
 {
-    const auto &spaceChars = CharacterSet::libcSpace();
+    const auto &spaceChars = CharacterSet::WSP;
 
     (void)tk.skipAll(spaceChars);
     if (tk.atEnd())

From 7e1af18fbc807cee3608fd1a8b7639d31e0292aa Mon Sep 17 00:00:00 2001
From: Alex Rousskov <rousskov@measurement-factory.com>
Date: Fri, 5 Jul 2024 12:07:08 -0400
Subject: [PATCH 11/13] Removed no-longer-used branch-added
 CharacterSet::libcSpace()

---
 src/base/CharacterSet.cc | 19 +------------------
 src/base/CharacterSet.h  |  3 ---
 2 files changed, 1 insertion(+), 21 deletions(-)

diff --git a/src/base/CharacterSet.cc b/src/base/CharacterSet.cc
index 68fca0d5b30..3793b7c745c 100644
--- a/src/base/CharacterSet.cc
+++ b/src/base/CharacterSet.cc
@@ -10,9 +10,8 @@
 #include "base/CharacterSet.h"
 
 #include <algorithm>
-#include <functional>
 #include <iostream>
-#include <limits>
+#include <functional>
 
 CharacterSet &
 CharacterSet::operator +=(const CharacterSet &src)
@@ -169,19 +168,3 @@ CharacterSet::RFC3986_UNRESERVED()
     return *chars;
 }
 
-const CharacterSet &
-CharacterSet::libcSpace()
-{
-    const auto creator = []() -> auto {
-        const auto chars = new CharacterSet("libcSpace");
-        using charLimits = std::numeric_limits<CharacterSet::Storage::value_type>;
-        for (size_t idx = charLimits::min(); idx <= charLimits::max(); ++idx) {
-            if (xisspace(idx))
-                chars->add(idx);
-        }
-        return chars;
-    };
-    static const auto chars = creator();
-    return *chars;
-}
-
diff --git a/src/base/CharacterSet.h b/src/base/CharacterSet.h
index a6eea43c1f2..880072c0306 100644
--- a/src/base/CharacterSet.h
+++ b/src/base/CharacterSet.h
@@ -121,9 +121,6 @@ class CharacterSet
     /// allowed URI characters that do not have a reserved purpose, RFC 3986
     static const CharacterSet &RFC3986_UNRESERVED();
 
-    /// characters that libc isspace(3) matches, including any locale-specific ones
-    static const CharacterSet &libcSpace();
-
 private:
     /** index of characters in this set
      *

From 2176dd8a1b4b27910621ed7ec8db7256fb444f10 Mon Sep 17 00:00:00 2001
From: Alex Rousskov <rousskov@measurement-factory.com>
Date: Mon, 8 Jul 2024 16:14:28 -0400
Subject: [PATCH 12/13] fixup: Removed distracting workarounds for int64()
 zero-parsing bugs

---
 test-suite/squidconf/pp-ifs.conf | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/test-suite/squidconf/pp-ifs.conf b/test-suite/squidconf/pp-ifs.conf
index 1fc28d1cc0e..aec53776af4 100644
--- a/test-suite/squidconf/pp-ifs.conf
+++ b/test-suite/squidconf/pp-ifs.conf
@@ -7,10 +7,10 @@
 
 if true
     if 10 = 0xA
-        if 0${process_number} = 077777777
+        if ${process_number} = 9223372036854775807
         else
-            if 00=000
-                if  -00= +00
+            if 0=000
+                if  -0= +0
                 # if -1=+1
                     if false
                         unreachable_directive

From 2a6a41f4cc9bf024656846d25960dbb82b24833a Mon Sep 17 00:00:00 2001
From: Alex Rousskov <rousskov@measurement-factory.com>
Date: Mon, 8 Jul 2024 16:15:31 -0400
Subject: [PATCH 13/13] fixup: Fixed (or at least improved) test case wording

---
 test-suite/squidconf/pp-ifs.conf | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/test-suite/squidconf/pp-ifs.conf b/test-suite/squidconf/pp-ifs.conf
index aec53776af4..2d3ddaf8be0 100644
--- a/test-suite/squidconf/pp-ifs.conf
+++ b/test-suite/squidconf/pp-ifs.conf
@@ -22,6 +22,6 @@ if true
             endif
         endif
     else
-        if this unreachable statement reached then preprocessor failed
+        if this unreachable statement was reached then preprocessor failed
     endif
 endif