From a9bb9c5181d6b20cfe168839eda0c39600ad2bb2 Mon Sep 17 00:00:00 2001 From: Joshua Haberman Date: Thu, 21 Dec 2023 14:07:32 -0800 Subject: [PATCH] TextFormat should always escape ASCII 127 (DEL). This was missed in the previous CL. DEL characters would be printed unescaped, which makes them more difficult to read but should still be data-preserving. PiperOrigin-RevId: 592946127 --- src/google/protobuf/text_format.cc | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-) diff --git a/src/google/protobuf/text_format.cc b/src/google/protobuf/text_format.cc index 591543c90323..cc74abc77898 100644 --- a/src/google/protobuf/text_format.cc +++ b/src/google/protobuf/text_format.cc @@ -1654,13 +1654,22 @@ namespace { // Returns true if `ch` needs to be escaped in TextFormat, independent of any // UTF-8 validity issues. bool DefinitelyNeedsEscape(unsigned char ch) { - if (ch < 32) return true; + if (ch >= 0x80) { + return false; // High byte; no escapes necessary if UTF-8 is valid. + } + + if (!absl::ascii_isprint(ch)) { + return true; // Unprintable characters need escape. + } + switch (ch) { case '\"': case '\'': case '\\': + // These characters need escapes despite being printable. return true; } + return false; }