From 461df9d621b6653bec63fcc6f28a9729f89c54e7 Mon Sep 17 00:00:00 2001 From: Simon Resch Date: Thu, 3 Mar 2022 09:04:41 +0100 Subject: [PATCH 1/2] Revert "Escape and in string literals so that + // the output can be embedded in HTML script elements and in XML CDATA + // sections. + case '/': + // Don't over escape. Many JSON bodies contain innocuous HTML + // that can be safely embedded. + if (i > start && i + 2 < end && '<' == jsonish.charAt(i - 1) + && 's' == (jsonish.charAt(i + 1) | 32) + && 'c' == (jsonish.charAt(i + 2) | 32)) { + insert(i, '\\'); } break; case ']': diff --git a/src/test/java/com/google/json/JsonSanitizerTest.java b/src/test/java/com/google/json/JsonSanitizerTest.java index 6a397b6..50bbf62 100644 --- a/src/test/java/com/google/json/JsonSanitizerTest.java +++ b/src/test/java/com/google/json/JsonSanitizerTest.java @@ -58,12 +58,14 @@ public static final void testSanitize() { assertSanitized("\"foo\""); assertSanitized("\"foo\"", "'foo'"); assertSanitized( - "\"\\u003cscript>foo()\\u003c/script>\"", "\"\""); - assertSanitized("\"\\u003c/SCRIPT\\n>\"", "\"\""); - assertSanitized("\"\\u003c/ScRIpT\"", "\"foo()<\\/script>\"", "\"\""); + assertSanitized( + "\"\""); + assertSanitized("\"<\\/SCRIPT\\n>\"", "\"\""); + assertSanitized("\"<\\/ScRIpT\"", "\"Hello\""); assertSanitized("\"Hello\""); assertSanitized("\"<[[\\u005d]>\"", "'<[[]]>'"); @@ -211,151 +213,4 @@ public static final void testIssue13() { "[ { \"description\": \"aa##############aa\" }, 1 ]", "[ { \"description\": \"aa##############aa\" }, 1 ]"); } - - @Test - public static final void testHtmlParserStateChanges() { - assertSanitized("\"\\u003cscript\"", "\"\"", "\"\""); - assertSanitized("\"script\"", "\""); - assertSanitized("-0", "-->"); - - assertSanitized("\"\\u003c!--\\u003cscript>\"", "\"")); - assertFalse(out, out.contains("")); - assertFalse(out, out.contains("u\\u\\uu\ufffd\ufffd\\u7u\\u\\u\\u\ufffdu<\\script>5+alert(1)//"; - assertEquals( - "\"x\\u003c/script>uuuu\uFFFD\uFFFDu7uuuu\uFFFDu\\u003cscript>5+alert(1)//\"", - JsonSanitizer.sanitize(input) - ); - } - - @Test - public static final void testInvalidOutput() { - // Found by Fabian Meumertzheim using CI Fuzz (https://www.code-intelligence.com) - String input = "\u0010{'\u0000\u0000'\"\u0000\"{.\ufffd-0X29295909049550970,\n\n0"; - String want = "{\"\\u0000\\u0000\":\"\\u0000\",\"\":{\"0\":-47455995597866469744,\n\n\"0\":null}}"; - String got = JsonSanitizer.sanitize(input); - assertEquals(want, got); - } - - @Test - public static final void testBadNumber() { - String input = "¶0x.\\蹃4\\À906"; - String want = "0.0"; - String got = JsonSanitizer.sanitize(input); - assertEquals(want, got); - } - - @Test - public static final void testDashDashGtEscaped() { - String input = "'->??-\\->"; - String want = "\"->??--\\u003e\""; - String got1 = JsonSanitizer.sanitize(input); - assertEquals(want, got1); - String got2 = JsonSanitizer.sanitize(got1); - assertEquals(want, got2); - } - - @Test - public static final void testDashDashGtUEscaped() { - String input = "'.\\u002D->'"; - String want = "\".\\u002D-\\u003e\""; - String got1 = JsonSanitizer.sanitize(input); - assertEquals(want, got1); - String got2 = JsonSanitizer.sanitize(got1); - assertEquals(want, got2); - } - - @Test - public static final void testEscHtmlCommentClose() { - String input = "x--\\>"; - String want = "\"x--\\u003e\""; - String got1 = JsonSanitizer.sanitize(input); - assertEquals(want, got1); - String got2 = JsonSanitizer.sanitize(got1); - assertEquals(want, got2); - } } From b25cffe0f7963bdf45f6d17a756e8acb28b92681 Mon Sep 17 00:00:00 2001 From: Simon Resch Date: Thu, 3 Mar 2022 17:40:00 +0100 Subject: [PATCH 2/2] Escape and in string literals so that - // the output can be embedded in HTML script elements and in XML CDATA - // sections. - case '/': - // Don't over escape. Many JSON bodies contain innocuous HTML - // that can be safely embedded. - if (i > start && i + 2 < end && '<' == jsonish.charAt(i - 1) - && 's' == (jsonish.charAt(i + 1) | 32) - && 'c' == (jsonish.charAt(i + 2) | 32)) { - insert(i, '\\'); + // Embedding. Disallow and ]]> in string + // literals so that the output can be embedded in HTML script elements + // and in XML CDATA sections without affecting the parser state. + // References: + // https://www.w3.org/TR/html53/semantics-scripting.html#restrictions-for-contents-of-script-elements + // https://www.w3.org/TR/html53/syntax.html#script-data-escaped-state + // https://www.w3.org/TR/html53/syntax.html#script-data-double-escaped-state + // https://www.w3.org/TR/xml/#sec-cdata-sect + case '<': { + // Disallow , which lets the HTML parser switch out of the "script + // data escaped" or "script data double escaped" state. + if ((i - 2) >= start) { + int lb = i - 1; + if ((runSlashPreceding(jsonish, lb) & 1) == 1) { + // If the '>' is escaped backup over its slash. + lb -= 1; + } + int cm1AndDelta = unescapedCharRev(jsonish, lb); + char cm1 = (char) cm1AndDelta; + if ('-' == cm1) { + lb -= cm1AndDelta >>> 16; + int cm2AndDelta = unescapedCharRev(jsonish, lb); + char cm2 = (char) cm2AndDelta; + if ('-' == cm2) { + replace(i, i + 1, "\\u003e"); // Escaped > + } + } } break; case ']': diff --git a/src/test/java/com/google/json/JsonSanitizerTest.java b/src/test/java/com/google/json/JsonSanitizerTest.java index 50bbf62..6a397b6 100644 --- a/src/test/java/com/google/json/JsonSanitizerTest.java +++ b/src/test/java/com/google/json/JsonSanitizerTest.java @@ -58,14 +58,12 @@ public static final void testSanitize() { assertSanitized("\"foo\""); assertSanitized("\"foo\"", "'foo'"); assertSanitized( - "\"\""); - assertSanitized( - "\"\""); - assertSanitized("\"<\\/SCRIPT\\n>\"", "\"\""); - assertSanitized("\"<\\/ScRIpT\"", "\"foo()\\u003c/script>\"", "\"\""); + assertSanitized("\"\\u003c/SCRIPT\\n>\"", "\"\""); + assertSanitized("\"\\u003c/ScRIpT\"", "\"Hello\""); assertSanitized("\"Hello\""); assertSanitized("\"<[[\\u005d]>\"", "'<[[]]>'"); @@ -213,4 +211,151 @@ public static final void testIssue13() { "[ { \"description\": \"aa##############aa\" }, 1 ]", "[ { \"description\": \"aa##############aa\" }, 1 ]"); } + + @Test + public static final void testHtmlParserStateChanges() { + assertSanitized("\"\\u003cscript\"", "\"\"", "\"\""); + assertSanitized("\"script\"", "\""); + assertSanitized("-0", "-->"); + + assertSanitized("\"\\u003c!--\\u003cscript>\"", "\"")); + assertFalse(out, out.contains("")); + assertFalse(out, out.contains("u\\u\\uu\ufffd\ufffd\\u7u\\u\\u\\u\ufffdu<\\script>5+alert(1)//"; + assertEquals( + "\"x\\u003c/script>uuuu\uFFFD\uFFFDu7uuuu\uFFFDu\\u003cscript>5+alert(1)//\"", + JsonSanitizer.sanitize(input) + ); + } + + @Test + public static final void testInvalidOutput() { + // Found by Fabian Meumertzheim using CI Fuzz (https://www.code-intelligence.com) + String input = "\u0010{'\u0000\u0000'\"\u0000\"{.\ufffd-0X29295909049550970,\n\n0"; + String want = "{\"\\u0000\\u0000\":\"\\u0000\",\"\":{\"0\":-47455995597866469744,\n\n\"0\":null}}"; + String got = JsonSanitizer.sanitize(input); + assertEquals(want, got); + } + + @Test + public static final void testBadNumber() { + String input = "¶0x.\\蹃4\\À906"; + String want = "0.0"; + String got = JsonSanitizer.sanitize(input); + assertEquals(want, got); + } + + @Test + public static final void testDashDashGtEscaped() { + String input = "'->??-\\->"; + String want = "\"->??--\\u003e\""; + String got1 = JsonSanitizer.sanitize(input); + assertEquals(want, got1); + String got2 = JsonSanitizer.sanitize(got1); + assertEquals(want, got2); + } + + @Test + public static final void testDashDashGtUEscaped() { + String input = "'.\\u002D->'"; + String want = "\".\\u002D-\\u003e\""; + String got1 = JsonSanitizer.sanitize(input); + assertEquals(want, got1); + String got2 = JsonSanitizer.sanitize(got1); + assertEquals(want, got2); + } + + @Test + public static final void testEscHtmlCommentClose() { + String input = "x--\\>"; + String want = "\"x--\\u003e\""; + String got1 = JsonSanitizer.sanitize(input); + assertEquals(want, got1); + String got2 = JsonSanitizer.sanitize(got1); + assertEquals(want, got2); + } }