Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

LibWeb: Fix two character reference tokenization bugs #3163

Merged
merged 3 commits into from
Jan 6, 2025
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
18 changes: 7 additions & 11 deletions Libraries/LibWeb/HTML/Parser/HTMLTokenizer.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -94,9 +94,10 @@ namespace Web::HTML {
} \
} while (0)

#define DONT_CONSUME_NEXT_INPUT_CHARACTER \
do { \
restore_to(m_prev_utf8_iterator); \
#define DONT_CONSUME_NEXT_INPUT_CHARACTER \
do { \
if (current_input_character.has_value()) \
restore_to(m_prev_utf8_iterator); \
} while (0)

#define ON(code_point) \
Expand Down Expand Up @@ -1821,16 +1822,11 @@ Optional<HTMLToken> HTMLTokenizer::next_token(StopAtInsertionPoint stop_at_inser
m_character_reference_code += current_input_character.value() - 0x30;
continue;
}
ON_ASCII_UPPER_ALPHA
{
m_character_reference_code *= 16;
m_character_reference_code += current_input_character.value() - 0x37;
continue;
}
ON_ASCII_LOWER_ALPHA
ON_ASCII_HEX_DIGIT
{
m_character_reference_code *= 16;
m_character_reference_code += current_input_character.value() - 0x57;
auto hex_digit_min_ascii_value = is_ascii_upper_alpha(current_input_character.value()) ? 0x37 : 0x57;
m_character_reference_code += current_input_character.value() - hex_digit_min_ascii_value;
continue;
}
ON(';')
Expand Down
2 changes: 0 additions & 2 deletions Tests/LibWeb/TestConfig.ini
Original file line number Diff line number Diff line change
Expand Up @@ -30,7 +30,6 @@ Text/input/Worker/Worker-performance.html
Text/input/Worker/Worker-postMessage-transfer.html

; Skipped due to assertion failures
Text/input/wpt-import/html/syntax/parsing/html5lib_entities01.html
Text/input/wpt-import/html/syntax/parsing/html5lib_plain-text-unsafe.html
Text/input/wpt-import/html/syntax/parsing/html5lib_template.html
Text/input/wpt-import/html/syntax/parsing/html5lib_tests1.html
Expand All @@ -39,7 +38,6 @@ Text/input/wpt-import/html/syntax/parsing/html5lib_tests16.html
Text/input/wpt-import/html/syntax/parsing/html5lib_tests19.html
Text/input/wpt-import/html/syntax/parsing/html5lib_tests5.html
Text/input/wpt-import/html/syntax/parsing/html5lib_webkit01.html
Text/input/wpt-import/html/syntax/parsing/named-character-references.html

; Support files (not tests themselves)
Text/input/wpt-import/html/syntax/parsing/support/no-doctype-name-eof.html
Expand Down
19 changes: 19 additions & 0 deletions Tests/LibWeb/TestHTMLTokenizer.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -199,6 +199,25 @@ TEST_CASE(character_reference_in_attribute)
END_ENUMERATION();
}

// Tokenizes a decimal numeric character reference ("&#1111") that is not
// terminated by a semicolon and is immediately followed by the end of input.
// Expected output: one character token with code point 1111 (decimal), then
// the end-of-file token.
// NOTE(review): presumably this guards the end-of-input path of
// DONT_CONSUME_NEXT_INPUT_CHARACTER in the tokenizer — confirm against
// HTMLTokenizer.cpp.
TEST_CASE(numeric_character_reference)
{
auto tokens = run_tokenizer("&#1111"sv);
BEGIN_ENUMERATION(tokens);
EXPECT_CHARACTER_TOKEN(1111);
EXPECT_END_OF_FILE_TOKEN();
END_ENUMERATION();
}

// Tokenizes a hexadecimal character reference whose digits mix upper- and
// lower-case letters ("A12b"), followed directly by 'Z', which is not a hex
// digit. Expected output: one character token for U+A12B, then a character
// token for the literal 'Z', then the end-of-file token.
TEST_CASE(hex_character_reference)
{
auto tokens = run_tokenizer("&#xA12bZ"sv);
BEGIN_ENUMERATION(tokens);
EXPECT_CHARACTER_TOKEN(0xA12B);
EXPECT_CHARACTER_TOKEN('Z');
EXPECT_END_OF_FILE_TOKEN();
END_ENUMERATION();
}

TEST_CASE(comment)
{
auto tokens = run_tokenizer("<p><!-- This is a comment --></p>"sv);
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,80 @@
Harness status: OK

Found 75 tests

75 Pass
Pass html5lib_entities01.html 16c694bcf0b3ff3723fa070eea7e1e82ef12a337
Pass html5lib_entities01.html 05e04b39ef06e2367a33326f5dd566913aa6628f
Pass html5lib_entities01.html fbf7d9fec595585869c5c595d5588b34fd175278
Pass html5lib_entities01.html e59b0a76d7bcfb429b27e00e469f35e08a9bdd1a
Pass html5lib_entities01.html 5ea854d6ecd4d6dd459cb36d4faf3ed36e11c073
Pass html5lib_entities01.html 119cd15b852615cd0fce759769b4a3788595e3bb
Pass html5lib_entities01.html 903cefcfae1125cb71fc77f4a6b7d3546e8f4020
Pass html5lib_entities01.html 69f08b40c7506153e809415ca98e2ed98992216b
Pass html5lib_entities01.html 9c00a1833e8cf4af28c8bd94902412ad7052b4b0
Pass html5lib_entities01.html b5bcdcbc6e88b380be0e48ca2620fbbb8e92e497
Pass html5lib_entities01.html bf6c90305b2856c2d9c9a146dfff867fe7a5e0f3
Pass html5lib_entities01.html 6b9c8d175a3d7b6cf04ffd72e44a7dc88686460f
Pass html5lib_entities01.html 76c184d9ce64b8a52c2e67eafeb8d332c096f2be
Pass html5lib_entities01.html 4c30f8f931eb44c2f208e837555c0cc444dd4612
Pass html5lib_entities01.html 1db77ef761092d65ce847c0bcd6e7cb892db754d
Pass html5lib_entities01.html 284c18aa40a73e4052525a9ffb30b23182f237ea
Pass html5lib_entities01.html 6b336a43e394d3ab7ceb2ab54c63409e8a27aded
Pass html5lib_entities01.html 436c30dd76bf9b5c2b29a181d9a9412ec0ab4bdf
Pass html5lib_entities01.html 1373a52ddcb71f20f29d92abb6714eaabeba7424
Pass html5lib_entities01.html d60f4f324a1ad9c09c4d3590c8c537af2852eeb4
Pass html5lib_entities01.html d21511e2df56c306c78e1449c960c66e565e016e
Pass html5lib_entities01.html 39107d16f24d4c7bcd40ad1239b5f4f677877ee8
Pass html5lib_entities01.html a44b740e8b2349e75c9eb0376f665eab13ff821d
Pass html5lib_entities01.html a0e38b1c19eba037b34c68864634cff032f0b892
Pass html5lib_entities01.html 390d9571a24be0961c8fcd78c69eea16a6414246
Pass html5lib_entities01.html ceba8404405dd3b3b423c45411bde15bf72a846d
Pass html5lib_entities01.html f8dd2fccc21d3a08790a7877186840a692adf111
Pass html5lib_entities01.html d526830d439d3c4e966b22fbedf819d465d3107b
Pass html5lib_entities01.html 240af7bacbfecce6e2a973de9c89fad817fb8d42
Pass html5lib_entities01.html d657585ca1df5b86693fca8a0a2eae76bd9b1c2b
Pass html5lib_entities01.html b508fcffb2d9f2424c7837270e51824321fb4570
Pass html5lib_entities01.html 897ab551df27df14418a46ff1c3acef8338c53f3
Pass html5lib_entities01.html 493a472ccf903088c813ff6874d54482a161df8f
Pass html5lib_entities01.html 67d19edb1f6ecbbb6ccb90df2345e52e5c58efc3
Pass html5lib_entities01.html dded422b7406c966c944555f220d7d3dfcf2a143
Pass html5lib_entities01.html 60a76c9e10e4ac53f836f9e45eb0518dd0b7b73b
Pass html5lib_entities01.html b47fcc6a614247319908b00935f10ec134399917
Pass html5lib_entities01.html 3b5c7d0331ae900e1179eaf3545c78d147434fbf
Pass html5lib_entities01.html a0f119508046dbb4f8059232f6e99f66c1e8e7a6
Pass html5lib_entities01.html daab384be8471edeb755353c5dccaad0c415dac5
Pass html5lib_entities01.html 8485e4d103a517615f39c0d0b71fe5065c5437db
Pass html5lib_entities01.html 4c28749faddb096d1f04792b7daf039268c43181
Pass html5lib_entities01.html c84c576954c4c493528eaa34233c926653152be5
Pass html5lib_entities01.html b2797e18c499df32296545225c259dbf4bea2908
Pass html5lib_entities01.html 5b2dfe6f187413faecd91336bd353c05768ea722
Pass html5lib_entities01.html 34af7be5bcff18ec869a306b19daea70f61f7088
Pass html5lib_entities01.html 8f2e74688427858fc1a895fc472d074a0528a7ae
Pass html5lib_entities01.html 530d6251a43d688e69959237e519812585de8266
Pass html5lib_entities01.html bced9b8c339d0d2838ea0fffe9a64027b4f3a877
Pass html5lib_entities01.html 9c9e8079df25999c606bc84f46b348544a23b9b4
Pass html5lib_entities01.html ec61d22b3bc6f93e54bbff964311bba9a3a06b21
Pass html5lib_entities01.html 391f0136aebdd0e874c8eb85651ffda7e9f86f24
Pass html5lib_entities01.html e2974e7029b008539aacc1ee885705764f8c53f4
Pass html5lib_entities01.html 23e105ca5329cc0338a96fe7e088ba9b319c46d1
Pass html5lib_entities01.html 87af28752724c400edef3970e52440639be5b1a8
Pass html5lib_entities01.html f50d9e39e2bda3d8c0b1c69d1f1e4e86c4b39ac3
Pass html5lib_entities01.html 12827fadb8d36b829e9c2c7315e0848d2d7ef278
Pass html5lib_entities01.html f603997321070e6ccada6fefe4240a9e6c7e870a
Pass html5lib_entities01.html a88e381bf36e74aaa8ac5b0877153b7158bae579
Pass html5lib_entities01.html 11019fa64a25748a5bfb864fc200ec2710b54aa3
Pass html5lib_entities01.html 3d238b9146102bd11e898ff4913f86e8ded65be4
Pass html5lib_entities01.html a1375bfde7be56e514471700e030b1c7e7090e2f
Pass html5lib_entities01.html 7936b73efa385d183e93453bea24fd0c4dff4742
Pass html5lib_entities01.html 41cdf6978b0c48e7044d5e4534fc8bb08de4cdf2
Pass html5lib_entities01.html 5138f572a4db2e2edc2d723e1bb87af72ab501f4
Pass html5lib_entities01.html f30757617b6df330deba1cb607e8d47f71bda13f
Pass html5lib_entities01.html a6fd8cdca1fa8cf07519d9a0c5b779eafa438b70
Pass html5lib_entities01.html ada342466887e85d89c3b815b127bfced036ac76
Pass html5lib_entities01.html 74bd99a9263f0b8e8a5fac4d2684fe37d5a1a9cc
Pass html5lib_entities01.html bffe7b00046407080251ab6bf58cb97ce2a34893
Pass html5lib_entities01.html 5aef37f1f2b9ac45adfade044c882eb09a297569
Pass html5lib_entities01.html 6e2d817539fb3b2023c7bcb88ad220c136f70cf0
Pass html5lib_entities01.html d4ac52727ff405f61a1d878a0aa1951ae5264c80
Pass html5lib_entities01.html d2584faaa4dda5283955b2dc22812a018d04a72d
Pass html5lib_entities01.html 56dc3e612fbfa06cfeb26957e357defcf73aa220
Original file line number Diff line number Diff line change
Expand Up @@ -2,8 +2,7 @@ Harness status: OK

Found 63 tests

62 Pass
1 Fail
63 Pass
Pass html5lib_tests2.html e070301fb578bd639ecbc7ec720fa60222d05826
Pass html5lib_tests2.html aaf24dabcb42470e447d241a40def0d136c12b93
Pass html5lib_tests2.html b6c1142484570bb90c36e454ee193cca17bb618a
Expand All @@ -27,7 +26,7 @@ Pass html5lib_tests2.html 73b97cd984a62703ec54ec4a876ec32aa5fd3b8c
Pass html5lib_tests2.html 2db9616ed62fc2a26056f3395459869cf556974d
Pass html5lib_tests2.html b59aa1c714892618eaccd51696658887fcbd2045
Pass html5lib_tests2.html 98818e7fda2506603bd208662613edb40297c2d3
Fail html5lib_tests2.html e0c43080cf61c0696031bdb097bea4f2a647cfc2
Pass html5lib_tests2.html e0c43080cf61c0696031bdb097bea4f2a647cfc2
Pass html5lib_tests2.html f7753d80a422c40b5fa04d99e52d8ae83369757a
Pass html5lib_tests2.html 7cbd584aef9508a90c98f80040078149a92ec869
Pass html5lib_tests2.html e0f7f130b1e3653dd06f10f3492e4f0bf4cd3cfa
Expand Down
Loading
Loading