From 8ee43be26d24b9644f3da776d063262cce403c10 Mon Sep 17 00:00:00 2001 From: Kevin Newton Date: Fri, 12 Apr 2024 11:17:14 -0400 Subject: [PATCH] Fix up embdoc lexing on EOF --- src/prism.c | 28 ++++++++++++++++++++-------- templates/src/diagnostic.c.erb | 2 +- test/prism/errors_test.rb | 13 +++++++++---- 3 files changed, 30 insertions(+), 13 deletions(-) diff --git a/src/prism.c b/src/prism.c index c370a8b2bf6..9fabc5ec0c5 100644 --- a/src/prism.c +++ b/src/prism.c @@ -9605,15 +9605,23 @@ lex_embdoc(pm_parser_t *parser) { pm_comment_t *comment = parser_comment(parser, PM_COMMENT_EMBDOC); if (comment == NULL) return PM_TOKEN_EOF; - // Now, loop until we find the end of the embedded documentation or the end of - // the file. + // Now, loop until we find the end of the embedded documentation or the end + // of the file. while (parser->current.end + 4 <= parser->end) { parser->current.start = parser->current.end; - // If we've hit the end of the embedded documentation then we'll return that - // token here. - if (memcmp(parser->current.end, "=end", 4) == 0 && - (parser->current.end + 4 == parser->end || pm_char_is_whitespace(parser->current.end[4]))) { + // If we've hit the end of the embedded documentation then we'll return + // that token here. + if ( + (memcmp(parser->current.end, "=end", 4) == 0) && + ( + (parser->current.end + 4 == parser->end) || // end of file + pm_char_is_whitespace(parser->current.end[4]) || // whitespace + (parser->current.end[4] == '\0') || // NUL or end of script + (parser->current.end[4] == '\004') || // ^D + (parser->current.end[4] == '\032') // ^Z + ) + ) { const uint8_t *newline = next_newline(parser->current.end, parser->end - parser->current.end); if (newline == NULL) { @@ -10425,9 +10433,13 @@ parser_lex(pm_parser_t *parser) { // = => =~ == === =begin case '=': - if (current_token_starts_line(parser) && (parser->current.end + 5 <= parser->end) && memcmp(parser->current.end, "begin", 5) == 0 && pm_char_is_whitespace(peek_offset(parser, 5))) { + if ( + current_token_starts_line(parser) && + (parser->current.end + 5 <= parser->end) && + memcmp(parser->current.end, "begin", 5) == 0 && + (pm_char_is_whitespace(peek_offset(parser, 5)) || (peek_offset(parser, 5) == '\0')) + ) { pm_token_type_t type = lex_embdoc(parser); - if (type == PM_TOKEN_EOF) { LEX(type); } diff --git a/templates/src/diagnostic.c.erb b/templates/src/diagnostic.c.erb index 209afd2ee5a..924d9e6b3fb 100644 --- a/templates/src/diagnostic.c.erb +++ b/templates/src/diagnostic.c.erb @@ -152,7 +152,7 @@ static const pm_diagnostic_data_t diagnostic_messages[PM_DIAGNOSTIC_ID_MAX] = { [PM_ERR_DEF_RECEIVER_TERM] = { "expected a `.` or `::` after the receiver in a method definition", PM_ERROR_LEVEL_SYNTAX }, [PM_ERR_DEF_TERM] = { "expected an `end` to close the `def` statement", PM_ERROR_LEVEL_SYNTAX }, [PM_ERR_DEFINED_EXPRESSION] = { "expected an expression after `defined?`", PM_ERROR_LEVEL_SYNTAX }, - [PM_ERR_EMBDOC_TERM] = { "could not find a terminator for the embedded document", PM_ERROR_LEVEL_SYNTAX }, + [PM_ERR_EMBDOC_TERM] = { "embedded document meets end of file", PM_ERROR_LEVEL_SYNTAX }, [PM_ERR_EMBEXPR_END] = { "expected a `}` to close the embedded expression", PM_ERROR_LEVEL_SYNTAX }, [PM_ERR_EMBVAR_INVALID] = { "invalid embedded variable", PM_ERROR_LEVEL_SYNTAX }, [PM_ERR_END_UPCASE_BRACE] = { "expected a `{` after `END`", PM_ERROR_LEVEL_SYNTAX }, diff --git a/test/prism/errors_test.rb b/test/prism/errors_test.rb index cabc96c8eab..0a06e4bd38f 100644 --- a/test/prism/errors_test.rb +++ b/test/prism/errors_test.rb @@ -105,9 +105,14 @@ def test_pre_execution_context end def test_unterminated_embdoc - assert_errors expression("1"), "1\n=begin\n", [ - ["could not find a terminator for the embedded document", 2..9] - ] + message = "embedded document meets end of file" + assert_error_messages "=begin", [message] + assert_error_messages "=begin\n", [message] + + refute_error_messages "=begin\n=end" + refute_error_messages "=begin\n=end\0" + refute_error_messages "=begin\n=end\C-d" + refute_error_messages "=begin\n=end\C-z" end def test_unterminated_i_list @@ -2217,7 +2222,7 @@ def assert_error_messages(source, errors) def refute_error_messages(source) assert_valid_syntax(source) - assert Prism.parse_success?(source) + assert Prism.parse_success?(source), "Expected #{source.inspect} to parse successfully" end def assert_warning_messages(source, warnings)