From 2b7700c8e976cd9d06fad4940829c185c372345b Mon Sep 17 00:00:00 2001 From: Alex Kladov Date: Tue, 9 Jul 2024 13:48:55 +0100 Subject: [PATCH 1/2] don't rely on trailing whitespace in multiline strings We are going to forbid this shortly --- lib/compiler/aro/aro/Compilation.zig | 4 ++-- lib/compiler/aro/aro/Driver.zig | 4 ++-- lib/compiler/aro/aro/Tokenizer.zig | 10 +++++----- lib/compiler/resinator/comments.zig | 5 ++--- lib/std/compress/xz/test.zig | 24 +++++++++++------------- lib/std/zig/parser_test.zig | 24 ++++++++++++------------ src/link/tapi/Tokenizer.zig | 3 +-- test/compare_output.zig | 2 +- test/compile_errors.zig | 6 +++--- test/link/macho.zig | 14 +++++++------- tools/doctest.zig | 4 ++-- 11 files changed, 48 insertions(+), 52 deletions(-) diff --git a/lib/compiler/aro/aro/Compilation.zig b/lib/compiler/aro/aro/Compilation.zig index d6b9b9178012..769ffdc6d9d8 100644 --- a/lib/compiler/aro/aro/Compilation.zig +++ b/lib/compiler/aro/aro/Compilation.zig @@ -534,8 +534,8 @@ pub fn generateBuiltinMacros(comp: *Compilation, system_defines_mode: SystemDefi if (system_defines_mode == .include_system_defines) { try buf.appendSlice( - \\#define __VERSION__ "Aro - ++ @import("../backend.zig").version_str ++ "\"\n" ++ + \\#define __VERSION__ "Aro + ++ " " ++ @import("../backend.zig").version_str ++ "\"\n" ++ \\#define __Aro__ \\ ); diff --git a/lib/compiler/aro/aro/Driver.zig b/lib/compiler/aro/aro/Driver.zig index 6876395b8ac5..828d379ff3ad 100644 --- a/lib/compiler/aro/aro/Driver.zig +++ b/lib/compiler/aro/aro/Driver.zig @@ -109,9 +109,9 @@ pub const usage = \\ -fhosted Compilation in a hosted environment \\ -fms-extensions Enable support for Microsoft extensions \\ -fno-ms-extensions Disable support for Microsoft extensions - \\ -fdollars-in-identifiers + \\ -fdollars-in-identifiers \\ Allow '$' in identifiers - \\ -fno-dollars-in-identifiers + \\ -fno-dollars-in-identifiers \\ Disallow '$' in identifiers \\ -fmacro-backtrace-limit= \\ Set limit on how many macro expansion traces are shown in errors (default 6) diff --git a/lib/compiler/aro/aro/Tokenizer.zig b/lib/compiler/aro/aro/Tokenizer.zig index c5a84b8cc0b5..f4f38bf8398f 100644 --- a/lib/compiler/aro/aro/Tokenizer.zig +++ b/lib/compiler/aro/aro/Tokenizer.zig @@ -1871,11 +1871,11 @@ test "operators" { test "keywords" { try expectTokens( \\auto __auto_type break case char const continue default do - \\double else enum extern float for goto if int - \\long register return short signed sizeof static - \\struct switch typedef union unsigned void volatile - \\while _Bool _Complex _Imaginary inline restrict _Alignas - \\_Alignof _Atomic _Generic _Noreturn _Static_assert _Thread_local + \\double else enum extern float for goto if int + \\long register return short signed sizeof static + \\struct switch typedef union unsigned void volatile + \\while _Bool _Complex _Imaginary inline restrict _Alignas + \\_Alignof _Atomic _Generic _Noreturn _Static_assert _Thread_local \\__attribute __attribute__ \\ , &.{ diff --git a/lib/compiler/resinator/comments.zig b/lib/compiler/resinator/comments.zig index 67504bbbeb28..ed719bb5b555 100644 --- a/lib/compiler/resinator/comments.zig +++ b/lib/compiler/resinator/comments.zig @@ -322,11 +322,10 @@ test "multiline comment with newlines" { test "comments appended to a line" { try testRemoveComments( - \\blah - \\blah - , + "blah \nblah", \\blah // line comment \\blah + , ); try testRemoveComments( "blah \r\nblah", diff --git a/lib/std/compress/xz/test.zig b/lib/std/compress/xz/test.zig index 08180e45c0cf..27955b942090 100644 --- a/lib/std/compress/xz/test.zig +++ b/lib/std/compress/xz/test.zig @@ -44,19 +44,17 @@ test "compressed data" { "good-1-lzma2-3.xz", "good-1-lzma2-4.xz", }) |filename| { - try testReader(@embedFile("testdata/" ++ filename), - \\Lorem ipsum dolor sit amet, consectetur adipisicing - \\elit, sed do eiusmod tempor incididunt ut - \\labore et dolore magna aliqua. Ut enim - \\ad minim veniam, quis nostrud exercitation ullamco - \\laboris nisi ut aliquip ex ea commodo - \\consequat. Duis aute irure dolor in reprehenderit - \\in voluptate velit esse cillum dolore eu - \\fugiat nulla pariatur. Excepteur sint occaecat cupidatat - \\non proident, sunt in culpa qui officia - \\deserunt mollit anim id est laborum. - \\ - ); + try testReader(@embedFile("testdata/" ++ filename), "" ++ + "Lorem ipsum dolor sit amet, consectetur adipisicing \n" ++ + "elit, sed do eiusmod tempor incididunt ut \n" ++ + "labore et dolore magna aliqua. Ut enim \n" ++ + "ad minim veniam, quis nostrud exercitation ullamco \n" ++ + "laboris nisi ut aliquip ex ea commodo \n" ++ + "consequat. Duis aute irure dolor in reprehenderit \n" ++ + "in voluptate velit esse cillum dolore eu \n" ++ + "fugiat nulla pariatur. Excepteur sint occaecat cupidatat \n" ++ + "non proident, sunt in culpa qui officia \n" ++ + "deserunt mollit anim id est laborum. \n"); } try testReader(@embedFile("testdata/good-1-lzma2-5.xz"), ""); diff --git a/lib/std/zig/parser_test.zig b/lib/std/zig/parser_test.zig index 38d279461ec5..b1c12b8c3a5e 100644 --- a/lib/std/zig/parser_test.zig +++ b/lib/std/zig/parser_test.zig @@ -5002,8 +5002,8 @@ test "zig fmt: space after top level doc comment" { } test "zig fmt: remove trailing whitespace after container doc comment" { - try testTransform( - \\//! top level doc comment + try testTransform("" ++ + "//! top level doc comment \n" ++ \\ , \\//! top level doc comment @@ -5012,8 +5012,8 @@ test "zig fmt: remove trailing whitespace after container doc comment" { } test "zig fmt: remove trailing whitespace after doc comment" { - try testTransform( - \\/// doc comment + try testTransform("" ++ + "/// doc comment \n" ++ \\a = 0, \\ , @@ -6218,8 +6218,8 @@ test "recovery: eof in c pointer" { test "matching whitespace on minus op" { try testError( - \\ _ = 2 -1, - \\ _ = 2- 1, + \\ _ = 2 -1, + \\ _ = 2- 1, \\ _ = 2- \\ 2, \\ _ = 2 @@ -6236,7 +6236,7 @@ test "matching whitespace on minus op" { \\ _ = -1, \\ _ = 2 - -1, \\ _ = 2 - 1, - \\ _ = 2-1, + \\ _ = 2-1, \\ _ = 2 - \\1, \\ _ = 2 @@ -6247,8 +6247,8 @@ test "matching whitespace on minus op" { test "ampersand" { try testError( \\ _ = bar && foo, - \\ _ = bar&&foo, - \\ _ = bar& & foo, + \\ _ = bar&&foo, + \\ _ = bar& & foo, \\ _ = bar& &foo, , &.{ .invalid_ampersand_ampersand, @@ -6258,9 +6258,9 @@ test "ampersand" { }); try testError( - \\ _ = bar & &foo, - \\ _ = bar & &&foo, - \\ _ = &&foo, + \\ _ = bar & &foo, + \\ _ = bar & &&foo, + \\ _ = &&foo, , &.{}); } diff --git a/src/link/tapi/Tokenizer.zig b/src/link/tapi/Tokenizer.zig index eb1ffc0e81fc..e3cd80fe843c 100644 --- a/src/link/tapi/Tokenizer.zig +++ b/src/link/tapi/Tokenizer.zig @@ -405,7 +405,7 @@ test "mappings" { test "inline mapped sequence of values" { try testExpected( - \\key : [ val1, + \\key : [ val1, \\ val2 ] , &[_]Token.Id{ .literal, @@ -416,7 +416,6 @@ test "inline mapped sequence of values" { .space, .literal, .comma, - .space, .new_line, .space, .literal, diff --git a/test/compare_output.zig b/test/compare_output.zig index d07864360f2f..53b6a42edb55 100644 --- a/test/compare_output.zig +++ b/test/compare_output.zig @@ -442,7 +442,7 @@ pub fn addCases(cases: *tests.CompareOutputContext) void { \\ \\pub const std_options = .{ \\ .log_level = .debug, - \\ + \\ \\ .log_scope_levels = &.{ \\ .{ .scope = .a, .level = .warn }, \\ .{ .scope = .c, .level = .err }, diff --git a/test/compile_errors.zig b/test/compile_errors.zig index 0dc191260f26..97f137b9c043 100644 --- a/test/compile_errors.zig +++ b/test/compile_errors.zig @@ -28,13 +28,13 @@ pub fn addCases(ctx: *Cases, b: *std.Build) !void { \\ ); \\} , &[_][]const u8{ - \\:2:5: error: + \\:2:5: error: \\ hello! \\ I'm a multiline error message. \\ I hope to be very useful! - \\ + \\ \\ also I will leave this trailing newline here if you don't mind - \\ + \\ }); } diff --git a/test/link/macho.zig b/test/link/macho.zig index b77bd279cbe5..a00c5304117e 100644 --- a/test/link/macho.zig +++ b/test/link/macho.zig @@ -646,7 +646,7 @@ fn testHelloC(b: *Build, opts: Options) *Step { const exe = addExecutable(b, opts, .{ .name = "main", .c_source_bytes = \\#include - \\int main() { + \\int main() { \\ printf("Hello world!\n"); \\ return 0; \\} @@ -932,7 +932,7 @@ fn testMergeLiteralsX64(b: *Build, opts: Options) *Step { \\ lea L._q1(%rip), %rax \\ mov (%rax), %xmm0 \\ ret - \\ + \\ \\.section __TEXT,__cstring,cstring_literals \\l._s1: \\ .asciz "hello" @@ -958,7 +958,7 @@ fn testMergeLiteralsX64(b: *Build, opts: Options) *Step { \\ lea L._q2(%rip), %rax \\ mov (%rax), %xmm0 \\ ret - \\ + \\ \\.section __TEXT,__cstring,cstring_literals \\l._s2: \\ .asciz "hello" @@ -1048,7 +1048,7 @@ fn testMergeLiteralsArm64(b: *Build, opts: Options) *Step { \\ adrp x8, L._q1@PAGE \\ ldr d0, [x8, L._q1@PAGEOFF] \\ ret - \\ + \\ \\.section __TEXT,__cstring,cstring_literals \\l._s1: \\ .asciz "hello" @@ -1074,7 +1074,7 @@ fn testMergeLiteralsArm64(b: *Build, opts: Options) *Step { \\ adrp x8, L._q2@PAGE \\ ldr d0, [x8, L._q2@PAGEOFF] \\ ret - \\ + \\ \\.section __TEXT,__cstring,cstring_literals \\l._s2: \\ .asciz "hello" @@ -1168,7 +1168,7 @@ fn testMergeLiteralsArm642(b: *Build, opts: Options) *Step { \\ adrp x0, L._q1@PAGE \\ ldr x0, [x0, L._q1@PAGEOFF] \\ ret - \\ + \\ \\.section __TEXT,__cstring,cstring_literals \\_s1: \\ .asciz "hello" @@ -1189,7 +1189,7 @@ fn testMergeLiteralsArm642(b: *Build, opts: Options) *Step { \\ adrp x0, L._q2@PAGE \\ ldr x0, [x0, L._q2@PAGEOFF] \\ ret - \\ + \\ \\.section __TEXT,__cstring,cstring_literals \\_s2: \\ .asciz "hello" diff --git a/tools/doctest.zig b/tools/doctest.zig index fad9be86db2c..924379fc1fc8 100644 --- a/tools/doctest.zig +++ b/tools/doctest.zig @@ -1468,10 +1468,10 @@ test "printShell" { try testing.expectEqualSlices(u8, expected, buffer.items); } { - // intentional space after "--build-option1 \" const shell_out = \\$ zig build test.zig \ - \\ --build-option1 \ + \\ --build-option1 \ + ++ " \n" ++ \\ --build-option2 \\$ ./test ; From 4476ff4bae8d4c51b44eed0db9028ca135c366e1 Mon Sep 17 00:00:00 2001 From: Alex Kladov Date: Wed, 10 Jul 2024 16:13:59 +0100 Subject: [PATCH 2/2] astgen: forbid trailing whitespace in multiline strings Trailing whitespace in multiline strings is annoying for all the usual reasons trailing whitespace is annoying (unrelated line changes). However, in this case it even can leak into whatever the string literal is used for. For example, in the previous commit there is trailing whitespace leaking into the help messages. So it makes sense to forbid this outright. `zig fmt` intentionally doesn't try to clean up here --- if there's a trailing whitespace in a multiline literal, it is _ambiguous_ whether the user added that by accident, or whether they _intended_ to have a space there. If you need to have trailing whitespace, you can contatenate `\\` and `"` strings with `++`: const S = \\hello ++ " \n" ++ \\world \\ ; Closes: #19299 --- lib/std/zig/AstGen.zig | 16 ++++++++++++++++ ...multiline_string_with_trailing_whitespace.zig | 8 ++++++++ 2 files changed, 24 insertions(+) create mode 100644 test/cases/compile_errors/multiline_string_with_trailing_whitespace.zig diff --git a/lib/std/zig/AstGen.zig b/lib/std/zig/AstGen.zig index 18012b802c77..045776602f03 100644 --- a/lib/std/zig/AstGen.zig +++ b/lib/std/zig/AstGen.zig @@ -11679,6 +11679,7 @@ fn strLitNodeAsString(astgen: *AstGen, node: Ast.Node.Index) !IndexSlice { const slice = tree.tokenSlice(tok_i); const carriage_return_ending: usize = if (slice[slice.len - 2] == '\r') 2 else 1; const line_bytes = slice[2 .. slice.len - carriage_return_ending]; + try astgen.strLitNodeCheckTrailingWhitespace(tok_i, line_bytes); try string_bytes.appendSlice(gpa, line_bytes); tok_i += 1; } @@ -11687,6 +11688,7 @@ fn strLitNodeAsString(astgen: *AstGen, node: Ast.Node.Index) !IndexSlice { const slice = tree.tokenSlice(tok_i); const carriage_return_ending: usize = if (slice[slice.len - 2] == '\r') 2 else 1; const line_bytes = slice[2 .. slice.len - carriage_return_ending]; + try astgen.strLitNodeCheckTrailingWhitespace(tok_i, line_bytes); try string_bytes.ensureUnusedCapacity(gpa, line_bytes.len + 1); string_bytes.appendAssumeCapacity('\n'); string_bytes.appendSliceAssumeCapacity(line_bytes); @@ -11699,6 +11701,20 @@ fn strLitNodeAsString(astgen: *AstGen, node: Ast.Node.Index) !IndexSlice { }; } +fn strLitNodeCheckTrailingWhitespace( + astgen: *AstGen, + tok: Ast.TokenIndex, + line_bytes: []const u8, +) !void { + if (line_bytes.len == 0) return; + switch (line_bytes[line_bytes.len - 1]) { + '\t', ' ' => {}, + '\r', '\n' => unreachable, + else => return, + } + return astgen.failTok(tok, "multiline string cannot contain trailing whitespace", .{}); +} + fn testNameString(astgen: *AstGen, str_lit_token: Ast.TokenIndex) !Zir.NullTerminatedString { const gpa = astgen.gpa; const string_bytes = &astgen.string_bytes; diff --git a/test/cases/compile_errors/multiline_string_with_trailing_whitespace.zig b/test/cases/compile_errors/multiline_string_with_trailing_whitespace.zig new file mode 100644 index 000000000000..12778159ec94 --- /dev/null +++ b/test/cases/compile_errors/multiline_string_with_trailing_whitespace.zig @@ -0,0 +1,8 @@ +const S = + \\Hello, world +; // ^^^^^ trailing whitespace here +// error +// backend=stage2 +// target=native +// +// :2:5: error: multiline string cannot contain trailing whitespace