fuzzing: Move to an iterator pattern for input.

Previously, the `std.testing.fuzzInput` interface was called once for a given test function. This had the downside that the test functions were unable to initialize state independent of the fuzz loop. Now, `std.testing.fuzzInput` has been replaced with `std.testing.fuzzer` which returns a `std.testing.Fuzzer`. To receive fuzz input, the function must loop on the `std.testing.Fuzzer.next` function, retrieving one `std.testing.Fuzzer.Run` at a time. That run object contains a reference to a per-loop allocator and the fuzz input for that loop. Additionally, with the ability to accept multiple fuzz inputs within the test function, the corpus provided to `std.testing.fuzzer` is now used to dry-run the test with all the provided inputs. In a future commit, this may also be used to seed the fuzzer.
ziglang · Aug 10, 2024 · d91c4da · d91c4da
1 parent ae5bf2f
commit d91c4da
Show file tree

Hide file tree

Showing 4 changed files with 149 additions and 83 deletions.
diff --git a/lib/compiler/test_runner.zig b/lib/compiler/test_runner.zig
@@ -117,7 +117,7 @@ fn mainServer() !void {
                 const test_fn = builtin.test_functions[index];
                 var fail = false;
                 var skip = false;
-                is_fuzz_test = false;
+                fuzzer = .{};
                 test_fn.func() catch |err| switch (err) {
                     error.SkipZigTest => skip = true,
                     else => {
@@ -134,7 +134,7 @@ fn mainServer() !void {
                         .fail = fail,
                         .skip = skip,
                         .leak = leak,
-                        .fuzz = is_fuzz_test,
+                        .fuzz = fuzzer.test_data != null,
                         .log_err_count = std.math.lossyCast(
                             @TypeOf(@as(std.zig.Server.Message.TestResults.Flags, undefined).log_err_count),
                             log_err_count,
@@ -145,30 +145,22 @@ fn mainServer() !void {
             .start_fuzzing => {
                 if (!builtin.fuzz) unreachable;
                 const index = try server.receiveBody_u32();
-                var first = true;
                 const test_fn = builtin.test_functions[index];
-                while (true) {
-                    testing.allocator_instance = .{};
-                    defer if (testing.allocator_instance.deinit() == .leak) std.process.exit(1);
-                    log_err_count = 0;
-                    is_fuzz_test = false;
-                    test_fn.func() catch |err| switch (err) {
-                        error.SkipZigTest => continue,
-                        else => {
-                            if (@errorReturnTrace()) |trace| {
-                                std.debug.dumpStackTrace(trace.*);
-                            }
-                            std.debug.print("failed with error.{s}\n", .{@errorName(err)});
-                            std.process.exit(1);
-                        },
-                    };
-                    if (!is_fuzz_test) @panic("missed call to std.testing.fuzzInput");
-                    if (log_err_count != 0) @panic("error logs detected");
-                    if (first) {
-                        first = false;
-                        try server.serveU64Message(.fuzz_start_addr, entry_addr);
-                    }
-                }
+                fuzzer = .{ .server = &server };
+                testing.allocator_instance = .{};
+                defer if (testing.allocator_instance.deinit() == .leak) std.process.exit(1);
+                log_err_count = 0;
+                test_fn.func() catch |err| switch (err) {
+                    error.SkipZigTest => continue,
+                    else => {
+                        if (@errorReturnTrace()) |trace| {
+                            std.debug.dumpStackTrace(trace.*);
+                        }
+                        std.debug.print("failed with error.{s}\n", .{@errorName(err)});
+                        std.process.exit(1);
+                    },
+                };
+                if (log_err_count != 0) @panic("error logs detected");
             },
 
             else => {
@@ -211,7 +203,7 @@ fn mainTerminal() void {
         if (!have_tty) {
             std.debug.print("{d}/{d} {s}...", .{ i + 1, test_fn_list.len, test_fn.name });
         }
-        is_fuzz_test = false;
+        fuzzer = .{};
         if (test_fn.func()) |_| {
             ok_count += 1;
             test_node.end();
@@ -241,7 +233,7 @@ fn mainTerminal() void {
                 test_node.end();
             },
         }
-        fuzz_count += @intFromBool(is_fuzz_test);
+        fuzz_count += @intFromBool(fuzzer.test_data != null);
     }
     root_node.end();
     if (ok_count == test_fn_list.len) {
@@ -346,23 +338,79 @@ const FuzzerSlice = extern struct {
     }
 };
 
-var is_fuzz_test: bool = undefined;
-var entry_addr: usize = 0;
+// Initialized prior to executing each test function.
+var fuzzer: Fuzzer = undefined;
 
+// NOTE: These must only be referenced when `builtin.fuzz` since libfuzzer is only linked
+// when that is true.
 extern fn fuzzer_next() FuzzerSlice;
 extern fn fuzzer_init(cache_dir: FuzzerSlice) void;
 extern fn fuzzer_coverage_id() u64;
 
-pub fn fuzzInput(options: testing.FuzzInputOptions) []const u8 {
+const Fuzzer = struct {
+    server: ?*std.zig.Server = null,
+    test_data: ?TestData = null,
+
+    const TestData = struct {
+        notification_state: enum { before_first, after_first, rest } = .before_first,
+        entry_addr: usize = 0,
+        corpus: if (!builtin.fuzz) []const []const u8 else void,
+        corpus_index: if (!builtin.fuzz) usize else void,
+        allocator_instance: std.heap.GeneralPurposeAllocator(.{}) = .{},
+    };
+};
+
+pub fn fuzzerInit(options: testing.FuzzerOptions) void {
+    if (!builtin.fuzz) {
+        fuzzer.test_data = .{
+            .corpus = if (options.corpus.len == 0) &.{""} else options.corpus,
+            .corpus_index = 0,
+        };
+        return;
+    }
+    fuzzer.test_data = .{
+        .corpus = {},
+        .corpus_index = {},
+    };
+}
+
+pub fn fuzzerNext() ?testing.Fuzzer.Run {
     @disableInstrumentation();
     if (crippled) return "";
-    is_fuzz_test = true;
-    if (builtin.fuzz) {
-        if (entry_addr == 0) entry_addr = @returnAddress();
-        return fuzzer_next().toSlice();
+    // Test data must be initialized during `fuzzerInit`.
+    const test_data = &fuzzer.test_data.?;
+    // Evaluate the previous loop. This must do nothing before the first loop.
+    if (test_data.allocator_instance.deinit() == .leak) std.process.exit(1);
+    if (log_err_count != 0) @panic("error logs detected");
+    // Reset the allocator.
+    test_data.allocator_instance = .{};
+    // Iterate over the corpus for dry-run tests.
+    if (!builtin.fuzz) {
+        if (test_data.corpus_index >= test_data.corpus.len)
+            return null;
+        const result = test_data.corpus[test_data.corpus_index];
+        test_data.corpus_index += 1;
+        return .{
+            .allocator = test_data.allocator_instance.allocator(),
+            .input = result,
+        };
     }
-    if (options.corpus.len == 0) return "";
-    var prng = std.Random.DefaultPrng.init(testing.random_seed);
-    const random = prng.random();
-    return options.corpus[random.uintLessThan(usize, options.corpus.len)];
+    // Notify the Zig server on the second call to `fuzzerNext` using the return address
+    // from the first call. Subsequent calls have no effect.
+    switch (test_data.notification_state) {
+        .before_first => {
+            test_data.entry_addr = @returnAddress();
+            test_data.notification_state = .after_first;
+        },
+        .after_first => {
+            if (fuzzer.server) |server|
+                server.serveU64Message(.fuzz_start_addr, @returnAddress()) catch @panic("internal test runner failure");
+            test_data.notification_state = .rest;
+        },
+        .rest => {},
+    }
+    return .{
+        .allocator = test_data.allocator_instance.allocator(),
+        .input = fuzzer_next().toSlice(),
+    };
 }
diff --git a/lib/init/src/main.zig b/lib/init/src/main.zig
@@ -28,6 +28,8 @@ test "simple test" {
 
 test "fuzz example" {
     // Try passing `--fuzz` to `zig build` and see if it manages to fail this test case!
-    const input_bytes = std.testing.fuzzInput(.{});
-    try std.testing.expect(!std.mem.eql(u8, "canyoufindme", input_bytes));
+    var fuzzer = std.testing.fuzzer(.{});
+    while (fuzzer.next()) |run| {
+        try std.testing.expect(!std.mem.eql(u8, "canyoufindme", run.input));
+    }
 }
diff --git a/lib/std/testing.zig b/lib/std/testing.zig
@@ -1137,10 +1137,23 @@ pub fn refAllDeclsRecursive(comptime T: type) void {
     }
 }
 
-pub const FuzzInputOptions = struct {
+pub const FuzzerOptions = struct {
     corpus: []const []const u8 = &.{},
 };
 
-pub inline fn fuzzInput(options: FuzzInputOptions) []const u8 {
-    return @import("root").fuzzInput(options);
+pub inline fn fuzzer(options: FuzzerOptions) Fuzzer {
+    @import("root").fuzzerInit(options);
+    return .{};
 }
+
+pub const Fuzzer = struct {
+    pub const Run = struct {
+        allocator: std.mem.Allocator,
+        input: []const u8,
+    };
+
+    pub fn next(self: *Fuzzer) ?Run {
+        _ = self;
+        return @import("root").fuzzerNext();
+    }
+};
diff --git a/lib/std/zig/tokenizer.zig b/lib/std/zig/tokenizer.zig
@@ -1842,46 +1842,49 @@ fn testTokenize(source: [:0]const u8, expected_token_tags: []const Token.Tag) !v
 }
 
 test "fuzzable properties upheld" {
-    const source = std.testing.fuzzInput(.{});
-    const source0 = try std.testing.allocator.dupeZ(u8, source);
-    defer std.testing.allocator.free(source0);
-    var tokenizer = Tokenizer.init(source0);
-    var tokenization_failed = false;
-    while (true) {
-        const token = tokenizer.next();
-
-        // Property: token end location after start location (or equal)
-        try std.testing.expect(token.loc.end >= token.loc.start);
-
-        switch (token.tag) {
-            .invalid => {
-                tokenization_failed = true;
-
-                // Property: invalid token always ends at newline or eof
-                try std.testing.expect(source0[token.loc.end] == '\n' or source0[token.loc.end] == 0);
-            },
-            .eof => {
-                // Property: EOF token is always 0-length at end of source.
-                try std.testing.expectEqual(source0.len, token.loc.start);
-                try std.testing.expectEqual(source0.len, token.loc.end);
-                break;
-            },
-            else => continue,
+    var fuzzer = std.testing.fuzzer(.{});
+    while (fuzzer.next()) |run| {
+        const source = run.input;
+        const source0 = try run.allocator.dupeZ(u8, source);
+        defer run.allocator.free(source0);
+        var tokenizer = Tokenizer.init(source0);
+        var tokenization_failed = false;
+        while (true) {
+            const token = tokenizer.next();
+
+            // Property: token end location after start location (or equal)
+            try std.testing.expect(token.loc.end >= token.loc.start);
+
+            switch (token.tag) {
+                .invalid => {
+                    tokenization_failed = true;
+
+                    // Property: invalid token always ends at newline or eof
+                    try std.testing.expect(source0[token.loc.end] == '\n' or source0[token.loc.end] == 0);
+                },
+                .eof => {
+                    // Property: EOF token is always 0-length at end of source.
+                    try std.testing.expectEqual(source0.len, token.loc.start);
+                    try std.testing.expectEqual(source0.len, token.loc.end);
+                    break;
+                },
+                else => continue,
+            }
         }
-    }
 
-    if (source0.len > 0) for (source0, source0[1..][0..source0.len]) |cur, next| {
-        // Property: No null byte allowed except at end.
-        if (cur == 0) {
-            try std.testing.expect(tokenization_failed);
-        }
-        // Property: No ASCII control characters other than \n and \t are allowed.
-        if (std.ascii.isControl(cur) and cur != '\n' and cur != '\t') {
-            try std.testing.expect(tokenization_failed);
-        }
-        // Property: All '\r' must be followed by '\n'.
-        if (cur == '\r' and next != '\n') {
-            try std.testing.expect(tokenization_failed);
-        }
-    };
+        if (source0.len > 0) for (source0, source0[1..][0..source0.len]) |cur, next| {
+            // Property: No null byte allowed except at end.
+            if (cur == 0) {
+                try std.testing.expect(tokenization_failed);
+            }
+            // Property: No ASCII control characters other than \n and \t are allowed.
+            if (std.ascii.isControl(cur) and cur != '\n' and cur != '\t') {
+                try std.testing.expect(tokenization_failed);
+            }
+            // Property: All '\r' must be followed by '\n'.
+            if (cur == '\r' and next != '\n') {
+                try std.testing.expect(tokenization_failed);
+            }
+        };
+    }
 }