From 9495060916669c0e14d3fcc3bc8b8a0eef5a60c9 Mon Sep 17 00:00:00 2001 From: ad hoc Date: Mon, 9 Dec 2024 12:30:29 +0100 Subject: [PATCH 01/30] winch: Implement aarch64 call, trapz, address_at_vmctx (#9751) * implement trapz for aarch64 * implement call_with_name for aarch64 asm * implement call_with_reg for aarch64 asm * implement call_with_lib for aarch64 asm * tidy imports * implement address_at_vmctx for aarch64 masm * tidy imports * add tests * fmt pass * fix test after rebase * add some comments * pass calling convention down to asm after #9757 * s/Emits/Emit/ * fix test --- tests/disas/winch/aarch64/call/params.wat | 166 +++++++++++++++ tests/disas/winch/aarch64/call/recursive.wat | 84 ++++++++ .../disas/winch/aarch64/call/reg_on_stack.wat | 68 +++++++ tests/disas/winch/aarch64/call/simple.wat | 69 +++++++ .../aarch64/call_indirect/call_indirect.wat | 190 ++++++++++++++++++ .../winch/aarch64/call_indirect/local_arg.wat | 112 +++++++++++ winch/codegen/src/isa/aarch64/asm.rs | 57 +++++- winch/codegen/src/isa/aarch64/masm.rs | 36 +++- 8 files changed, 765 insertions(+), 17 deletions(-) create mode 100644 tests/disas/winch/aarch64/call/params.wat create mode 100644 tests/disas/winch/aarch64/call/recursive.wat create mode 100644 tests/disas/winch/aarch64/call/reg_on_stack.wat create mode 100644 tests/disas/winch/aarch64/call/simple.wat create mode 100644 tests/disas/winch/aarch64/call_indirect/call_indirect.wat create mode 100644 tests/disas/winch/aarch64/call_indirect/local_arg.wat diff --git a/tests/disas/winch/aarch64/call/params.wat b/tests/disas/winch/aarch64/call/params.wat new file mode 100644 index 000000000000..f7fb0484f90c --- /dev/null +++ b/tests/disas/winch/aarch64/call/params.wat @@ -0,0 +1,166 @@ +;;! target = "aarch64" +;;! 
test = "winch" + +(module + (func (export "main") (param i32) (param i32) (result i32) + (local.get 1) + (local.get 0) + (i32.add) + + (call $add (i32.const 1) (i32.const 2) (i32.const 3) (i32.const 4) (i32.const 5) (i32.const 6) (i32.const 7) (i32.const 8)) + + (local.get 1) + (local.get 0) + (i32.add) + + (call $add (i32.const 2) (i32.const 3) (i32.const 4) (i32.const 5) (i32.const 6) (i32.const 7) (i32.const 8)) + ) + + (func $add (param i32 i32 i32 i32 i32 i32 i32 i32 i32) (result i32) + (local.get 0) + (local.get 1) + (i32.add) + (local.get 2) + (i32.add) + (local.get 3) + (i32.add) + (local.get 4) + (i32.add) + (local.get 5) + (i32.add) + (local.get 6) + (i32.add) + (local.get 7) + (i32.add) + (local.get 8) + (i32.add) + ) +) + +;; wasm[0]::function[0]: +;; stp x29, x30, [sp, #-0x10]! +;; mov x29, sp +;; mov x28, sp +;; mov x9, x0 +;; sub sp, sp, #0x18 +;; mov x28, sp +;; stur x0, [x28, #0x10] +;; stur x1, [x28, #8] +;; stur w2, [x28, #4] +;; stur w3, [x28] +;; ldur w0, [x28, #4] +;; ldur w1, [x28] +;; add w1, w1, w0, uxtx +;; sub sp, sp, #4 +;; mov x28, sp +;; stur w1, [x28] +;; sub sp, sp, #0x24 +;; mov x28, sp +;; mov x0, x9 +;; mov x1, x9 +;; ldur w2, [x28, #0x24] +;; mov x16, #1 +;; mov w3, w16 +;; mov x16, #2 +;; mov w4, w16 +;; mov x16, #3 +;; mov w5, w16 +;; mov x16, #4 +;; mov w6, w16 +;; mov x16, #5 +;; mov w7, w16 +;; mov x16, #6 +;; mov w16, w16 +;; stur w16, [x28] +;; mov x16, #7 +;; mov w16, w16 +;; stur w16, [x28, #8] +;; mov x16, #8 +;; mov w16, w16 +;; stur w16, [x28, #0x10] +;; bl #0x160 +;; a4: add sp, sp, #0x24 +;; mov x28, sp +;; add sp, sp, #4 +;; mov x28, sp +;; ldur x9, [x28, #0x10] +;; ldur w1, [x28, #4] +;; ldur w2, [x28] +;; add w2, w2, w1, uxtx +;; sub sp, sp, #4 +;; mov x28, sp +;; stur w0, [x28] +;; sub sp, sp, #4 +;; mov x28, sp +;; stur w2, [x28] +;; sub sp, sp, #0x20 +;; mov x28, sp +;; mov x0, x9 +;; mov x1, x9 +;; ldur w2, [x28, #0x24] +;; ldur w3, [x28, #0x20] +;; mov x16, #2 +;; mov w4, w16 +;; mov x16, #3 +;; mov w5, w16 
+;; mov x16, #4 +;; mov w6, w16 +;; mov x16, #5 +;; mov w7, w16 +;; mov x16, #6 +;; mov w16, w16 +;; stur w16, [x28] +;; mov x16, #7 +;; mov w16, w16 +;; stur w16, [x28, #8] +;; mov x16, #8 +;; mov w16, w16 +;; stur w16, [x28, #0x10] +;; bl #0x160 +;; 13c: add sp, sp, #0x20 +;; mov x28, sp +;; add sp, sp, #8 +;; mov x28, sp +;; ldur x9, [x28, #0x10] +;; add sp, sp, #0x18 +;; mov x28, sp +;; ldp x29, x30, [sp], #0x10 +;; ret +;; +;; wasm[0]::function[1]::add: +;; stp x29, x30, [sp, #-0x10]! +;; mov x29, sp +;; mov x28, sp +;; mov x9, x0 +;; sub sp, sp, #0x28 +;; mov x28, sp +;; stur x0, [x28, #0x20] +;; stur x1, [x28, #0x18] +;; stur w2, [x28, #0x14] +;; stur w3, [x28, #0x10] +;; stur w4, [x28, #0xc] +;; stur w5, [x28, #8] +;; stur w6, [x28, #4] +;; stur w7, [x28] +;; ldur w0, [x28, #0x10] +;; ldur w1, [x28, #0x14] +;; add w1, w1, w0, uxtx +;; ldur w0, [x28, #0xc] +;; add w1, w1, w0, uxtx +;; ldur w0, [x28, #8] +;; add w1, w1, w0, uxtx +;; ldur w0, [x28, #4] +;; add w1, w1, w0, uxtx +;; ldur w0, [x28] +;; add w1, w1, w0, uxtx +;; ldur w0, [x29, #0x10] +;; add w1, w1, w0, uxtx +;; ldur w0, [x29, #0x18] +;; add w1, w1, w0, uxtx +;; ldur w0, [x29, #0x20] +;; add w1, w1, w0, uxtx +;; mov w0, w1 +;; add sp, sp, #0x28 +;; mov x28, sp +;; ldp x29, x30, [sp], #0x10 +;; ret diff --git a/tests/disas/winch/aarch64/call/recursive.wat b/tests/disas/winch/aarch64/call/recursive.wat new file mode 100644 index 000000000000..1bd5f243a0a1 --- /dev/null +++ b/tests/disas/winch/aarch64/call/recursive.wat @@ -0,0 +1,84 @@ +;;! target = "aarch64" +;;! 
test = "winch" + +(module + (func $fibonacci8 (param $n i32) (result i32) + (if (result i32) (i32.le_s (local.get $n) (i32.const 1)) + (then + ;; If n <= 1, return n (base case) + (local.get $n) + ) + (else + ;; Else, return fibonacci(n - 1) + fibonacci(n - 2) + (i32.add + (call $fibonacci8 + (i32.sub (local.get $n) (i32.const 1)) ;; Calculate n - 1 + ) + (call $fibonacci8 + (i32.sub (local.get $n) (i32.const 2)) ;; Calculate n - 2 + ) + ) + ) + ) + ) + (export "fib" (func $fibonacci8)) +) + +;; wasm[0]::function[0]::fibonacci8: +;; stp x29, x30, [sp, #-0x10]! +;; mov x29, sp +;; mov x28, sp +;; mov x9, x0 +;; sub sp, sp, #0x18 +;; mov x28, sp +;; stur x0, [x28, #0x10] +;; stur x1, [x28, #8] +;; stur w2, [x28, #4] +;; ldur w0, [x28, #4] +;; cmp w0, #1 +;; cset x0, le +;; tst w0, w0 +;; b.eq #0x44 +;; b #0x3c +;; 3c: ldur w0, [x28, #4] +;; b #0xd4 +;; 44: ldur w0, [x28, #4] +;; sub w0, w0, #1 +;; sub sp, sp, #4 +;; mov x28, sp +;; stur w0, [x28] +;; sub sp, sp, #4 +;; mov x28, sp +;; mov x0, x9 +;; mov x1, x9 +;; ldur w2, [x28, #4] +;; bl #0 +;; 70: add sp, sp, #4 +;; mov x28, sp +;; add sp, sp, #4 +;; mov x28, sp +;; ldur x9, [x28, #0x10] +;; ldur w1, [x28, #4] +;; sub w1, w1, #2 +;; sub sp, sp, #4 +;; mov x28, sp +;; stur w0, [x28] +;; sub sp, sp, #4 +;; mov x28, sp +;; stur w1, [x28] +;; mov x0, x9 +;; mov x1, x9 +;; ldur w2, [x28] +;; bl #0 +;; b4: add sp, sp, #4 +;; mov x28, sp +;; ldur x9, [x28, #0x14] +;; ldur w1, [x28] +;; add sp, sp, #4 +;; mov x28, sp +;; add w1, w1, w0, uxtx +;; mov w0, w1 +;; add sp, sp, #0x18 +;; mov x28, sp +;; ldp x29, x30, [sp], #0x10 +;; ret diff --git a/tests/disas/winch/aarch64/call/reg_on_stack.wat b/tests/disas/winch/aarch64/call/reg_on_stack.wat new file mode 100644 index 000000000000..7e670aeca4a5 --- /dev/null +++ b/tests/disas/winch/aarch64/call/reg_on_stack.wat @@ -0,0 +1,68 @@ +;;! target = "aarch64" +;;! 
test = "winch" + +(module + (func (export "") (param i32) (result i32) + local.get 0 + i32.const 1 + call 0 + i32.const 1 + call 0 + br_if 0 (;@0;) + unreachable + ) +) + +;; wasm[0]::function[0]: +;; stp x29, x30, [sp, #-0x10]! +;; mov x29, sp +;; mov x28, sp +;; mov x9, x0 +;; sub sp, sp, #0x18 +;; mov x28, sp +;; stur x0, [x28, #0x10] +;; stur x1, [x28, #8] +;; stur w2, [x28, #4] +;; ldur w16, [x28, #4] +;; sub sp, sp, #4 +;; mov x28, sp +;; stur w16, [x28] +;; sub sp, sp, #4 +;; mov x28, sp +;; mov x0, x9 +;; mov x1, x9 +;; mov x16, #1 +;; mov w2, w16 +;; bl #0 +;; 50: add sp, sp, #4 +;; mov x28, sp +;; ldur x9, [x28, #0x14] +;; sub sp, sp, #4 +;; mov x28, sp +;; stur w0, [x28] +;; mov x0, x9 +;; mov x1, x9 +;; mov x16, #1 +;; mov w2, w16 +;; bl #0 +;; 7c: ldur x9, [x28, #0x18] +;; sub sp, sp, #4 +;; mov x28, sp +;; stur w0, [x28] +;; ldur w1, [x28] +;; add sp, sp, #4 +;; mov x28, sp +;; ldur w0, [x28] +;; add sp, sp, #4 +;; mov x28, sp +;; tst w1, w1 +;; b.eq #0xbc +;; b #0xb0 +;; b0: add sp, sp, #4 +;; mov x28, sp +;; b #0xc0 +;; bc: .byte 0x1f, 0xc1, 0x00, 0x00 +;; add sp, sp, #0x18 +;; mov x28, sp +;; ldp x29, x30, [sp], #0x10 +;; ret diff --git a/tests/disas/winch/aarch64/call/simple.wat b/tests/disas/winch/aarch64/call/simple.wat new file mode 100644 index 000000000000..f9f3ab6f8ebc --- /dev/null +++ b/tests/disas/winch/aarch64/call/simple.wat @@ -0,0 +1,69 @@ +;;! target = "aarch64" +;;! test = "winch" + +(module + (func $main (result i32) + (local $var i32) + (call $add (i32.const 20) (i32.const 80)) + (local.set $var (i32.const 2)) + (local.get $var) + (i32.add)) + + (func $add (param i32 i32) (result i32) + (local.get 0) + (local.get 1) + (i32.add)) +) + +;; wasm[0]::function[0]::main: +;; stp x29, x30, [sp, #-0x10]! 
+;; mov x29, sp +;; mov x28, sp +;; mov x9, x0 +;; sub sp, sp, #0x18 +;; mov x28, sp +;; stur x0, [x28, #0x10] +;; stur x1, [x28, #8] +;; mov x16, #0 +;; stur x16, [x28] +;; sub sp, sp, #8 +;; mov x28, sp +;; mov x0, x9 +;; mov x1, x9 +;; mov x16, #0x14 +;; mov w2, w16 +;; mov x16, #0x50 +;; mov w3, w16 +;; bl #0x80 +;; 4c: add sp, sp, #8 +;; mov x28, sp +;; ldur x9, [x28, #0x10] +;; mov x16, #2 +;; mov w1, w16 +;; stur w1, [x28, #4] +;; ldur w1, [x28, #4] +;; add w0, w0, w1, uxtx +;; add sp, sp, #0x18 +;; mov x28, sp +;; ldp x29, x30, [sp], #0x10 +;; ret +;; +;; wasm[0]::function[1]::add: +;; stp x29, x30, [sp, #-0x10]! +;; mov x29, sp +;; mov x28, sp +;; mov x9, x0 +;; sub sp, sp, #0x18 +;; mov x28, sp +;; stur x0, [x28, #0x10] +;; stur x1, [x28, #8] +;; stur w2, [x28, #4] +;; stur w3, [x28] +;; ldur w0, [x28] +;; ldur w1, [x28, #4] +;; add w1, w1, w0, uxtx +;; mov w0, w1 +;; add sp, sp, #0x18 +;; mov x28, sp +;; ldp x29, x30, [sp], #0x10 +;; ret diff --git a/tests/disas/winch/aarch64/call_indirect/call_indirect.wat b/tests/disas/winch/aarch64/call_indirect/call_indirect.wat new file mode 100644 index 000000000000..d3df79e54d95 --- /dev/null +++ b/tests/disas/winch/aarch64/call_indirect/call_indirect.wat @@ -0,0 +1,190 @@ +;;! target="aarch64" +;;! test = "winch" + +(module + (type $over-i32 (func (param i32) (result i32))) + + (table funcref + (elem + $fib-i32 + ) + ) + + (func $fib-i32 (export "fib-i32") (type $over-i32) + (if (result i32) (i32.le_u (local.get 0) (i32.const 1)) + (then (i32.const 1)) + (else + (i32.add + (call_indirect (type $over-i32) + (i32.sub (local.get 0) (i32.const 2)) + (i32.const 0) + ) + (call_indirect (type $over-i32) + (i32.sub (local.get 0) (i32.const 1)) + (i32.const 0) + ) + ) + ) + ) + ) +) + +;; wasm[0]::function[0]::fib-i32: +;; stp x29, x30, [sp, #-0x10]! 
+;; mov x29, sp +;; mov x28, sp +;; mov x9, x0 +;; sub sp, sp, #0x18 +;; mov x28, sp +;; stur x0, [x28, #0x10] +;; stur x1, [x28, #8] +;; stur w2, [x28, #4] +;; ldur w0, [x28, #4] +;; cmp w0, #1 +;; cset x0, ls +;; tst w0, w0 +;; b.eq #0x48 +;; b #0x3c +;; 3c: mov x16, #1 +;; mov w0, w16 +;; b #0x250 +;; 48: ldur w0, [x28, #4] +;; sub w0, w0, #2 +;; sub sp, sp, #4 +;; mov x28, sp +;; stur w0, [x28] +;; mov x16, #0 +;; mov w1, w16 +;; mov x2, x9 +;; ldur x3, [x2, #0x60] +;; cmp x1, x3, uxtx +;; b.hs #0x260 +;; 74: mov x16, x1 +;; mov x16, #8 +;; mul x16, x16, x16 +;; ldur x2, [x2, #0x58] +;; mov x4, x2 +;; add x2, x2, x16, uxtx +;; cmp w1, w3, uxtx +;; csel x2, x4, x4, hs +;; ldur x0, [x2] +;; tst x0, x0 +;; b.ne #0xd4 +;; b #0xa4 +;; a4: sub sp, sp, #4 +;; mov x28, sp +;; stur w1, [x28] +;; mov x0, x9 +;; mov x16, #0 +;; mov w1, w16 +;; ldur w2, [x28] +;; bl #0x3b4 +;; c4: add sp, sp, #4 +;; mov x28, sp +;; ldur x9, [x28, #0x14] +;; b #0xd8 +;; d4: and x0, x0, #0xfffffffffffffffe +;; cbz x0, #0x264 +;; dc: ldur x16, [x9, #0x50] +;; ldur w1, [x16] +;; ldur w2, [x0, #0x10] +;; cmp w1, w2, uxtx +;; b.ne #0x268 +;; f0: sub sp, sp, #8 +;; mov x28, sp +;; stur x0, [x28] +;; ldur x3, [x28] +;; add sp, sp, #8 +;; mov x28, sp +;; ldur x5, [x3, #0x18] +;; ldur x4, [x3, #8] +;; sub sp, sp, #4 +;; mov x28, sp +;; mov x0, x5 +;; mov x1, x9 +;; ldur w2, [x28, #4] +;; blr x4 +;; 128: add sp, sp, #4 +;; mov x28, sp +;; add sp, sp, #4 +;; mov x28, sp +;; ldur x9, [x28, #0x10] +;; ldur w1, [x28, #4] +;; sub w1, w1, #1 +;; sub sp, sp, #4 +;; mov x28, sp +;; stur w0, [x28] +;; sub sp, sp, #4 +;; mov x28, sp +;; stur w1, [x28] +;; mov x16, #0 +;; mov w1, w16 +;; mov x2, x9 +;; ldur x3, [x2, #0x60] +;; cmp x1, x3, uxtx +;; b.hs #0x26c +;; 174: mov x16, x1 +;; mov x16, #8 +;; mul x16, x16, x16 +;; ldur x2, [x2, #0x58] +;; mov x4, x2 +;; add x2, x2, x16, uxtx +;; cmp w1, w3, uxtx +;; csel x2, x4, x4, hs +;; ldur x0, [x2] +;; tst x0, x0 +;; b.ne #0x1e4 +;; b #0x1a4 +;; 1a4: sub sp, sp, #4 
+;; mov x28, sp +;; stur w1, [x28] +;; sub sp, sp, #0xc +;; mov x28, sp +;; mov x0, x9 +;; mov x16, #0 +;; mov w1, w16 +;; ldur w2, [x28, #0xc] +;; bl #0x3b4 +;; 1cc: add sp, sp, #0xc +;; mov x28, sp +;; add sp, sp, #4 +;; mov x28, sp +;; ldur x9, [x28, #0x18] +;; b #0x1e8 +;; 1e4: and x0, x0, #0xfffffffffffffffe +;; cbz x0, #0x270 +;; 1ec: ldur x16, [x9, #0x50] +;; ldur w1, [x16] +;; ldur w2, [x0, #0x10] +;; cmp w1, w2, uxtx +;; b.ne #0x274 +;; 200: sub sp, sp, #8 +;; mov x28, sp +;; stur x0, [x28] +;; ldur x3, [x28] +;; add sp, sp, #8 +;; mov x28, sp +;; ldur x5, [x3, #0x18] +;; ldur x4, [x3, #8] +;; mov x0, x5 +;; mov x1, x9 +;; ldur w2, [x28] +;; blr x4 +;; 230: add sp, sp, #4 +;; mov x28, sp +;; ldur x9, [x28, #0x14] +;; ldur w1, [x28] +;; add sp, sp, #4 +;; mov x28, sp +;; add w1, w1, w0, uxtx +;; mov w0, w1 +;; add sp, sp, #0x18 +;; mov x28, sp +;; ldp x29, x30, [sp], #0x10 +;; ret +;; 260: .byte 0x1f, 0xc1, 0x00, 0x00 +;; 264: .byte 0x1f, 0xc1, 0x00, 0x00 +;; 268: .byte 0x1f, 0xc1, 0x00, 0x00 +;; 26c: .byte 0x1f, 0xc1, 0x00, 0x00 +;; 270: .byte 0x1f, 0xc1, 0x00, 0x00 +;; 274: .byte 0x1f, 0xc1, 0x00, 0x00 diff --git a/tests/disas/winch/aarch64/call_indirect/local_arg.wat b/tests/disas/winch/aarch64/call_indirect/local_arg.wat new file mode 100644 index 000000000000..8f91824496b7 --- /dev/null +++ b/tests/disas/winch/aarch64/call_indirect/local_arg.wat @@ -0,0 +1,112 @@ +;;! target="aarch64" +;;! test = "winch" + +(module + (type $param-i32 (func (param i32))) + + (func $param-i32 (type $param-i32)) + (func (export "") + (local i32) + local.get 0 + (call_indirect (type $param-i32) (i32.const 0)) + ) + + (table funcref + (elem + $param-i32) + ) +) + +;; wasm[0]::function[0]::param-i32: +;; stp x29, x30, [sp, #-0x10]! 
+;; mov x29, sp +;; mov x28, sp +;; mov x9, x0 +;; sub sp, sp, #0x18 +;; mov x28, sp +;; stur x0, [x28, #0x10] +;; stur x1, [x28, #8] +;; stur w2, [x28, #4] +;; add sp, sp, #0x18 +;; mov x28, sp +;; ldp x29, x30, [sp], #0x10 +;; ret +;; +;; wasm[0]::function[1]: +;; stp x29, x30, [sp, #-0x10]! +;; mov x29, sp +;; mov x28, sp +;; mov x9, x0 +;; sub sp, sp, #0x18 +;; mov x28, sp +;; stur x0, [x28, #0x10] +;; stur x1, [x28, #8] +;; mov x16, #0 +;; stur x16, [x28] +;; ldur w16, [x28, #4] +;; sub sp, sp, #4 +;; mov x28, sp +;; stur w16, [x28] +;; mov x16, #0 +;; mov w1, w16 +;; mov x2, x9 +;; ldur x3, [x2, #0x60] +;; cmp x1, x3, uxtx +;; b.hs #0x168 +;; 90: mov x16, x1 +;; mov x16, #8 +;; mul x16, x16, x16 +;; ldur x2, [x2, #0x58] +;; mov x4, x2 +;; add x2, x2, x16, uxtx +;; cmp w1, w3, uxtx +;; csel x2, x4, x4, hs +;; ldur x0, [x2] +;; tst x0, x0 +;; b.ne #0xf0 +;; b #0xc0 +;; c0: sub sp, sp, #4 +;; mov x28, sp +;; stur w1, [x28] +;; mov x0, x9 +;; mov x16, #0 +;; mov w1, w16 +;; ldur w2, [x28] +;; bl #0x3a4 +;; e0: add sp, sp, #4 +;; mov x28, sp +;; ldur x9, [x28, #0x14] +;; b #0xf4 +;; f0: and x0, x0, #0xfffffffffffffffe +;; cbz x0, #0x16c +;; f8: ldur x16, [x9, #0x50] +;; ldur w1, [x16] +;; ldur w2, [x0, #0x10] +;; cmp w1, w2, uxtx +;; b.ne #0x170 +;; 10c: sub sp, sp, #8 +;; mov x28, sp +;; stur x0, [x28] +;; ldur x3, [x28] +;; add sp, sp, #8 +;; mov x28, sp +;; ldur x5, [x3, #0x18] +;; ldur x4, [x3, #8] +;; sub sp, sp, #4 +;; mov x28, sp +;; mov x0, x5 +;; mov x1, x9 +;; ldur w2, [x28, #4] +;; blr x4 +;; 144: add sp, sp, #4 +;; mov x28, sp +;; add sp, sp, #4 +;; mov x28, sp +;; ldur x9, [x28, #0x10] +;; add sp, sp, #0x18 +;; mov x28, sp +;; ldp x29, x30, [sp], #0x10 +;; ret +;; 168: .byte 0x1f, 0xc1, 0x00, 0x00 +;; 16c: .byte 0x1f, 0xc1, 0x00, 0x00 +;; 170: .byte 0x1f, 0xc1, 0x00, 0x00 diff --git a/winch/codegen/src/isa/aarch64/asm.rs b/winch/codegen/src/isa/aarch64/asm.rs index e5bb5ee17502..0889309be1bf 100644 --- a/winch/codegen/src/isa/aarch64/asm.rs +++ 
b/winch/codegen/src/isa/aarch64/asm.rs @@ -2,22 +2,21 @@ use super::{address::Address, regs}; use crate::masm::{ExtendKind, FloatCmpKind, IntCmpKind, RoundingMode, ShiftKind}; +use crate::CallingConvention; use crate::{ masm::OperandSize, reg::{writable, Reg, WritableReg}, }; -use cranelift_codegen::ir::TrapCode; -use cranelift_codegen::isa::aarch64::inst::{ - BitOp, BranchTarget, Cond, CondBrKind, FPULeftShiftImm, FPUOp1, FPUOp2, - FPUOpRI::{self, UShr32, UShr64}, - FPUOpRIMod, FPURightShiftImm, FpuRoundMode, ImmLogic, ImmShift, ScalarSize, -}; use cranelift_codegen::{ - ir::{MemFlags, SourceLoc}, + ir::{ExternalName, LibCall, MemFlags, SourceLoc, TrapCode, UserExternalNameRef}, isa::aarch64::inst::{ self, emit::{EmitInfo, EmitState}, - ALUOp, ALUOp3, AMode, ExtendOp, Imm12, Inst, PairAMode, VecLanesOp, VecMisc2, VectorSize, + ALUOp, ALUOp3, AMode, BitOp, BranchTarget, Cond, CondBrKind, ExtendOp, FPULeftShiftImm, + FPUOp1, FPUOp2, + FPUOpRI::{self, UShr32, UShr64}, + FPUOpRIMod, FPURightShiftImm, FpuRoundMode, Imm12, ImmLogic, ImmShift, Inst, PairAMode, + ScalarSize, VecLanesOp, VecMisc2, VectorSize, }, settings, Final, MachBuffer, MachBufferFinalized, MachInst, MachInstEmit, MachInstEmitState, MachLabel, Writable, @@ -724,6 +723,14 @@ impl Assembler { }); } + /// Trap if `rn` is zero. + pub fn trapz(&mut self, rn: Reg, code: TrapCode) { + self.emit(Inst::TrapIf { + kind: CondBrKind::Zero(rn.into()), + trap_code: code, + }); + } + // Helpers for ALU operations. fn emit_alu_rri(&mut self, op: ALUOp, imm: Imm12, rn: Reg, rd: WritableReg, size: OperandSize) { @@ -901,4 +908,38 @@ impl Assembler { pub fn buffer(&self) -> &MachBuffer { &self.buffer } + + /// Emit a direct call to a function defined locally and + /// referenced to by `name`. 
+ pub fn call_with_name(&mut self, name: UserExternalNameRef, call_conv: CallingConvention) { + self.emit(Inst::Call { + info: Box::new(cranelift_codegen::CallInfo::empty( + ExternalName::user(name), + call_conv.into(), + )), + }) + } + + /// Emit an indirect call to a function whose address is + /// stored the `callee` register. + pub fn call_with_reg(&mut self, callee: Reg, call_conv: CallingConvention) { + self.emit(Inst::CallInd { + info: Box::new(cranelift_codegen::CallInfo::empty( + callee.into(), + call_conv.into(), + )), + }) + } + + /// Emit a call to a well-known libcall. + /// `dst` is used as a scratch register to hold the address of the libcall function. + pub fn call_with_lib(&mut self, lib: LibCall, dst: Reg, call_conv: CallingConvention) { + let name = ExternalName::LibCall(lib); + self.emit(Inst::LoadExtName { + rd: writable!(dst.into()), + name: name.into(), + offset: 0, + }); + self.call_with_reg(dst, call_conv) + } } diff --git a/winch/codegen/src/isa/aarch64/masm.rs b/winch/codegen/src/isa/aarch64/masm.rs index 76a81cea3c3c..ddfa5193984d 100644 --- a/winch/codegen/src/isa/aarch64/masm.rs +++ b/winch/codegen/src/isa/aarch64/masm.rs @@ -1,6 +1,11 @@ -use super::{abi::Aarch64ABI, address::Address, asm::Assembler, regs}; +use super::{ + abi::Aarch64ABI, + address::Address, + asm::Assembler, + regs::{self, scratch}, +}; use crate::{ - abi::local::LocalSlot, + abi::{self, align_to, calculate_frame_adjustment, local::LocalSlot, vmctx}, codegen::{ptr_type_from_ptr_size, CodeGenContext, Emission, FuncEnv}, isa::{ reg::{writable, Reg, WritableReg}, @@ -126,8 +131,8 @@ impl Masm for MacroAssembler { Address::from_shadow_sp(offset.as_u32() as i64) } - fn address_at_vmctx(&self, _offset: u32) -> Self::Address { - todo!() + fn address_at_vmctx(&self, offset: u32) -> Self::Address { + Address::offset(vmctx!(Self), offset as i64) } fn store_ptr(&mut self, src: Reg, dst: Self::Address) { @@ -165,10 +170,23 @@ impl Masm for MacroAssembler { fn call( &mut self, - 
_stack_args_size: u32, - _load_callee: impl FnMut(&mut Self) -> (CalleeKind, CallingConvention), + stack_args_size: u32, + mut load_callee: impl FnMut(&mut Self) -> (CalleeKind, CallingConvention), ) -> u32 { - todo!() + let alignment: u32 = ::call_stack_align().into(); + let addend: u32 = ::arg_base_offset().into(); + let delta = calculate_frame_adjustment(self.sp_offset().as_u32(), addend, alignment); + let aligned_args_size = align_to(stack_args_size, alignment); + let total_stack = delta + aligned_args_size; + self.reserve_stack(total_stack); + let (callee, call_conv) = load_callee(self); + match callee { + CalleeKind::Indirect(reg) => self.asm.call_with_reg(reg, call_conv), + CalleeKind::Direct(idx) => self.asm.call_with_name(idx, call_conv), + CalleeKind::LibCall(lib) => self.asm.call_with_lib(lib, scratch(), call_conv), + } + + total_stack } fn load(&mut self, src: Address, dst: WritableReg, size: OperandSize) { @@ -662,8 +680,8 @@ impl Masm for MacroAssembler { self.asm.udf(code); } - fn trapz(&mut self, _src: Reg, _code: TrapCode) { - todo!() + fn trapz(&mut self, src: Reg, code: TrapCode) { + self.asm.trapz(src, code); } fn trapif(&mut self, cc: IntCmpKind, code: TrapCode) { From e262318dc4a1256b19ae73fae53d859b8ed249fd Mon Sep 17 00:00:00 2001 From: Alex Crichton Date: Mon, 9 Dec 2024 09:04:04 -0700 Subject: [PATCH 02/30] Add `-Wextended-const` CLI flag (#9768) Adds a flag that can be used to control the `extended-const` proposal on the CLI. --- crates/cli-flags/src/lib.rs | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/crates/cli-flags/src/lib.rs b/crates/cli-flags/src/lib.rs index b1427614ca39..938928560a6b 100644 --- a/crates/cli-flags/src/lib.rs +++ b/crates/cli-flags/src/lib.rs @@ -351,6 +351,8 @@ wasmtime_option_group! { pub custom_page_sizes: Option, /// Configure support for the wide-arithmetic proposal. pub wide_arithmetic: Option, + /// Configure support for the extended-const proposal. 
+ pub extended_const: Option, } enum Wasm { @@ -886,6 +888,9 @@ impl CommonOptions { if let Some(enable) = self.wasm.wide_arithmetic.or(all) { config.wasm_wide_arithmetic(enable); } + if let Some(enable) = self.wasm.extended_const.or(all) { + config.wasm_extended_const(enable); + } macro_rules! handle_conditionally_compiled { ($(($feature:tt, $field:tt, $method:tt))*) => ($( From 5dcaa13ced14a4fe135036c82fdc9cb9170b1074 Mon Sep 17 00:00:00 2001 From: Simon Stridsberg Date: Mon, 9 Dec 2024 18:58:57 +0100 Subject: [PATCH 03/30] wasmtime_instance_pre_instantiate should take a wasmtime_context_t *store and not wasmtime_store_t *store (#9770) --- crates/c-api/include/wasmtime/instance.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/crates/c-api/include/wasmtime/instance.h b/crates/c-api/include/wasmtime/instance.h index 0d63cfb7a421..8c57d6ab2153 100644 --- a/crates/c-api/include/wasmtime/instance.h +++ b/crates/c-api/include/wasmtime/instance.h @@ -150,7 +150,7 @@ wasmtime_instance_pre_delete(wasmtime_instance_pre_t *instance_pre); * values are owned by the caller. 
*/ WASM_API_EXTERN wasmtime_error_t *wasmtime_instance_pre_instantiate( - const wasmtime_instance_pre_t *instance_pre, wasmtime_store_t *store, + const wasmtime_instance_pre_t *instance_pre, wasmtime_context_t *store, wasmtime_instance_t *instance, wasm_trap_t **trap_ptr); /** From 4ca5174546518d20780aad72770717531c633fd3 Mon Sep 17 00:00:00 2001 From: minirop Date: Mon, 9 Dec 2024 22:42:36 +0100 Subject: [PATCH 04/30] winch Aarch64: reinterpret_float_as_int, reinterpret_int_as_float (#9767) --- .../aarch64/f32_reinterpret_i32/const.wat | 25 ++++++++++++++ .../aarch64/f32_reinterpret_i32/locals.wat | 28 +++++++++++++++ .../aarch64/f32_reinterpret_i32/params.wat | 25 ++++++++++++++ .../aarch64/f32_reinterpret_i32/ret_int.wat | 29 ++++++++++++++++ .../aarch64/f32_reinterpret_i32/spilled.wat | 33 ++++++++++++++++++ .../aarch64/f64_reinterpret_i64/const.wat | 25 ++++++++++++++ .../aarch64/f64_reinterpret_i64/locals.wat | 28 +++++++++++++++ .../aarch64/f64_reinterpret_i64/params.wat | 25 ++++++++++++++ .../aarch64/f64_reinterpret_i64/ret_int.wat | 29 ++++++++++++++++ .../aarch64/f64_reinterpret_i64/spilled.wat | 33 ++++++++++++++++++ .../aarch64/i32_reinterpret_f32/const.wat | 25 ++++++++++++++ .../aarch64/i32_reinterpret_f32/locals.wat | 28 +++++++++++++++ .../aarch64/i32_reinterpret_f32/params.wat | 25 ++++++++++++++ .../aarch64/i32_reinterpret_f32/ret_float.wat | 29 ++++++++++++++++ .../aarch64/i64_reinterpret_f64/const.wat | 25 ++++++++++++++ .../aarch64/i64_reinterpret_f64/locals.wat | 28 +++++++++++++++ .../aarch64/i64_reinterpret_f64/params.wat | 25 ++++++++++++++ .../aarch64/i64_reinterpret_f64/ret_float.wat | 29 ++++++++++++++++ winch/codegen/src/isa/aarch64/asm.rs | 34 +++++++++++++++++-- winch/codegen/src/isa/aarch64/masm.rs | 8 ++--- 20 files changed, 530 insertions(+), 6 deletions(-) create mode 100644 tests/disas/winch/aarch64/f32_reinterpret_i32/const.wat create mode 100644 tests/disas/winch/aarch64/f32_reinterpret_i32/locals.wat create mode 100644 
tests/disas/winch/aarch64/f32_reinterpret_i32/params.wat create mode 100644 tests/disas/winch/aarch64/f32_reinterpret_i32/ret_int.wat create mode 100644 tests/disas/winch/aarch64/f32_reinterpret_i32/spilled.wat create mode 100644 tests/disas/winch/aarch64/f64_reinterpret_i64/const.wat create mode 100644 tests/disas/winch/aarch64/f64_reinterpret_i64/locals.wat create mode 100644 tests/disas/winch/aarch64/f64_reinterpret_i64/params.wat create mode 100644 tests/disas/winch/aarch64/f64_reinterpret_i64/ret_int.wat create mode 100644 tests/disas/winch/aarch64/f64_reinterpret_i64/spilled.wat create mode 100644 tests/disas/winch/aarch64/i32_reinterpret_f32/const.wat create mode 100644 tests/disas/winch/aarch64/i32_reinterpret_f32/locals.wat create mode 100644 tests/disas/winch/aarch64/i32_reinterpret_f32/params.wat create mode 100644 tests/disas/winch/aarch64/i32_reinterpret_f32/ret_float.wat create mode 100644 tests/disas/winch/aarch64/i64_reinterpret_f64/const.wat create mode 100644 tests/disas/winch/aarch64/i64_reinterpret_f64/locals.wat create mode 100644 tests/disas/winch/aarch64/i64_reinterpret_f64/params.wat create mode 100644 tests/disas/winch/aarch64/i64_reinterpret_f64/ret_float.wat diff --git a/tests/disas/winch/aarch64/f32_reinterpret_i32/const.wat b/tests/disas/winch/aarch64/f32_reinterpret_i32/const.wat new file mode 100644 index 000000000000..608f8faf7457 --- /dev/null +++ b/tests/disas/winch/aarch64/f32_reinterpret_i32/const.wat @@ -0,0 +1,25 @@ +;;! target = "aarch64" +;;! test = "winch" + +(module + (func (result f32) + (i32.const 1) + (f32.reinterpret_i32) + ) +) +;; wasm[0]::function[0]: +;; stp x29, x30, [sp, #-0x10]! 
+;; mov x29, sp +;; mov x28, sp +;; mov x9, x0 +;; sub sp, sp, #0x10 +;; mov x28, sp +;; stur x0, [x28, #8] +;; stur x1, [x28] +;; mov x16, #1 +;; mov w0, w16 +;; scvtf s0, w0 +;; add sp, sp, #0x10 +;; mov x28, sp +;; ldp x29, x30, [sp], #0x10 +;; ret diff --git a/tests/disas/winch/aarch64/f32_reinterpret_i32/locals.wat b/tests/disas/winch/aarch64/f32_reinterpret_i32/locals.wat new file mode 100644 index 000000000000..0d093cb0a6ec --- /dev/null +++ b/tests/disas/winch/aarch64/f32_reinterpret_i32/locals.wat @@ -0,0 +1,28 @@ +;;! target = "aarch64" +;;! test = "winch" + +(module + (func (result f32) + (local i32) + + (local.get 0) + (f32.reinterpret_i32) + ) +) +;; wasm[0]::function[0]: +;; stp x29, x30, [sp, #-0x10]! +;; mov x29, sp +;; mov x28, sp +;; mov x9, x0 +;; sub sp, sp, #0x18 +;; mov x28, sp +;; stur x0, [x28, #0x10] +;; stur x1, [x28, #8] +;; mov x16, #0 +;; stur x16, [x28] +;; ldur w0, [x28, #4] +;; scvtf s0, w0 +;; add sp, sp, #0x18 +;; mov x28, sp +;; ldp x29, x30, [sp], #0x10 +;; ret diff --git a/tests/disas/winch/aarch64/f32_reinterpret_i32/params.wat b/tests/disas/winch/aarch64/f32_reinterpret_i32/params.wat new file mode 100644 index 000000000000..27cdda4b9464 --- /dev/null +++ b/tests/disas/winch/aarch64/f32_reinterpret_i32/params.wat @@ -0,0 +1,25 @@ +;;! target = "aarch64" +;;! test = "winch" + +(module + (func (param i32) (result f32) + (local.get 0) + (f32.reinterpret_i32) + ) +) +;; wasm[0]::function[0]: +;; stp x29, x30, [sp, #-0x10]! 
+;; mov x29, sp +;; mov x28, sp +;; mov x9, x0 +;; sub sp, sp, #0x18 +;; mov x28, sp +;; stur x0, [x28, #0x10] +;; stur x1, [x28, #8] +;; stur w2, [x28, #4] +;; ldur w0, [x28, #4] +;; scvtf s0, w0 +;; add sp, sp, #0x18 +;; mov x28, sp +;; ldp x29, x30, [sp], #0x10 +;; ret diff --git a/tests/disas/winch/aarch64/f32_reinterpret_i32/ret_int.wat b/tests/disas/winch/aarch64/f32_reinterpret_i32/ret_int.wat new file mode 100644 index 000000000000..106ac60e48b1 --- /dev/null +++ b/tests/disas/winch/aarch64/f32_reinterpret_i32/ret_int.wat @@ -0,0 +1,29 @@ +;;! target = "aarch64" +;;! test = "winch" + +(module + (func (result i32) + i32.const 1 + f32.reinterpret_i32 + drop + i32.const 1 + ) +) +;; wasm[0]::function[0]: +;; stp x29, x30, [sp, #-0x10]! +;; mov x29, sp +;; mov x28, sp +;; mov x9, x0 +;; sub sp, sp, #0x10 +;; mov x28, sp +;; stur x0, [x28, #8] +;; stur x1, [x28] +;; mov x16, #1 +;; mov w0, w16 +;; scvtf s0, w0 +;; mov x16, #1 +;; mov w0, w16 +;; add sp, sp, #0x10 +;; mov x28, sp +;; ldp x29, x30, [sp], #0x10 +;; ret diff --git a/tests/disas/winch/aarch64/f32_reinterpret_i32/spilled.wat b/tests/disas/winch/aarch64/f32_reinterpret_i32/spilled.wat new file mode 100644 index 000000000000..d458d4ef32a0 --- /dev/null +++ b/tests/disas/winch/aarch64/f32_reinterpret_i32/spilled.wat @@ -0,0 +1,33 @@ +;;! target = "aarch64" +;;! test = "winch" + +(module + (func (result f32) + i32.const 1 + f32.reinterpret_i32 + block + end + ) +) +;; wasm[0]::function[0]: +;; stp x29, x30, [sp, #-0x10]! 
+;; mov x29, sp +;; mov x28, sp +;; mov x9, x0 +;; sub sp, sp, #0x10 +;; mov x28, sp +;; stur x0, [x28, #8] +;; stur x1, [x28] +;; mov x16, #1 +;; mov w0, w16 +;; scvtf s0, w0 +;; sub sp, sp, #4 +;; mov x28, sp +;; stur s0, [x28] +;; ldur s0, [x28] +;; add sp, sp, #4 +;; mov x28, sp +;; add sp, sp, #0x10 +;; mov x28, sp +;; ldp x29, x30, [sp], #0x10 +;; ret diff --git a/tests/disas/winch/aarch64/f64_reinterpret_i64/const.wat b/tests/disas/winch/aarch64/f64_reinterpret_i64/const.wat new file mode 100644 index 000000000000..850e6d7af3c1 --- /dev/null +++ b/tests/disas/winch/aarch64/f64_reinterpret_i64/const.wat @@ -0,0 +1,25 @@ +;;! target = "aarch64" +;;! test = "winch" + +(module + (func (result f64) + (i64.const 1) + (f64.reinterpret_i64) + ) +) +;; wasm[0]::function[0]: +;; stp x29, x30, [sp, #-0x10]! +;; mov x29, sp +;; mov x28, sp +;; mov x9, x0 +;; sub sp, sp, #0x10 +;; mov x28, sp +;; stur x0, [x28, #8] +;; stur x1, [x28] +;; mov x16, #1 +;; mov x0, x16 +;; scvtf d0, x0 +;; add sp, sp, #0x10 +;; mov x28, sp +;; ldp x29, x30, [sp], #0x10 +;; ret diff --git a/tests/disas/winch/aarch64/f64_reinterpret_i64/locals.wat b/tests/disas/winch/aarch64/f64_reinterpret_i64/locals.wat new file mode 100644 index 000000000000..03bf4808fc47 --- /dev/null +++ b/tests/disas/winch/aarch64/f64_reinterpret_i64/locals.wat @@ -0,0 +1,28 @@ +;;! target = "aarch64" +;;! test = "winch" + +(module + (func (result f64) + (local i64) + + (local.get 0) + (f64.reinterpret_i64) + ) +) +;; wasm[0]::function[0]: +;; stp x29, x30, [sp, #-0x10]! 
+;; mov x29, sp +;; mov x28, sp +;; mov x9, x0 +;; sub sp, sp, #0x18 +;; mov x28, sp +;; stur x0, [x28, #0x10] +;; stur x1, [x28, #8] +;; mov x16, #0 +;; stur x16, [x28] +;; ldur x0, [x28] +;; scvtf d0, x0 +;; add sp, sp, #0x18 +;; mov x28, sp +;; ldp x29, x30, [sp], #0x10 +;; ret diff --git a/tests/disas/winch/aarch64/f64_reinterpret_i64/params.wat b/tests/disas/winch/aarch64/f64_reinterpret_i64/params.wat new file mode 100644 index 000000000000..fd5c1e5c00c1 --- /dev/null +++ b/tests/disas/winch/aarch64/f64_reinterpret_i64/params.wat @@ -0,0 +1,25 @@ +;;! target = "aarch64" +;;! test = "winch" + +(module + (func (param i64) (result f64) + (local.get 0) + (f64.reinterpret_i64) + ) +) +;; wasm[0]::function[0]: +;; stp x29, x30, [sp, #-0x10]! +;; mov x29, sp +;; mov x28, sp +;; mov x9, x0 +;; sub sp, sp, #0x18 +;; mov x28, sp +;; stur x0, [x28, #0x10] +;; stur x1, [x28, #8] +;; stur x2, [x28] +;; ldur x0, [x28] +;; scvtf d0, x0 +;; add sp, sp, #0x18 +;; mov x28, sp +;; ldp x29, x30, [sp], #0x10 +;; ret diff --git a/tests/disas/winch/aarch64/f64_reinterpret_i64/ret_int.wat b/tests/disas/winch/aarch64/f64_reinterpret_i64/ret_int.wat new file mode 100644 index 000000000000..71c0b1b22ad9 --- /dev/null +++ b/tests/disas/winch/aarch64/f64_reinterpret_i64/ret_int.wat @@ -0,0 +1,29 @@ +;;! target = "aarch64" +;;! test = "winch" + +(module + (func (result i64) + i64.const 1 + f64.reinterpret_i64 + drop + i64.const 1 + ) +) +;; wasm[0]::function[0]: +;; stp x29, x30, [sp, #-0x10]! 
+;; mov x29, sp +;; mov x28, sp +;; mov x9, x0 +;; sub sp, sp, #0x10 +;; mov x28, sp +;; stur x0, [x28, #8] +;; stur x1, [x28] +;; mov x16, #1 +;; mov x0, x16 +;; scvtf d0, x0 +;; mov x16, #1 +;; mov x0, x16 +;; add sp, sp, #0x10 +;; mov x28, sp +;; ldp x29, x30, [sp], #0x10 +;; ret diff --git a/tests/disas/winch/aarch64/f64_reinterpret_i64/spilled.wat b/tests/disas/winch/aarch64/f64_reinterpret_i64/spilled.wat new file mode 100644 index 000000000000..9eddaf9cb11f --- /dev/null +++ b/tests/disas/winch/aarch64/f64_reinterpret_i64/spilled.wat @@ -0,0 +1,33 @@ +;;! target = "aarch64" +;;! test = "winch" + +(module + (func (result f64) + i64.const 1 + f64.reinterpret_i64 + block + end + ) +) +;; wasm[0]::function[0]: +;; stp x29, x30, [sp, #-0x10]! +;; mov x29, sp +;; mov x28, sp +;; mov x9, x0 +;; sub sp, sp, #0x10 +;; mov x28, sp +;; stur x0, [x28, #8] +;; stur x1, [x28] +;; mov x16, #1 +;; mov x0, x16 +;; scvtf d0, x0 +;; sub sp, sp, #8 +;; mov x28, sp +;; stur d0, [x28] +;; ldur d0, [x28] +;; add sp, sp, #8 +;; mov x28, sp +;; add sp, sp, #0x10 +;; mov x28, sp +;; ldp x29, x30, [sp], #0x10 +;; ret diff --git a/tests/disas/winch/aarch64/i32_reinterpret_f32/const.wat b/tests/disas/winch/aarch64/i32_reinterpret_f32/const.wat new file mode 100644 index 000000000000..7df0566fc97a --- /dev/null +++ b/tests/disas/winch/aarch64/i32_reinterpret_f32/const.wat @@ -0,0 +1,25 @@ +;;! target = "aarch64" +;;! test = "winch" + +(module + (func (result i32) + (f32.const 1.0) + (i32.reinterpret_f32) + ) +) +;; wasm[0]::function[0]: +;; stp x29, x30, [sp, #-0x10]! 
+;; mov x29, sp +;; mov x28, sp +;; mov x9, x0 +;; sub sp, sp, #0x10 +;; mov x28, sp +;; stur x0, [x28, #8] +;; stur x1, [x28] +;; mov x16, #0x3f800000 +;; fmov s0, w16 +;; fcvtzs w0, s0 +;; add sp, sp, #0x10 +;; mov x28, sp +;; ldp x29, x30, [sp], #0x10 +;; ret diff --git a/tests/disas/winch/aarch64/i32_reinterpret_f32/locals.wat b/tests/disas/winch/aarch64/i32_reinterpret_f32/locals.wat new file mode 100644 index 000000000000..3d2372f5c681 --- /dev/null +++ b/tests/disas/winch/aarch64/i32_reinterpret_f32/locals.wat @@ -0,0 +1,28 @@ +;;! target = "aarch64" +;;! test = "winch" + +(module + (func (result i32) + (local f32) + + (local.get 0) + (i32.reinterpret_f32) + ) +) +;; wasm[0]::function[0]: +;; stp x29, x30, [sp, #-0x10]! +;; mov x29, sp +;; mov x28, sp +;; mov x9, x0 +;; sub sp, sp, #0x18 +;; mov x28, sp +;; stur x0, [x28, #0x10] +;; stur x1, [x28, #8] +;; mov x16, #0 +;; stur x16, [x28] +;; ldur s0, [x28, #4] +;; fcvtzs w0, s0 +;; add sp, sp, #0x18 +;; mov x28, sp +;; ldp x29, x30, [sp], #0x10 +;; ret diff --git a/tests/disas/winch/aarch64/i32_reinterpret_f32/params.wat b/tests/disas/winch/aarch64/i32_reinterpret_f32/params.wat new file mode 100644 index 000000000000..953a8fe1b339 --- /dev/null +++ b/tests/disas/winch/aarch64/i32_reinterpret_f32/params.wat @@ -0,0 +1,25 @@ +;;! target = "aarch64" +;;! test = "winch" + +(module + (func (param f32) (result i32) + (local.get 0) + (i32.reinterpret_f32) + ) +) +;; wasm[0]::function[0]: +;; stp x29, x30, [sp, #-0x10]! 
+;; mov x29, sp +;; mov x28, sp +;; mov x9, x0 +;; sub sp, sp, #0x18 +;; mov x28, sp +;; stur x0, [x28, #0x10] +;; stur x1, [x28, #8] +;; stur s0, [x28, #4] +;; ldur s0, [x28, #4] +;; fcvtzs w0, s0 +;; add sp, sp, #0x18 +;; mov x28, sp +;; ldp x29, x30, [sp], #0x10 +;; ret diff --git a/tests/disas/winch/aarch64/i32_reinterpret_f32/ret_float.wat b/tests/disas/winch/aarch64/i32_reinterpret_f32/ret_float.wat new file mode 100644 index 000000000000..d8c9ab13a43b --- /dev/null +++ b/tests/disas/winch/aarch64/i32_reinterpret_f32/ret_float.wat @@ -0,0 +1,29 @@ +;;! target = "aarch64" +;;! test = "winch" + +(module + (func (result f32) + f32.const 1.0 + i32.reinterpret_f32 + drop + f32.const 1.0 + ) +) +;; wasm[0]::function[0]: +;; stp x29, x30, [sp, #-0x10]! +;; mov x29, sp +;; mov x28, sp +;; mov x9, x0 +;; sub sp, sp, #0x10 +;; mov x28, sp +;; stur x0, [x28, #8] +;; stur x1, [x28] +;; mov x16, #0x3f800000 +;; fmov s0, w16 +;; fcvtzs w0, s0 +;; mov x16, #0x3f800000 +;; fmov s0, w16 +;; add sp, sp, #0x10 +;; mov x28, sp +;; ldp x29, x30, [sp], #0x10 +;; ret diff --git a/tests/disas/winch/aarch64/i64_reinterpret_f64/const.wat b/tests/disas/winch/aarch64/i64_reinterpret_f64/const.wat new file mode 100644 index 000000000000..dd8b0f44fb1f --- /dev/null +++ b/tests/disas/winch/aarch64/i64_reinterpret_f64/const.wat @@ -0,0 +1,25 @@ +;;! target = "aarch64" +;;! test = "winch" + +(module + (func (result i64) + (f64.const 1.0) + (i64.reinterpret_f64) + ) +) +;; wasm[0]::function[0]: +;; stp x29, x30, [sp, #-0x10]! 
+;; mov x29, sp +;; mov x28, sp +;; mov x9, x0 +;; sub sp, sp, #0x10 +;; mov x28, sp +;; stur x0, [x28, #8] +;; stur x1, [x28] +;; mov x16, #0x3ff0000000000000 +;; fmov d0, x16 +;; fcvtzs x0, d0 +;; add sp, sp, #0x10 +;; mov x28, sp +;; ldp x29, x30, [sp], #0x10 +;; ret diff --git a/tests/disas/winch/aarch64/i64_reinterpret_f64/locals.wat b/tests/disas/winch/aarch64/i64_reinterpret_f64/locals.wat new file mode 100644 index 000000000000..7170a44a4e8b --- /dev/null +++ b/tests/disas/winch/aarch64/i64_reinterpret_f64/locals.wat @@ -0,0 +1,28 @@ +;;! target = "aarch64" +;;! test = "winch" + +(module + (func (result i64) + (local f64) + + (local.get 0) + (i64.reinterpret_f64) + ) +) +;; wasm[0]::function[0]: +;; stp x29, x30, [sp, #-0x10]! +;; mov x29, sp +;; mov x28, sp +;; mov x9, x0 +;; sub sp, sp, #0x18 +;; mov x28, sp +;; stur x0, [x28, #0x10] +;; stur x1, [x28, #8] +;; mov x16, #0 +;; stur x16, [x28] +;; ldur d0, [x28] +;; fcvtzs x0, d0 +;; add sp, sp, #0x18 +;; mov x28, sp +;; ldp x29, x30, [sp], #0x10 +;; ret diff --git a/tests/disas/winch/aarch64/i64_reinterpret_f64/params.wat b/tests/disas/winch/aarch64/i64_reinterpret_f64/params.wat new file mode 100644 index 000000000000..52ed87a9e24d --- /dev/null +++ b/tests/disas/winch/aarch64/i64_reinterpret_f64/params.wat @@ -0,0 +1,25 @@ +;;! target = "aarch64" +;;! test = "winch" + +(module + (func (param f64) (result i64) + (local.get 0) + (i64.reinterpret_f64) + ) +) +;; wasm[0]::function[0]: +;; stp x29, x30, [sp, #-0x10]! 
+;; mov x29, sp +;; mov x28, sp +;; mov x9, x0 +;; sub sp, sp, #0x18 +;; mov x28, sp +;; stur x0, [x28, #0x10] +;; stur x1, [x28, #8] +;; stur d0, [x28] +;; ldur d0, [x28] +;; fcvtzs x0, d0 +;; add sp, sp, #0x18 +;; mov x28, sp +;; ldp x29, x30, [sp], #0x10 +;; ret diff --git a/tests/disas/winch/aarch64/i64_reinterpret_f64/ret_float.wat b/tests/disas/winch/aarch64/i64_reinterpret_f64/ret_float.wat new file mode 100644 index 000000000000..7419af60ff6b --- /dev/null +++ b/tests/disas/winch/aarch64/i64_reinterpret_f64/ret_float.wat @@ -0,0 +1,29 @@ +;;! target = "aarch64" +;;! test = "winch" + +(module + (func (result f64) + f64.const 1.0 + i64.reinterpret_f64 + drop + f64.const 1.0 + ) +) +;; wasm[0]::function[0]: +;; stp x29, x30, [sp, #-0x10]! +;; mov x29, sp +;; mov x28, sp +;; mov x9, x0 +;; sub sp, sp, #0x10 +;; mov x28, sp +;; stur x0, [x28, #8] +;; stur x1, [x28] +;; mov x16, #0x3ff0000000000000 +;; fmov d0, x16 +;; fcvtzs x0, d0 +;; mov x16, #0x3ff0000000000000 +;; fmov d0, x16 +;; add sp, sp, #0x10 +;; mov x28, sp +;; ldp x29, x30, [sp], #0x10 +;; ret diff --git a/winch/codegen/src/isa/aarch64/asm.rs b/winch/codegen/src/isa/aarch64/asm.rs index 0889309be1bf..64bcc726ce68 100644 --- a/winch/codegen/src/isa/aarch64/asm.rs +++ b/winch/codegen/src/isa/aarch64/asm.rs @@ -15,8 +15,8 @@ use cranelift_codegen::{ ALUOp, ALUOp3, AMode, BitOp, BranchTarget, Cond, CondBrKind, ExtendOp, FPULeftShiftImm, FPUOp1, FPUOp2, FPUOpRI::{self, UShr32, UShr64}, - FPUOpRIMod, FPURightShiftImm, FpuRoundMode, Imm12, ImmLogic, ImmShift, Inst, PairAMode, - ScalarSize, VecLanesOp, VecMisc2, VectorSize, + FPUOpRIMod, FPURightShiftImm, FpuRoundMode, FpuToIntOp, Imm12, ImmLogic, ImmShift, Inst, + IntToFpuOp, PairAMode, ScalarSize, VecLanesOp, VecMisc2, VectorSize, }, settings, Final, MachBuffer, MachBufferFinalized, MachInst, MachInstEmit, MachInstEmitState, MachLabel, Writable, @@ -599,6 +599,36 @@ impl Assembler { }) } + /// Reinterpret a float as an integer. 
+ pub fn fpu_to_int(&mut self, rn: Reg, rd: WritableReg, size: OperandSize) { + let op = match size { + OperandSize::S32 => FpuToIntOp::F32ToI32, + OperandSize::S64 => FpuToIntOp::F64ToI64, + OperandSize::S8 | OperandSize::S16 | OperandSize::S128 => unreachable!(), + }; + + self.emit(Inst::FpuToInt { + op, + rd: rd.map(Into::into), + rn: rn.into(), + }); + } + + /// Reinterpret an integer as a float. + pub fn int_to_fpu(&mut self, rn: Reg, rd: WritableReg, size: OperandSize) { + let op = match size { + OperandSize::S32 => IntToFpuOp::I32ToF32, + OperandSize::S64 => IntToFpuOp::I64ToF64, + OperandSize::S8 | OperandSize::S16 | OperandSize::S128 => unreachable!(), + }; + + self.emit(Inst::IntToFpu { + op, + rd: rd.map(Into::into), + rn: rn.into(), + }); + } + /// Change precision of float. pub fn cvt_float_to_float( &mut self, diff --git a/winch/codegen/src/isa/aarch64/masm.rs b/winch/codegen/src/isa/aarch64/masm.rs index ddfa5193984d..57fc5c059140 100644 --- a/winch/codegen/src/isa/aarch64/masm.rs +++ b/winch/codegen/src/isa/aarch64/masm.rs @@ -537,12 +537,12 @@ impl Masm for MacroAssembler { todo!() } - fn reinterpret_float_as_int(&mut self, _dst: WritableReg, _src: Reg, _size: OperandSize) { - todo!() + fn reinterpret_float_as_int(&mut self, dst: WritableReg, src: Reg, size: OperandSize) { + self.asm.fpu_to_int(src, dst, size); } - fn reinterpret_int_as_float(&mut self, _dst: WritableReg, _src: Reg, _size: OperandSize) { - todo!() + fn reinterpret_int_as_float(&mut self, dst: WritableReg, src: Reg, size: OperandSize) { + self.asm.int_to_fpu(src, dst, size); } fn demote(&mut self, dst: WritableReg, src: Reg) { From bbf05aa2bd85175cf8a0e04a90b280c47387dcf2 Mon Sep 17 00:00:00 2001 From: ad hoc Date: Tue, 10 Dec 2024 13:54:44 +0100 Subject: [PATCH 05/30] winch: implement div for aarch64 (#9762) * implement div for aarch64 * add tests * fmt pass * review edits * fmt pass --- tests/disas/winch/aarch64/i32_divs/const.wat | 38 +++++++++ 
.../disas/winch/aarch64/i32_divs/one_zero.wat | 38 +++++++++ .../disas/winch/aarch64/i32_divs/overflow.wat | 38 +++++++++ tests/disas/winch/aarch64/i32_divs/params.wat | 38 +++++++++ .../winch/aarch64/i32_divs/zero_zero.wat | 38 +++++++++ tests/disas/winch/aarch64/i32_divu/const.wat | 34 ++++++++ .../disas/winch/aarch64/i32_divu/one_zero.wat | 34 ++++++++ tests/disas/winch/aarch64/i32_divu/params.wat | 34 ++++++++ tests/disas/winch/aarch64/i32_divu/signed.wat | 34 ++++++++ .../winch/aarch64/i32_divu/zero_zero.wat | 34 ++++++++ tests/disas/winch/aarch64/i64_divs/const.wat | 36 +++++++++ .../disas/winch/aarch64/i64_divs/one_zero.wat | 36 +++++++++ .../disas/winch/aarch64/i64_divs/overflow.wat | 36 +++++++++ tests/disas/winch/aarch64/i64_divs/params.wat | 36 +++++++++ .../winch/aarch64/i64_divs/zero_zero.wat | 36 +++++++++ tests/disas/winch/aarch64/i64_divu/const.wat | 32 ++++++++ .../disas/winch/aarch64/i64_divu/one_zero.wat | 32 ++++++++ tests/disas/winch/aarch64/i64_divu/params.wat | 32 ++++++++ tests/disas/winch/aarch64/i64_divu/signed.wat | 32 ++++++++ .../winch/aarch64/i64_divu/zero_zero.wat | 32 ++++++++ winch/codegen/src/isa/aarch64/asm.rs | 79 ++++++++++++++++++- winch/codegen/src/isa/aarch64/masm.rs | 15 +++- winch/codegen/src/masm.rs | 2 +- 23 files changed, 791 insertions(+), 5 deletions(-) create mode 100644 tests/disas/winch/aarch64/i32_divs/const.wat create mode 100644 tests/disas/winch/aarch64/i32_divs/one_zero.wat create mode 100644 tests/disas/winch/aarch64/i32_divs/overflow.wat create mode 100644 tests/disas/winch/aarch64/i32_divs/params.wat create mode 100644 tests/disas/winch/aarch64/i32_divs/zero_zero.wat create mode 100644 tests/disas/winch/aarch64/i32_divu/const.wat create mode 100644 tests/disas/winch/aarch64/i32_divu/one_zero.wat create mode 100644 tests/disas/winch/aarch64/i32_divu/params.wat create mode 100644 tests/disas/winch/aarch64/i32_divu/signed.wat create mode 100644 tests/disas/winch/aarch64/i32_divu/zero_zero.wat create mode 100644 
tests/disas/winch/aarch64/i64_divs/const.wat create mode 100644 tests/disas/winch/aarch64/i64_divs/one_zero.wat create mode 100644 tests/disas/winch/aarch64/i64_divs/overflow.wat create mode 100644 tests/disas/winch/aarch64/i64_divs/params.wat create mode 100644 tests/disas/winch/aarch64/i64_divs/zero_zero.wat create mode 100644 tests/disas/winch/aarch64/i64_divu/const.wat create mode 100644 tests/disas/winch/aarch64/i64_divu/one_zero.wat create mode 100644 tests/disas/winch/aarch64/i64_divu/params.wat create mode 100644 tests/disas/winch/aarch64/i64_divu/signed.wat create mode 100644 tests/disas/winch/aarch64/i64_divu/zero_zero.wat diff --git a/tests/disas/winch/aarch64/i32_divs/const.wat b/tests/disas/winch/aarch64/i32_divs/const.wat new file mode 100644 index 000000000000..92a6d05d1964 --- /dev/null +++ b/tests/disas/winch/aarch64/i32_divs/const.wat @@ -0,0 +1,38 @@ +;;! target = "aarch64" +;;! test = "winch" + +(module + (func (result i32) + (i32.const 20) + (i32.const 10) + (i32.div_s) + ) +) + +;; wasm[0]::function[0]: +;; stp x29, x30, [sp, #-0x10]! +;; mov x29, sp +;; mov x28, sp +;; mov x9, x0 +;; sub sp, sp, #0x10 +;; mov x28, sp +;; stur x0, [x28, #8] +;; stur x1, [x28] +;; mov x16, #0xa +;; mov w0, w16 +;; mov x16, #0x14 +;; mov w1, w16 +;; cbz x0, #0x60 +;; 34: cmn w0, #1 +;; ccmp w1, #1, #0, eq +;; b.vs #0x64 +;; 40: sxtw x0, w0 +;; sxtw x1, w1 +;; sdiv x1, x1, x0 +;; mov w0, w1 +;; add sp, sp, #0x10 +;; mov x28, sp +;; ldp x29, x30, [sp], #0x10 +;; ret +;; 60: .byte 0x1f, 0xc1, 0x00, 0x00 +;; 64: .byte 0x1f, 0xc1, 0x00, 0x00 diff --git a/tests/disas/winch/aarch64/i32_divs/one_zero.wat b/tests/disas/winch/aarch64/i32_divs/one_zero.wat new file mode 100644 index 000000000000..e70375c293fe --- /dev/null +++ b/tests/disas/winch/aarch64/i32_divs/one_zero.wat @@ -0,0 +1,38 @@ +;;! target = "aarch64" +;;! 
test = "winch" + +(module + (func (result i32) + (i32.const 1) + (i32.const 0) + (i32.div_s) + ) +) + +;; wasm[0]::function[0]: +;; stp x29, x30, [sp, #-0x10]! +;; mov x29, sp +;; mov x28, sp +;; mov x9, x0 +;; sub sp, sp, #0x10 +;; mov x28, sp +;; stur x0, [x28, #8] +;; stur x1, [x28] +;; mov x16, #0 +;; mov w0, w16 +;; mov x16, #1 +;; mov w1, w16 +;; cbz x0, #0x60 +;; 34: cmn w0, #1 +;; ccmp w1, #1, #0, eq +;; b.vs #0x64 +;; 40: sxtw x0, w0 +;; sxtw x1, w1 +;; sdiv x1, x1, x0 +;; mov w0, w1 +;; add sp, sp, #0x10 +;; mov x28, sp +;; ldp x29, x30, [sp], #0x10 +;; ret +;; 60: .byte 0x1f, 0xc1, 0x00, 0x00 +;; 64: .byte 0x1f, 0xc1, 0x00, 0x00 diff --git a/tests/disas/winch/aarch64/i32_divs/overflow.wat b/tests/disas/winch/aarch64/i32_divs/overflow.wat new file mode 100644 index 000000000000..88fc9621abaa --- /dev/null +++ b/tests/disas/winch/aarch64/i32_divs/overflow.wat @@ -0,0 +1,38 @@ +;;! target = "aarch64" +;;! test = "winch" + +(module + (func (result i32) + (i32.const 0x80000000) + (i32.const -1) + (i32.div_s) + ) +) + +;; wasm[0]::function[0]: +;; stp x29, x30, [sp, #-0x10]! +;; mov x29, sp +;; mov x28, sp +;; mov x9, x0 +;; sub sp, sp, #0x10 +;; mov x28, sp +;; stur x0, [x28, #8] +;; stur x1, [x28] +;; orr x16, xzr, #0xffffffff +;; mov w0, w16 +;; mov x16, #0x80000000 +;; mov w1, w16 +;; cbz x0, #0x60 +;; 34: cmn w0, #1 +;; ccmp w1, #1, #0, eq +;; b.vs #0x64 +;; 40: sxtw x0, w0 +;; sxtw x1, w1 +;; sdiv x1, x1, x0 +;; mov w0, w1 +;; add sp, sp, #0x10 +;; mov x28, sp +;; ldp x29, x30, [sp], #0x10 +;; ret +;; 60: .byte 0x1f, 0xc1, 0x00, 0x00 +;; 64: .byte 0x1f, 0xc1, 0x00, 0x00 diff --git a/tests/disas/winch/aarch64/i32_divs/params.wat b/tests/disas/winch/aarch64/i32_divs/params.wat new file mode 100644 index 000000000000..0976a08b25a8 --- /dev/null +++ b/tests/disas/winch/aarch64/i32_divs/params.wat @@ -0,0 +1,38 @@ +;;! target = "aarch64" +;;! 
test = "winch" + +(module + (func (param i32) (param i32) (result i32) + (local.get 0) + (local.get 1) + (i32.div_s) + ) +) + +;; wasm[0]::function[0]: +;; stp x29, x30, [sp, #-0x10]! +;; mov x29, sp +;; mov x28, sp +;; mov x9, x0 +;; sub sp, sp, #0x18 +;; mov x28, sp +;; stur x0, [x28, #0x10] +;; stur x1, [x28, #8] +;; stur w2, [x28, #4] +;; stur w3, [x28] +;; ldur w0, [x28] +;; ldur w1, [x28, #4] +;; cbz x0, #0x60 +;; 34: cmn w0, #1 +;; ccmp w1, #1, #0, eq +;; b.vs #0x64 +;; 40: sxtw x0, w0 +;; sxtw x1, w1 +;; sdiv x1, x1, x0 +;; mov w0, w1 +;; add sp, sp, #0x18 +;; mov x28, sp +;; ldp x29, x30, [sp], #0x10 +;; ret +;; 60: .byte 0x1f, 0xc1, 0x00, 0x00 +;; 64: .byte 0x1f, 0xc1, 0x00, 0x00 diff --git a/tests/disas/winch/aarch64/i32_divs/zero_zero.wat b/tests/disas/winch/aarch64/i32_divs/zero_zero.wat new file mode 100644 index 000000000000..ce02fce00605 --- /dev/null +++ b/tests/disas/winch/aarch64/i32_divs/zero_zero.wat @@ -0,0 +1,38 @@ +;;! target = "aarch64" +;;! test = "winch" + +(module + (func (result i32) + (i32.const 0) + (i32.const 0) + (i32.div_s) + ) +) + +;; wasm[0]::function[0]: +;; stp x29, x30, [sp, #-0x10]! +;; mov x29, sp +;; mov x28, sp +;; mov x9, x0 +;; sub sp, sp, #0x10 +;; mov x28, sp +;; stur x0, [x28, #8] +;; stur x1, [x28] +;; mov x16, #0 +;; mov w0, w16 +;; mov x16, #0 +;; mov w1, w16 +;; cbz x0, #0x60 +;; 34: cmn w0, #1 +;; ccmp w1, #1, #0, eq +;; b.vs #0x64 +;; 40: sxtw x0, w0 +;; sxtw x1, w1 +;; sdiv x1, x1, x0 +;; mov w0, w1 +;; add sp, sp, #0x10 +;; mov x28, sp +;; ldp x29, x30, [sp], #0x10 +;; ret +;; 60: .byte 0x1f, 0xc1, 0x00, 0x00 +;; 64: .byte 0x1f, 0xc1, 0x00, 0x00 diff --git a/tests/disas/winch/aarch64/i32_divu/const.wat b/tests/disas/winch/aarch64/i32_divu/const.wat new file mode 100644 index 000000000000..3744727fab30 --- /dev/null +++ b/tests/disas/winch/aarch64/i32_divu/const.wat @@ -0,0 +1,34 @@ +;;! target = "aarch64" +;;! 
test = "winch" + +(module + (func (result i32) + (i32.const 20) + (i32.const 10) + (i32.div_u) + ) +) + +;; wasm[0]::function[0]: +;; stp x29, x30, [sp, #-0x10]! +;; mov x29, sp +;; mov x28, sp +;; mov x9, x0 +;; sub sp, sp, #0x10 +;; mov x28, sp +;; stur x0, [x28, #8] +;; stur x1, [x28] +;; mov x16, #0xa +;; mov w0, w16 +;; mov x16, #0x14 +;; mov w1, w16 +;; cbz x0, #0x54 +;; 34: sxtw x0, w0 +;; sxtw x1, w1 +;; udiv x1, x1, x0 +;; mov w0, w1 +;; add sp, sp, #0x10 +;; mov x28, sp +;; ldp x29, x30, [sp], #0x10 +;; ret +;; 54: .byte 0x1f, 0xc1, 0x00, 0x00 diff --git a/tests/disas/winch/aarch64/i32_divu/one_zero.wat b/tests/disas/winch/aarch64/i32_divu/one_zero.wat new file mode 100644 index 000000000000..a6ed69986a08 --- /dev/null +++ b/tests/disas/winch/aarch64/i32_divu/one_zero.wat @@ -0,0 +1,34 @@ +;;! target = "aarch64" +;;! test = "winch" + +(module + (func (result i32) + (i32.const 1) + (i32.const 0) + (i32.div_u) + ) +) + +;; wasm[0]::function[0]: +;; stp x29, x30, [sp, #-0x10]! +;; mov x29, sp +;; mov x28, sp +;; mov x9, x0 +;; sub sp, sp, #0x10 +;; mov x28, sp +;; stur x0, [x28, #8] +;; stur x1, [x28] +;; mov x16, #0 +;; mov w0, w16 +;; mov x16, #1 +;; mov w1, w16 +;; cbz x0, #0x54 +;; 34: sxtw x0, w0 +;; sxtw x1, w1 +;; udiv x1, x1, x0 +;; mov w0, w1 +;; add sp, sp, #0x10 +;; mov x28, sp +;; ldp x29, x30, [sp], #0x10 +;; ret +;; 54: .byte 0x1f, 0xc1, 0x00, 0x00 diff --git a/tests/disas/winch/aarch64/i32_divu/params.wat b/tests/disas/winch/aarch64/i32_divu/params.wat new file mode 100644 index 000000000000..33014c3f0911 --- /dev/null +++ b/tests/disas/winch/aarch64/i32_divu/params.wat @@ -0,0 +1,34 @@ +;;! target = "aarch64" +;;! test = "winch" + +(module + (func (param i32) (param i32) (result i32) + (local.get 0) + (local.get 1) + (i32.div_u) + ) +) + +;; wasm[0]::function[0]: +;; stp x29, x30, [sp, #-0x10]! 
+;; mov x29, sp +;; mov x28, sp +;; mov x9, x0 +;; sub sp, sp, #0x18 +;; mov x28, sp +;; stur x0, [x28, #0x10] +;; stur x1, [x28, #8] +;; stur w2, [x28, #4] +;; stur w3, [x28] +;; ldur w0, [x28] +;; ldur w1, [x28, #4] +;; cbz x0, #0x54 +;; 34: sxtw x0, w0 +;; sxtw x1, w1 +;; udiv x1, x1, x0 +;; mov w0, w1 +;; add sp, sp, #0x18 +;; mov x28, sp +;; ldp x29, x30, [sp], #0x10 +;; ret +;; 54: .byte 0x1f, 0xc1, 0x00, 0x00 diff --git a/tests/disas/winch/aarch64/i32_divu/signed.wat b/tests/disas/winch/aarch64/i32_divu/signed.wat new file mode 100644 index 000000000000..f94f5aaef9b0 --- /dev/null +++ b/tests/disas/winch/aarch64/i32_divu/signed.wat @@ -0,0 +1,34 @@ +;;! target = "aarch64" +;;! test = "winch" + +(module + (func (result i32) + (i32.const -1) + (i32.const -1) + (i32.div_u) + ) +) + +;; wasm[0]::function[0]: +;; stp x29, x30, [sp, #-0x10]! +;; mov x29, sp +;; mov x28, sp +;; mov x9, x0 +;; sub sp, sp, #0x10 +;; mov x28, sp +;; stur x0, [x28, #8] +;; stur x1, [x28] +;; orr x16, xzr, #0xffffffff +;; mov w0, w16 +;; orr x16, xzr, #0xffffffff +;; mov w1, w16 +;; cbz x0, #0x54 +;; 34: sxtw x0, w0 +;; sxtw x1, w1 +;; udiv x1, x1, x0 +;; mov w0, w1 +;; add sp, sp, #0x10 +;; mov x28, sp +;; ldp x29, x30, [sp], #0x10 +;; ret +;; 54: .byte 0x1f, 0xc1, 0x00, 0x00 diff --git a/tests/disas/winch/aarch64/i32_divu/zero_zero.wat b/tests/disas/winch/aarch64/i32_divu/zero_zero.wat new file mode 100644 index 000000000000..e1733ff11ae9 --- /dev/null +++ b/tests/disas/winch/aarch64/i32_divu/zero_zero.wat @@ -0,0 +1,34 @@ +;;! target = "aarch64" +;;! test = "winch" + +(module + (func (result i32) + (i32.const 0) + (i32.const 0) + (i32.div_u) + ) +) + +;; wasm[0]::function[0]: +;; stp x29, x30, [sp, #-0x10]! 
+;; mov x29, sp +;; mov x28, sp +;; mov x9, x0 +;; sub sp, sp, #0x10 +;; mov x28, sp +;; stur x0, [x28, #8] +;; stur x1, [x28] +;; mov x16, #0 +;; mov w0, w16 +;; mov x16, #0 +;; mov w1, w16 +;; cbz x0, #0x54 +;; 34: sxtw x0, w0 +;; sxtw x1, w1 +;; udiv x1, x1, x0 +;; mov w0, w1 +;; add sp, sp, #0x10 +;; mov x28, sp +;; ldp x29, x30, [sp], #0x10 +;; ret +;; 54: .byte 0x1f, 0xc1, 0x00, 0x00 diff --git a/tests/disas/winch/aarch64/i64_divs/const.wat b/tests/disas/winch/aarch64/i64_divs/const.wat new file mode 100644 index 000000000000..8c81a2e4b499 --- /dev/null +++ b/tests/disas/winch/aarch64/i64_divs/const.wat @@ -0,0 +1,36 @@ +;;! target = "aarch64" +;;! test = "winch" + +(module + (func (result i64) + (i64.const 20) + (i64.const 10) + (i64.div_s) + ) +) + +;; wasm[0]::function[0]: +;; stp x29, x30, [sp, #-0x10]! +;; mov x29, sp +;; mov x28, sp +;; mov x9, x0 +;; sub sp, sp, #0x10 +;; mov x28, sp +;; stur x0, [x28, #8] +;; stur x1, [x28] +;; mov x16, #0xa +;; mov x0, x16 +;; mov x16, #0x14 +;; mov x1, x16 +;; cbz x0, #0x58 +;; 34: cmn x0, #1 +;; ccmp x1, #1, #0, eq +;; b.vs #0x5c +;; 40: sdiv x1, x1, x0 +;; mov x0, x1 +;; add sp, sp, #0x10 +;; mov x28, sp +;; ldp x29, x30, [sp], #0x10 +;; ret +;; 58: .byte 0x1f, 0xc1, 0x00, 0x00 +;; 5c: .byte 0x1f, 0xc1, 0x00, 0x00 diff --git a/tests/disas/winch/aarch64/i64_divs/one_zero.wat b/tests/disas/winch/aarch64/i64_divs/one_zero.wat new file mode 100644 index 000000000000..61fb7b28278f --- /dev/null +++ b/tests/disas/winch/aarch64/i64_divs/one_zero.wat @@ -0,0 +1,36 @@ +;;! target = "aarch64" +;;! test = "winch" + +(module + (func (result i64) + (i64.const 1) + (i64.const 0) + (i64.div_s) + ) +) + +;; wasm[0]::function[0]: +;; stp x29, x30, [sp, #-0x10]! 
+;; mov x29, sp +;; mov x28, sp +;; mov x9, x0 +;; sub sp, sp, #0x10 +;; mov x28, sp +;; stur x0, [x28, #8] +;; stur x1, [x28] +;; mov x16, #0 +;; mov x0, x16 +;; mov x16, #1 +;; mov x1, x16 +;; cbz x0, #0x58 +;; 34: cmn x0, #1 +;; ccmp x1, #1, #0, eq +;; b.vs #0x5c +;; 40: sdiv x1, x1, x0 +;; mov x0, x1 +;; add sp, sp, #0x10 +;; mov x28, sp +;; ldp x29, x30, [sp], #0x10 +;; ret +;; 58: .byte 0x1f, 0xc1, 0x00, 0x00 +;; 5c: .byte 0x1f, 0xc1, 0x00, 0x00 diff --git a/tests/disas/winch/aarch64/i64_divs/overflow.wat b/tests/disas/winch/aarch64/i64_divs/overflow.wat new file mode 100644 index 000000000000..eadec00b4324 --- /dev/null +++ b/tests/disas/winch/aarch64/i64_divs/overflow.wat @@ -0,0 +1,36 @@ +;;! target = "aarch64" +;;! test = "winch" + +(module + (func (result i64) + (i64.const 0x8000000000000000) + (i64.const -1) + (i64.div_s) + ) +) + +;; wasm[0]::function[0]: +;; stp x29, x30, [sp, #-0x10]! +;; mov x29, sp +;; mov x28, sp +;; mov x9, x0 +;; sub sp, sp, #0x10 +;; mov x28, sp +;; stur x0, [x28, #8] +;; stur x1, [x28] +;; mov x16, #-1 +;; mov x0, x16 +;; mov x16, #-0x8000000000000000 +;; mov x1, x16 +;; cbz x0, #0x58 +;; 34: cmn x0, #1 +;; ccmp x1, #1, #0, eq +;; b.vs #0x5c +;; 40: sdiv x1, x1, x0 +;; mov x0, x1 +;; add sp, sp, #0x10 +;; mov x28, sp +;; ldp x29, x30, [sp], #0x10 +;; ret +;; 58: .byte 0x1f, 0xc1, 0x00, 0x00 +;; 5c: .byte 0x1f, 0xc1, 0x00, 0x00 diff --git a/tests/disas/winch/aarch64/i64_divs/params.wat b/tests/disas/winch/aarch64/i64_divs/params.wat new file mode 100644 index 000000000000..72c5f9c3df1d --- /dev/null +++ b/tests/disas/winch/aarch64/i64_divs/params.wat @@ -0,0 +1,36 @@ +;;! target = "aarch64" +;;! test = "winch" + +(module + (func (param i64) (param i64) (result i64) + (local.get 0) + (local.get 1) + (i64.div_s) + ) +) + +;; wasm[0]::function[0]: +;; stp x29, x30, [sp, #-0x10]! 
+;; mov x29, sp +;; mov x28, sp +;; mov x9, x0 +;; sub sp, sp, #0x20 +;; mov x28, sp +;; stur x0, [x28, #0x18] +;; stur x1, [x28, #0x10] +;; stur x2, [x28, #8] +;; stur x3, [x28] +;; ldur x0, [x28] +;; ldur x1, [x28, #8] +;; cbz x0, #0x58 +;; 34: cmn x0, #1 +;; ccmp x1, #1, #0, eq +;; b.vs #0x5c +;; 40: sdiv x1, x1, x0 +;; mov x0, x1 +;; add sp, sp, #0x20 +;; mov x28, sp +;; ldp x29, x30, [sp], #0x10 +;; ret +;; 58: .byte 0x1f, 0xc1, 0x00, 0x00 +;; 5c: .byte 0x1f, 0xc1, 0x00, 0x00 diff --git a/tests/disas/winch/aarch64/i64_divs/zero_zero.wat b/tests/disas/winch/aarch64/i64_divs/zero_zero.wat new file mode 100644 index 000000000000..c356678cbd80 --- /dev/null +++ b/tests/disas/winch/aarch64/i64_divs/zero_zero.wat @@ -0,0 +1,36 @@ +;;! target = "aarch64" +;;! test = "winch" + +(module + (func (result i64) + (i64.const 0) + (i64.const 0) + (i64.div_s) + ) +) + +;; wasm[0]::function[0]: +;; stp x29, x30, [sp, #-0x10]! +;; mov x29, sp +;; mov x28, sp +;; mov x9, x0 +;; sub sp, sp, #0x10 +;; mov x28, sp +;; stur x0, [x28, #8] +;; stur x1, [x28] +;; mov x16, #0 +;; mov x0, x16 +;; mov x16, #0 +;; mov x1, x16 +;; cbz x0, #0x58 +;; 34: cmn x0, #1 +;; ccmp x1, #1, #0, eq +;; b.vs #0x5c +;; 40: sdiv x1, x1, x0 +;; mov x0, x1 +;; add sp, sp, #0x10 +;; mov x28, sp +;; ldp x29, x30, [sp], #0x10 +;; ret +;; 58: .byte 0x1f, 0xc1, 0x00, 0x00 +;; 5c: .byte 0x1f, 0xc1, 0x00, 0x00 diff --git a/tests/disas/winch/aarch64/i64_divu/const.wat b/tests/disas/winch/aarch64/i64_divu/const.wat new file mode 100644 index 000000000000..23dd7cca4866 --- /dev/null +++ b/tests/disas/winch/aarch64/i64_divu/const.wat @@ -0,0 +1,32 @@ +;;! target = "aarch64" +;;! test = "winch" + +(module + (func (result i64) + (i64.const 20) + (i64.const 10) + (i64.div_u) + ) +) + +;; wasm[0]::function[0]: +;; stp x29, x30, [sp, #-0x10]! 
+;; mov x29, sp +;; mov x28, sp +;; mov x9, x0 +;; sub sp, sp, #0x10 +;; mov x28, sp +;; stur x0, [x28, #8] +;; stur x1, [x28] +;; mov x16, #0xa +;; mov x0, x16 +;; mov x16, #0x14 +;; mov x1, x16 +;; cbz x0, #0x4c +;; 34: udiv x1, x1, x0 +;; mov x0, x1 +;; add sp, sp, #0x10 +;; mov x28, sp +;; ldp x29, x30, [sp], #0x10 +;; ret +;; 4c: .byte 0x1f, 0xc1, 0x00, 0x00 diff --git a/tests/disas/winch/aarch64/i64_divu/one_zero.wat b/tests/disas/winch/aarch64/i64_divu/one_zero.wat new file mode 100644 index 000000000000..bd6a84f50cce --- /dev/null +++ b/tests/disas/winch/aarch64/i64_divu/one_zero.wat @@ -0,0 +1,32 @@ +;;! target = "aarch64" +;;! test = "winch" + +(module + (func (result i64) + (i64.const 1) + (i64.const 0) + (i64.div_u) + ) +) + +;; wasm[0]::function[0]: +;; stp x29, x30, [sp, #-0x10]! +;; mov x29, sp +;; mov x28, sp +;; mov x9, x0 +;; sub sp, sp, #0x10 +;; mov x28, sp +;; stur x0, [x28, #8] +;; stur x1, [x28] +;; mov x16, #0 +;; mov x0, x16 +;; mov x16, #1 +;; mov x1, x16 +;; cbz x0, #0x4c +;; 34: udiv x1, x1, x0 +;; mov x0, x1 +;; add sp, sp, #0x10 +;; mov x28, sp +;; ldp x29, x30, [sp], #0x10 +;; ret +;; 4c: .byte 0x1f, 0xc1, 0x00, 0x00 diff --git a/tests/disas/winch/aarch64/i64_divu/params.wat b/tests/disas/winch/aarch64/i64_divu/params.wat new file mode 100644 index 000000000000..5bbefb7aea88 --- /dev/null +++ b/tests/disas/winch/aarch64/i64_divu/params.wat @@ -0,0 +1,32 @@ +;;! target = "aarch64" +;;! test = "winch" + +(module + (func (param i64) (param i64) (result i64) + (local.get 0) + (local.get 1) + (i64.div_u) + ) +) + +;; wasm[0]::function[0]: +;; stp x29, x30, [sp, #-0x10]! 
+;; mov x29, sp +;; mov x28, sp +;; mov x9, x0 +;; sub sp, sp, #0x20 +;; mov x28, sp +;; stur x0, [x28, #0x18] +;; stur x1, [x28, #0x10] +;; stur x2, [x28, #8] +;; stur x3, [x28] +;; ldur x0, [x28] +;; ldur x1, [x28, #8] +;; cbz x0, #0x4c +;; 34: udiv x1, x1, x0 +;; mov x0, x1 +;; add sp, sp, #0x20 +;; mov x28, sp +;; ldp x29, x30, [sp], #0x10 +;; ret +;; 4c: .byte 0x1f, 0xc1, 0x00, 0x00 diff --git a/tests/disas/winch/aarch64/i64_divu/signed.wat b/tests/disas/winch/aarch64/i64_divu/signed.wat new file mode 100644 index 000000000000..7d425f4e4e5c --- /dev/null +++ b/tests/disas/winch/aarch64/i64_divu/signed.wat @@ -0,0 +1,32 @@ +;;! target = "aarch64" +;;! test = "winch" + +(module + (func (result i64) + (i64.const -1) + (i64.const -1) + (i64.div_u) + ) +) + +;; wasm[0]::function[0]: +;; stp x29, x30, [sp, #-0x10]! +;; mov x29, sp +;; mov x28, sp +;; mov x9, x0 +;; sub sp, sp, #0x10 +;; mov x28, sp +;; stur x0, [x28, #8] +;; stur x1, [x28] +;; mov x16, #-1 +;; mov x0, x16 +;; mov x16, #-1 +;; mov x1, x16 +;; cbz x0, #0x4c +;; 34: udiv x1, x1, x0 +;; mov x0, x1 +;; add sp, sp, #0x10 +;; mov x28, sp +;; ldp x29, x30, [sp], #0x10 +;; ret +;; 4c: .byte 0x1f, 0xc1, 0x00, 0x00 diff --git a/tests/disas/winch/aarch64/i64_divu/zero_zero.wat b/tests/disas/winch/aarch64/i64_divu/zero_zero.wat new file mode 100644 index 000000000000..71f9c4285d0b --- /dev/null +++ b/tests/disas/winch/aarch64/i64_divu/zero_zero.wat @@ -0,0 +1,32 @@ +;;! target = "aarch64" +;;! test = "winch" + +(module + (func (result i64) + (i64.const 0) + (i64.const 0) + (i64.div_u) + ) +) + +;; wasm[0]::function[0]: +;; stp x29, x30, [sp, #-0x10]! 
+;; mov x29, sp +;; mov x28, sp +;; mov x9, x0 +;; sub sp, sp, #0x10 +;; mov x28, sp +;; stur x0, [x28, #8] +;; stur x1, [x28] +;; mov x16, #0 +;; mov x0, x16 +;; mov x16, #0 +;; mov x1, x16 +;; cbz x0, #0x4c +;; 34: udiv x1, x1, x0 +;; mov x0, x1 +;; add sp, sp, #0x10 +;; mov x28, sp +;; ldp x29, x30, [sp], #0x10 +;; ret +;; 4c: .byte 0x1f, 0xc1, 0x00, 0x00 diff --git a/winch/codegen/src/isa/aarch64/asm.rs b/winch/codegen/src/isa/aarch64/asm.rs index 64bcc726ce68..f94d6fc81758 100644 --- a/winch/codegen/src/isa/aarch64/asm.rs +++ b/winch/codegen/src/isa/aarch64/asm.rs @@ -1,12 +1,14 @@ //! Assembler library implementation for Aarch64. use super::{address::Address, regs}; -use crate::masm::{ExtendKind, FloatCmpKind, IntCmpKind, RoundingMode, ShiftKind}; +use crate::aarch64::regs::zero; +use crate::masm::{DivKind, ExtendKind, FloatCmpKind, IntCmpKind, RoundingMode, ShiftKind}; use crate::CallingConvention; use crate::{ masm::OperandSize, reg::{writable, Reg, WritableReg}, }; +use cranelift_codegen::isa::aarch64::inst::{UImm5, NZCV}; use cranelift_codegen::{ ir::{ExternalName, LibCall, MemFlags, SourceLoc, TrapCode, UserExternalNameRef}, isa::aarch64::inst::{ @@ -396,6 +398,81 @@ impl Assembler { self.emit_alu_rrrr(ALUOp3::MAdd, scratch, rn, rd, regs::zero(), size); } + /// Signed/unsigned division with three registers. 
+ pub fn div_rrr( + &mut self, + divisor: Reg, + dividend: Reg, + dest: Writable, + kind: DivKind, + size: OperandSize, + ) { + // Check for division by 0 + self.emit(Inst::TrapIf { + kind: CondBrKind::Zero(divisor.into()), + trap_code: TrapCode::INTEGER_DIVISION_BY_ZERO, + }); + + // check for overflow + if kind == DivKind::Signed { + // we first check whether the divisor is -1 + self.emit(Inst::AluRRImm12 { + alu_op: ALUOp::AddS, + size: size.into(), + rd: writable!(zero().into()), + rn: divisor.into(), + imm12: Imm12::maybe_from_u64(1).expect("1 fits in 12 bits"), + }); + // if it is -1, then we check if the dividend is MIN + self.emit(Inst::CCmpImm { + size: size.into(), + rn: dividend.into(), + imm: UImm5::maybe_from_u8(1).expect("1 fits in 5 bits"), + nzcv: NZCV::new(false, false, false, false), + cond: Cond::Eq, + }); + + // Finally, trap if the previous operation overflowed + self.emit(Inst::TrapIf { + kind: CondBrKind::Cond(Cond::Vs), + trap_code: TrapCode::INTEGER_OVERFLOW, + }) + } + + // `cranelift-codegen` doesn't support emitting u/sdiv for anything but I64, + // we therefore sign-extend the operand. + // see: https://github.com/bytecodealliance/wasmtime/issues/9766 + if size == OperandSize::S32 { + self.emit(Inst::Extend { + rd: writable!(divisor.into()), + rn: divisor.into(), + signed: true, + from_bits: 32, + to_bits: 64, + }); + self.emit(Inst::Extend { + rd: writable!(dividend.into()), + rn: dividend.into(), + signed: true, + from_bits: 32, + to_bits: 64, + }); + } + + let op = match kind { + DivKind::Signed => ALUOp::SDiv, + DivKind::Unsigned => ALUOp::UDiv, + }; + + self.emit_alu_rrr( + op, + divisor, + dividend, + dest.map(Into::into), + OperandSize::S64, + ); + } + /// And with three registers. 
pub fn and_rrr(&mut self, rm: Reg, rn: Reg, rd: WritableReg, size: OperandSize) { self.emit_alu_rrr(ALUOp::And, rm, rn, rd, size); diff --git a/winch/codegen/src/isa/aarch64/masm.rs b/winch/codegen/src/isa/aarch64/masm.rs index 57fc5c059140..79d0d388c70b 100644 --- a/winch/codegen/src/isa/aarch64/masm.rs +++ b/winch/codegen/src/isa/aarch64/masm.rs @@ -16,6 +16,7 @@ use crate::{ MacroAssembler as Masm, MulWideKind, OperandSize, RegImm, RemKind, RoundingMode, SPOffset, ShiftKind, StackSlot, TrapCode, TruncKind, }, + stack::TypedReg, }; use cranelift_codegen::{ binemit::CodeOffset, @@ -24,7 +25,7 @@ use cranelift_codegen::{ settings, Final, MachBufferFinalized, MachLabel, }; use regalloc2::RegClass; -use wasmtime_environ::PtrSize; +use wasmtime_environ::{PtrSize, WasmValType}; /// Aarch64 MacroAssembler. pub(crate) struct MacroAssembler { @@ -470,8 +471,16 @@ impl Masm for MacroAssembler { context.stack.push(dst.into()); } - fn div(&mut self, _context: &mut CodeGenContext, _kind: DivKind, _size: OperandSize) { - todo!() + fn div(&mut self, context: &mut CodeGenContext, kind: DivKind, size: OperandSize) { + context.binop(self, size, |this, dividend, divisor, size| { + this.asm + .div_rrr(divisor, dividend, writable!(dividend), kind, size); + match size { + OperandSize::S32 => TypedReg::new(WasmValType::I32, dividend), + OperandSize::S64 => TypedReg::new(WasmValType::I64, dividend), + s => unreachable!("invalid size for division: {s:?}"), + } + }) } fn rem(&mut self, _context: &mut CodeGenContext, _kind: RemKind, _size: OperandSize) { diff --git a/winch/codegen/src/masm.rs b/winch/codegen/src/masm.rs index da25d88c486a..51b6e963c81c 100644 --- a/winch/codegen/src/masm.rs +++ b/winch/codegen/src/masm.rs @@ -14,7 +14,7 @@ use wasmtime_environ::PtrSize; pub(crate) use cranelift_codegen::ir::TrapCode; -#[derive(Eq, PartialEq)] +#[derive(Eq, PartialEq, Clone, Copy)] pub(crate) enum DivKind { /// Signed division. 
Signed, From e9cc928f49aa9abcb773a4bc242ab33dd355f6b3 Mon Sep 17 00:00:00 2001 From: Alex Crichton Date: Tue, 10 Dec 2024 07:15:09 -0700 Subject: [PATCH 06/30] Fix minor wast fuzzing issues (#9764) * Sort tests by name to ensure that a reproduction on one machine works on another. * Relax the error message matching to be a bit more lenient with paths. --- crates/fuzzing/build.rs | 3 ++- crates/wast/src/wast.rs | 2 +- 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/crates/fuzzing/build.rs b/crates/fuzzing/build.rs index 08b9bddfe3a0..f58919189f26 100644 --- a/crates/fuzzing/build.rs +++ b/crates/fuzzing/build.rs @@ -15,7 +15,8 @@ fn main() { root.pop(); // chop off 'fuzzing' root.pop(); // chop off 'crates' - let tests = wasmtime_wast_util::find_tests(&root).unwrap(); + let mut tests = wasmtime_wast_util::find_tests(&root).unwrap(); + tests.sort_by_key(|test| test.path.clone()); let mut code = format!("static FILES: &[fn() -> wasmtime_wast_util::WastTest] = &[\n"); diff --git a/crates/wast/src/wast.rs b/crates/wast/src/wast.rs index 9fba3fe2b44d..d44a48171004 100644 --- a/crates/wast/src/wast.rs +++ b/crates/wast/src/wast.rs @@ -624,7 +624,7 @@ fn is_matching_assert_invalid_error_message(test: &str, expected: &str, actual: // and another asserts a different error message). Overall we didn't benefit // a whole lot from trying to match errors so just assume the error is // roughly the same and otherwise don't try to match it. - if Path::new(test).starts_with("./tests/spec_testsuite") { + if test.contains("spec_testsuite") { return true; } From c42d92bffb7cf678c952778041b50b3929f04197 Mon Sep 17 00:00:00 2001 From: Alex Crichton Date: Tue, 10 Dec 2024 07:15:59 -0700 Subject: [PATCH 07/30] Fix some off-by-one comparisons in assertions (#9763) * Fix some off-by-one comparisons in assertions Fuzzing found a small issue with #9687 and this commit relaxes a few off-by-one checks to allow addressing one-byte-beyond-the-end of a linear memory. 
* Review comments --- crates/wasmtime/src/runtime/vm/mmap.rs | 9 +++------ tests/all/memory.rs | 16 ++++++++++++++++ 2 files changed, 19 insertions(+), 6 deletions(-) diff --git a/crates/wasmtime/src/runtime/vm/mmap.rs b/crates/wasmtime/src/runtime/vm/mmap.rs index f06bb0a8d436..78e50939c26b 100644 --- a/crates/wasmtime/src/runtime/vm/mmap.rs +++ b/crates/wasmtime/src/runtime/vm/mmap.rs @@ -135,9 +135,9 @@ impl Mmap { /// Return a struct representing a page-aligned offset into the mmap. /// - /// Returns an error if `offset >= self.len_aligned()`. + /// Returns an error if `offset > self.len_aligned()`. pub fn offset(self: &Arc, offset: HostAlignedByteCount) -> Result { - if offset >= self.len_aligned() { + if offset > self.len_aligned() { bail!( "offset {} is not in bounds for mmap: {}", offset, @@ -359,11 +359,8 @@ pub struct MmapOffset { impl MmapOffset { #[inline] fn new(mmap: Arc>, offset: HostAlignedByteCount) -> Self { - // Note < rather than <=. This currently cannot represent the logical - // end of the mmap. We may need to change this if that becomes - // necessary. 
assert!( - offset < mmap.len_aligned(), + offset <= mmap.len_aligned(), "offset {} is in bounds (< {})", offset, mmap.len_aligned(), diff --git a/tests/all/memory.rs b/tests/all/memory.rs index 6c8f1c651924..fb46d40d0d36 100644 --- a/tests/all/memory.rs +++ b/tests/all/memory.rs @@ -734,3 +734,19 @@ fn get_memory_type_with_custom_page_size_from_wasm(config: &mut Config) -> Resul Ok(()) } + +#[wasmtime_test] +fn configure_zero(config: &mut Config) -> Result<()> { + config.guard_before_linear_memory(false); + config.memory_guard_size(0); + config.memory_reservation(0); + config.memory_reservation_for_growth(0); + let engine = Engine::new(&config)?; + let mut store = Store::new(&engine, ()); + + let ty = MemoryType::new(0, None); + let memory = Memory::new(&mut store, ty)?; + assert_eq!(memory.data_size(&store), 0); + + Ok(()) +} From c665876946004de9e28190999f0d8c70c3edc41a Mon Sep 17 00:00:00 2001 From: Alex Crichton Date: Tue, 10 Dec 2024 07:18:15 -0700 Subject: [PATCH 08/30] Procedurally generate Pulley Cranelift boilerplate (#9760) * Procedurally generate Pulley Cranelift boilerplate This commit is an integration of the `for_each_op!` macro (and extended ops) for Cranelift. This procedurally generates a few new items for Cranelift's Pulley backend to use: * `RawInst` - a raw enumeration of instructions as-is. * ISLE constructors (e.g. `pulley_*` ctors) - generated for all of the `RawInst` variants. * Register allocation methods for `RawInst` * Pretty printing methods for `RawInst` * Emission methods for `RawInst` The `Inst` enum now has a `Raw` variant which contains a `RawInst`. In this manner the main `Inst` enum can still have pseudo-insts like `Call`, polymorphic instructions like loads/stores (probably gonna get refactored in the future though), and slightly different representations such as `Inst::Trap` having a `TrapCode` and `RawInst::Trap` wouldn't. 
The goal of this commit is to lower the amount of effort to quickly add and experiment with new instructions in Pulley. Ideally it's now just (a) define them in the pulley macro, (b) implement a direct lowering rule, and (c) implement it in the interpreter. Ideally no need to implement anything else inside of Cranelift as everything should be auto-generated. Many existing `Inst` variants have been deleted in favor of their equivalents in `RawInst` now. This undeniably increases the complexity of the Pulley backend but at least for me I find it well worth it to have all this boilerplate generated automatically. * Fill out TODO * Fix conditional generation of pulley ISLE --- Cargo.lock | 1 + cranelift/codegen/Cargo.toml | 7 +- cranelift/codegen/meta/Cargo.toml | 4 + cranelift/codegen/meta/src/isle.rs | 6 +- cranelift/codegen/meta/src/lib.rs | 15 +- cranelift/codegen/meta/src/pulley.rs | 331 ++++++++++++++++++ .../codegen/src/isa/pulley_shared/abi.rs | 26 +- .../codegen/src/isa/pulley_shared/inst.isle | 244 +------------ .../src/isa/pulley_shared/inst/emit.rs | 66 +--- .../codegen/src/isa/pulley_shared/inst/mod.rs | 308 ++-------------- .../filetests/isa/pulley32/br_table.clif | 10 +- .../filetests/isa/pulley32/brif-icmp.clif | 44 +-- .../filetests/isa/pulley32/brif.clif | 38 +- .../filetests/isa/pulley32/call.clif | 116 +++--- .../filetests/isa/pulley32/iadd.clif | 8 +- .../filetests/isa/pulley32/icmp.clif | 80 ++--- .../filetests/isa/pulley32/iconst.clif | 8 +- .../filetests/isa/pulley32/jump.clif | 4 +- .../filetests/isa/pulley32/stack_addr.clif | 4 +- .../filetests/isa/pulley32/trap.clif | 16 +- .../filetests/isa/pulley64/br_table.clif | 10 +- .../filetests/isa/pulley64/brif-icmp.clif | 44 +-- .../filetests/isa/pulley64/brif.clif | 38 +- .../filetests/isa/pulley64/call.clif | 152 ++++---- .../filetests/isa/pulley64/iadd.clif | 8 +- .../filetests/isa/pulley64/icmp.clif | 80 ++--- .../filetests/isa/pulley64/iconst.clif | 8 +- .../filetests/isa/pulley64/jump.clif | 4 
+- .../filetests/isa/pulley64/stack_addr.clif | 4 +- .../filetests/isa/pulley64/trap.clif | 16 +- pulley/src/lib.rs | 2 + 31 files changed, 777 insertions(+), 925 deletions(-) create mode 100644 cranelift/codegen/meta/src/pulley.rs diff --git a/Cargo.lock b/Cargo.lock index 84d0d02016a3..dc075f969e35 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -760,6 +760,7 @@ name = "cranelift-codegen-meta" version = "0.116.0" dependencies = [ "cranelift-codegen-shared", + "pulley-interpreter", ] [[package]] diff --git a/cranelift/codegen/Cargo.toml b/cranelift/codegen/Cargo.toml index 8c4a0a97f5b3..469cd639fc7e 100644 --- a/cranelift/codegen/Cargo.toml +++ b/cranelift/codegen/Cargo.toml @@ -86,7 +86,12 @@ x86 = [] arm64 = [] s390x = [] riscv64 = [] -pulley = ["dep:pulley-interpreter", "pulley-interpreter/encode", "pulley-interpreter/disas"] +pulley = [ + "dep:pulley-interpreter", + "pulley-interpreter/encode", + "pulley-interpreter/disas", + "cranelift-codegen-meta/pulley", +] # Enable the ISA target for the host machine host-arch = [] diff --git a/cranelift/codegen/meta/Cargo.toml b/cranelift/codegen/meta/Cargo.toml index 52e3df080ff9..46f042af9148 100644 --- a/cranelift/codegen/meta/Cargo.toml +++ b/cranelift/codegen/meta/Cargo.toml @@ -17,3 +17,7 @@ rustdoc-args = [ "--document-private-items" ] [dependencies] cranelift-codegen-shared = { path = "../shared", version = "0.116.0" } +pulley-interpreter = { workspace = true, optional = true } + +[features] +pulley = ['dep:pulley-interpreter'] diff --git a/cranelift/codegen/meta/src/isle.rs b/cranelift/codegen/meta/src/isle.rs index da32cda673f6..e2210df3a31d 100644 --- a/cranelift/codegen/meta/src/isle.rs +++ b/cranelift/codegen/meta/src/isle.rs @@ -64,6 +64,8 @@ pub fn get_isle_compilations( let prelude_isle = codegen_crate_dir.join("src").join("prelude.isle"); let prelude_opt_isle = codegen_crate_dir.join("src").join("prelude_opt.isle"); let prelude_lower_isle = codegen_crate_dir.join("src").join("prelude_lower.isle"); + 
#[cfg(feature = "pulley")] + let pulley_gen = gen_dir.join("pulley_gen.isle"); // Directory for mid-end optimizations. let src_opts = codegen_crate_dir.join("src").join("opts"); @@ -73,6 +75,7 @@ pub fn get_isle_compilations( let src_isa_aarch64 = codegen_crate_dir.join("src").join("isa").join("aarch64"); let src_isa_s390x = codegen_crate_dir.join("src").join("isa").join("s390x"); let src_isa_risc_v = codegen_crate_dir.join("src").join("isa").join("riscv64"); + #[cfg(feature = "pulley")] let src_isa_pulley_shared = codegen_crate_dir .join("src") .join("isa") @@ -166,6 +169,7 @@ pub fn get_isle_compilations( untracked_inputs: vec![clif_lower_isle.clone()], }, // The Pulley instruction selector. + #[cfg(feature = "pulley")] IsleCompilation { name: "pulley".to_string(), output: gen_dir.join("isle_pulley_shared.rs"), @@ -175,7 +179,7 @@ pub fn get_isle_compilations( src_isa_pulley_shared.join("inst.isle"), src_isa_pulley_shared.join("lower.isle"), ], - untracked_inputs: vec![clif_lower_isle.clone()], + untracked_inputs: vec![pulley_gen.clone(), clif_lower_isle.clone()], }, ], } diff --git a/cranelift/codegen/meta/src/lib.rs b/cranelift/codegen/meta/src/lib.rs index 480719d045d1..188e7e7ffbfb 100644 --- a/cranelift/codegen/meta/src/lib.rs +++ b/cranelift/codegen/meta/src/lib.rs @@ -20,6 +20,9 @@ mod constant_hash; mod shared; mod unique_table; +#[cfg(feature = "pulley")] +mod pulley; + /// Generate an ISA from an architecture string (e.g. "x86_64"). 
pub fn isa_from_arch(arch: &str) -> Result { isa::Isa::from_arch(arch).ok_or_else(|| format!("no supported isa found for arch `{arch}`")) @@ -63,6 +66,11 @@ fn generate_rust_for_shared_defs( )?; } + #[cfg(feature = "pulley")] + if isas.contains(&isa::Isa::Pulley32) || isas.contains(&isa::Isa::Pulley64) { + pulley::generate_rust("pulley_inst_gen.rs", out_dir)?; + } + Ok(()) } @@ -82,7 +90,12 @@ fn generate_isle_for_shared_defs( "clif_opt.isle", "clif_lower.isle", isle_dir, - ) + )?; + + #[cfg(feature = "pulley")] + pulley::generate_isle("pulley_gen.isle", isle_dir)?; + + Ok(()) } /// Generates all the source files used in Cranelift from the meta-language. diff --git a/cranelift/codegen/meta/src/pulley.rs b/cranelift/codegen/meta/src/pulley.rs new file mode 100644 index 000000000000..90717fb0c7d9 --- /dev/null +++ b/cranelift/codegen/meta/src/pulley.rs @@ -0,0 +1,331 @@ +use crate::error::Error; +use std::path::Path; + +struct Inst<'a> { + snake_name: &'a str, + name: &'a str, + fields: &'a [(&'a str, &'a str)], +} + +macro_rules! define { + ( + $( + $( #[$attr:meta] )* + $snake_name:ident = $name:ident $( { $( $field:ident : $field_ty:ty ),* } )? 
; + )* + ) => { + &[$(Inst { + snake_name: stringify!($snake_name), + name: stringify!($name), + fields: &[$($( (stringify!($field), stringify!($field_ty)), )*)?], + }),*] + // helpers.push_str(concat!("(define pulley_", stringify!($snake_name), " (")); + }; +} + +const OPS: &[Inst<'_>] = pulley_interpreter::for_each_op!(define); +const EXTENDED_OPS: &[Inst<'_>] = pulley_interpreter::for_each_extended_op!(define); + +enum Operand<'a> { + Normal { name: &'a str, ty: &'a str }, + Writable { name: &'a str, ty: &'a str }, + Binop { reg: &'a str }, +} + +impl Inst<'_> { + fn operands(&self) -> impl Iterator> { + self.fields.iter().map(|(name, ty)| match (*name, *ty) { + ("operands", "BinaryOperands < XReg >") => Operand::Binop { reg: "XReg" }, + (name, "RegSet < XReg >") => Operand::Normal { + name, + ty: "VecXReg", + }, + ("dst", ty) => Operand::Writable { name, ty }, + (name, ty) => Operand::Normal { name, ty }, + }) + } + + fn skip(&self) -> bool { + match self.name { + // Skip instructions related to control-flow as those require + // special handling with `MachBuffer`. + "Jump" | "Call" | "CallIndirect" => true, + + // Skip special instructions not used in Cranelift. + "XPush32Many" | "XPush64Many" | "XPop32Many" | "XPop64Many" => true, + + // The pulley backend has its own trap-with-trap-code. + "Trap" => true, + + // Skip more branching-related instructions. + n => n.starts_with("Br"), + } + } +} + +pub fn generate_rust(filename: &str, out_dir: &Path) -> Result<(), Error> { + let mut rust = String::new(); + + // Generate a pretty-printing method for debugging. + rust.push_str("pub fn print(inst: &RawInst) -> String {\n"); + rust.push_str("match inst {\n"); + for inst @ Inst { name, .. 
} in OPS.iter().chain(EXTENDED_OPS) { + if inst.skip() { + continue; + } + + let mut pat = String::new(); + let mut locals = String::new(); + let mut format_string = String::new(); + format_string.push_str(inst.snake_name); + for (i, op) in inst.operands().enumerate() { + match op { + Operand::Normal { name, ty } | Operand::Writable { name, ty } => { + pat.push_str(name); + pat.push_str(","); + + if i > 0 { + format_string.push_str(","); + } + format_string.push_str(" {"); + format_string.push_str(name); + format_string.push_str("}"); + + if ty.contains("Reg") { + if name == "dst" { + locals.push_str(&format!("let {name} = reg_name(*{name}.to_reg());\n")); + } else { + locals.push_str(&format!("let {name} = reg_name(**{name});\n")); + } + } + } + Operand::Binop { reg: _ } => { + pat.push_str("dst, src1, src2,"); + format_string.push_str(" {dst}, {src1}, {src2}"); + locals.push_str(&format!("let dst = reg_name(*dst.to_reg());\n")); + locals.push_str(&format!("let src1 = reg_name(**src1);\n")); + locals.push_str(&format!("let src2 = reg_name(**src2);\n")); + } + } + } + + rust.push_str(&format!( + " + RawInst::{name} {{ {pat} }} => {{ + {locals} + format!(\"{format_string}\") + }} + " + )); + } + rust.push_str("}\n"); + rust.push_str("}\n"); + + // Generate `get_operands` to feed information to regalloc + rust.push_str( + "pub fn get_operands(inst: &mut RawInst, collector: &mut impl OperandVisitor) {\n", + ); + rust.push_str("match inst {\n"); + for inst @ Inst { name, .. 
} in OPS.iter().chain(EXTENDED_OPS) { + if inst.skip() { + continue; + } + + let mut pat = String::new(); + let mut uses = Vec::new(); + let mut defs = Vec::new(); + for op in inst.operands() { + match op { + Operand::Normal { name, ty } => { + if ty.contains("Reg") { + uses.push(name); + pat.push_str(name); + pat.push_str(","); + } + } + Operand::Writable { name, ty } => { + if ty.contains("Reg") { + defs.push(name); + pat.push_str(name); + pat.push_str(","); + } + } + Operand::Binop { reg: _ } => { + pat.push_str("dst, src1, src2,"); + uses.push("src1"); + uses.push("src2"); + defs.push("dst"); + } + } + } + + let uses = uses + .iter() + .map(|u| format!("collector.reg_use({u});\n")) + .collect::(); + let defs = defs + .iter() + .map(|u| format!("collector.reg_def({u});\n")) + .collect::(); + + rust.push_str(&format!( + " + RawInst::{name} {{ {pat} .. }} => {{ + {uses} + {defs} + }} + " + )); + } + rust.push_str("}\n"); + rust.push_str("}\n"); + + // Generate an emission method + rust.push_str("pub fn emit

(inst: &RawInst, sink: &mut MachBuffer>)\n"); + rust.push_str(" where P: PulleyTargetKind,\n"); + rust.push_str("{\n"); + rust.push_str("match *inst {\n"); + for inst @ Inst { + name, snake_name, .. + } in OPS.iter().chain(EXTENDED_OPS) + { + if inst.skip() { + continue; + } + + let mut pat = String::new(); + let mut args = String::new(); + for op in inst.operands() { + match op { + Operand::Normal { name, ty: _ } | Operand::Writable { name, ty: _ } => { + pat.push_str(name); + pat.push_str(","); + + args.push_str(name); + args.push_str(","); + } + Operand::Binop { reg: _ } => { + pat.push_str("dst, src1, src2,"); + args.push_str( + "pulley_interpreter::regs::BinaryOperands::new(dst, src1, src2),", + ); + } + } + } + + rust.push_str(&format!( + " + RawInst::{name} {{ {pat} }} => {{ + pulley_interpreter::encode::{snake_name}(sink, {args}) + }} + " + )); + } + rust.push_str("}\n"); + rust.push_str("}\n"); + + std::fs::write(out_dir.join(filename), rust)?; + Ok(()) +} + +pub fn generate_isle(filename: &str, out_dir: &Path) -> Result<(), Error> { + let mut isle = String::new(); + + // Generate the `RawInst` enum + isle.push_str("(type RawInst (enum\n"); + for inst in OPS.iter().chain(EXTENDED_OPS) { + if inst.skip() { + continue; + } + isle.push_str(" ("); + isle.push_str(inst.name); + for op in inst.operands() { + match op { + Operand::Normal { name, ty } => { + isle.push_str(&format!("\n ({name} {ty})")); + } + Operand::Writable { name, ty } => { + isle.push_str(&format!("\n ({name} Writable{ty})")); + } + Operand::Binop { reg } => { + isle.push_str(&format!("\n (dst Writable{reg})")); + isle.push_str(&format!("\n (src1 {reg})")); + isle.push_str(&format!("\n (src2 {reg})")); + } + } + } + isle.push_str(")\n"); + } + isle.push_str("))\n"); + + // Generate the `pulley_*` constructors with a `decl` and a `rule`. + for inst @ Inst { + name, snake_name, .. 
+ } in OPS.iter().chain(EXTENDED_OPS) + { + if inst.skip() { + continue; + } + // generate `decl` and `rule` at the same time, placing the `rule` in + // temporary storage on the side. Makes generation a bit easier to read + // as opposed to doing the decl first then the rule. + let mut rule = String::new(); + isle.push_str(&format!("(decl pulley_{snake_name} (")); + rule.push_str(&format!("(rule (pulley_{snake_name} ")); + let mut result = None; + let mut ops = Vec::new(); + for op in inst.operands() { + match op { + Operand::Normal { name, ty } => { + isle.push_str(ty); + rule.push_str(name); + ops.push(name); + } + Operand::Writable { name: _, ty } => { + assert!(result.is_none(), "{} has >1 result", inst.snake_name); + result = Some(ty); + } + Operand::Binop { reg } => { + isle.push_str(&format!("{reg} {reg}")); + rule.push_str("src1 src2"); + ops.push("src1"); + ops.push("src2"); + assert!(result.is_none(), "{} has >1 result", inst.snake_name); + result = Some(reg); + } + } + isle.push_str(" "); + rule.push_str(" "); + } + isle.push_str(") "); + rule.push_str(")"); + let ops = ops.join(" "); + match result { + Some(result) => { + isle.push_str(result); + rule.push_str(&format!( + " + (let ( + (dst Writable{result} (temp_writable_{})) + (_ Unit (emit (RawInst.{name} dst {ops}))) + ) + dst))\ +\n", + result.to_lowercase() + )); + } + None => { + isle.push_str("SideEffectNoResult"); + rule.push_str(&format!( + " (SideEffectNoResult.Inst (RawInst.{name} {ops})))\n", + )); + } + } + isle.push_str(")\n"); + + isle.push_str(&rule); + } + + std::fs::write(out_dir.join(filename), isle)?; + Ok(()) +} diff --git a/cranelift/codegen/src/isa/pulley_shared/abi.rs b/cranelift/codegen/src/isa/pulley_shared/abi.rs index 831dbe282865..f65ac69eec55 100644 --- a/cranelift/codegen/src/isa/pulley_shared/abi.rs +++ b/cranelift/codegen/src/isa/pulley_shared/abi.rs @@ -182,12 +182,12 @@ where let src = XReg::new(src).unwrap(); let dst = dst.try_into().unwrap(); match (signed, 
from_bits) { - (true, 8) => Inst::Sext8 { dst, src }.into(), - (true, 16) => Inst::Sext16 { dst, src }.into(), - (true, 32) => Inst::Sext32 { dst, src }.into(), - (false, 8) => Inst::Zext8 { dst, src }.into(), - (false, 16) => Inst::Zext16 { dst, src }.into(), - (false, 32) => Inst::Zext32 { dst, src }.into(), + (true, 8) => RawInst::Sext8 { dst, src }.into(), + (true, 16) => RawInst::Sext16 { dst, src }.into(), + (true, 32) => RawInst::Sext32 { dst, src }.into(), + (false, 8) => RawInst::Zext8 { dst, src }.into(), + (false, 16) => RawInst::Zext16 { dst, src }.into(), + (false, 32) => RawInst::Zext32 { dst, src }.into(), _ => unimplemented!("extend {from_bits} to {to_bits} as signed? {signed}"), } } @@ -220,8 +220,8 @@ where let dst = into_reg.try_into().unwrap(); let imm = imm as i32; smallvec![ - Inst::Xconst32 { dst, imm }.into(), - Inst::Xadd32 { + RawInst::Xconst32 { dst, imm }.into(), + RawInst::Xadd32 { dst, src1: from_reg.try_into().unwrap(), src2: dst.to_reg(), @@ -261,13 +261,13 @@ where let inst = if amount < 0 { let amount = amount.checked_neg().unwrap(); if let Ok(amt) = u32::try_from(amount) { - Inst::StackAlloc32 { amt } + RawInst::StackAlloc32 { amt } } else { unreachable!() } } else { if let Ok(amt) = u32::try_from(amount) { - Inst::StackFree32 { amt } + RawInst::StackFree32 { amt } } else { unreachable!() } @@ -284,7 +284,7 @@ where let mut insts = SmallVec::new(); if frame_layout.setup_area_size > 0 { - insts.push(Inst::PushFrame.into()); + insts.push(RawInst::PushFrame.into()); if flags.unwind_info() { insts.push( Inst::Unwind { @@ -310,7 +310,7 @@ where let mut insts = SmallVec::new(); if frame_layout.setup_area_size > 0 { - insts.push(Inst::PopFrame.into()); + insts.push(RawInst::PopFrame.into()); } if frame_layout.tail_args_size > 0 { @@ -327,7 +327,7 @@ where _isa_flags: &PulleyFlags, _frame_layout: &FrameLayout, ) -> SmallInstVec { - smallvec![Inst::Ret {}.into()] + smallvec![RawInst::Ret {}.into()] } fn gen_probestack(_insts: &mut 
SmallInstVec, _frame_size: u32) { diff --git a/cranelift/codegen/src/isa/pulley_shared/inst.isle b/cranelift/codegen/src/isa/pulley_shared/inst.isle index c80cacd0b5b1..59c720a605b8 100644 --- a/cranelift/codegen/src/isa/pulley_shared/inst.isle +++ b/cranelift/codegen/src/isa/pulley_shared/inst.isle @@ -45,9 +45,6 @@ (dst WritableXReg) (reg XReg)) - ;; Return. - (Ret) - ;; Load an external symbol's address into a register. (LoadExtName (dst WritableXReg) (name BoxExternalName) @@ -77,35 +74,6 @@ (BrIfXult32 (src1 XReg) (src2 XReg) (taken MachLabel) (not_taken MachLabel)) (BrIfXulteq32 (src1 XReg) (src2 XReg) (taken MachLabel) (not_taken MachLabel)) - ;; Register-to-register moves. - (Xmov (dst WritableXReg) (src XReg)) - (Fmov (dst WritableFReg) (src FReg)) - (Vmov (dst WritableVReg) (src VReg)) - - ;; Integer constants, zero-extended to 64 bits. - (Xconst8 (dst WritableXReg) (imm i8)) - (Xconst16 (dst WritableXReg) (imm i16)) - (Xconst32 (dst WritableXReg) (imm i32)) - (Xconst64 (dst WritableXReg) (imm i64)) - - ;; Integer arithmetic. - (Xadd32 (dst WritableXReg) (src1 XReg) (src2 XReg)) - (Xadd64 (dst WritableXReg) (src1 XReg) (src2 XReg)) - - ;; Comparisons. - (Xeq64 (dst WritableXReg) (src1 XReg) (src2 XReg)) - (Xneq64 (dst WritableXReg) (src1 XReg) (src2 XReg)) - (Xslt64 (dst WritableXReg) (src1 XReg) (src2 XReg)) - (Xslteq64 (dst WritableXReg) (src1 XReg) (src2 XReg)) - (Xult64 (dst WritableXReg) (src1 XReg) (src2 XReg)) - (Xulteq64 (dst WritableXReg) (src1 XReg) (src2 XReg)) - (Xeq32 (dst WritableXReg) (src1 XReg) (src2 XReg)) - (Xneq32 (dst WritableXReg) (src1 XReg) (src2 XReg)) - (Xslt32 (dst WritableXReg) (src1 XReg) (src2 XReg)) - (Xslteq32 (dst WritableXReg) (src1 XReg) (src2 XReg)) - (Xult32 (dst WritableXReg) (src1 XReg) (src2 XReg)) - (Xulteq32 (dst WritableXReg) (src1 XReg) (src2 XReg)) - ;; Load the memory address referenced by `mem` into `dst`. (LoadAddr (dst WritableXReg) (mem Amode)) @@ -115,32 +83,18 @@ ;; Stores. 
(Store (mem Amode) (src Reg) (ty Type) (flags MemFlags)) - ;; Bitcasts. - (BitcastIntFromFloat32 (dst WritableXReg) (src FReg)) - (BitcastIntFromFloat64 (dst WritableXReg) (src FReg)) - (BitcastFloatFromInt32 (dst WritableFReg) (src XReg)) - (BitcastFloatFromInt64 (dst WritableFReg) (src XReg)) - - ;; Stack manipulations - (PushFrame) - (PopFrame) - (StackAlloc32 (amt u32)) - (StackFree32 (amt u32)) - - ;; Sign extensions. - (Zext8 (dst WritableXReg) (src XReg)) - (Zext16 (dst WritableXReg) (src XReg)) - (Zext32 (dst WritableXReg) (src XReg)) - (Sext8 (dst WritableXReg) (src XReg)) - (Sext16 (dst WritableXReg) (src XReg)) - (Sext32 (dst WritableXReg) (src XReg)) - - ;; Byte swaps. - (Bswap32 (dst WritableXReg) (src XReg)) - (Bswap64 (dst WritableXReg) (src XReg)) + ;; A raw pulley instruction generated at compile-time via Pulley's + ;; `for_each_op!` macro. This variant has `pulley_*` constructors to + ;; emit this instruction and auto-generated methods for other various + ;; bits and pieces of boilerplate in the backend. 
+ (Raw (raw RawInst)) ) ) +(decl raw_inst_to_inst (RawInst) MInst) +(rule (raw_inst_to_inst inst) (MInst.Raw inst)) +(convert RawInst MInst raw_inst_to_inst) + (type BoxCallInfo (primitive BoxCallInfo)) (type BoxCallIndInfo (primitive BoxCallIndInfo)) @@ -414,30 +368,6 @@ (_ Unit (emit (MInst.GetSpecial dst reg)))) dst)) -(decl pulley_xconst8 (i8) XReg) -(rule (pulley_xconst8 x) - (let ((reg WritableXReg (temp_writable_xreg)) - (_ Unit (emit (MInst.Xconst8 reg x)))) - reg)) - -(decl pulley_xconst16 (i16) XReg) -(rule (pulley_xconst16 x) - (let ((reg WritableXReg (temp_writable_xreg)) - (_ Unit (emit (MInst.Xconst16 reg x)))) - reg)) - -(decl pulley_xconst32 (i32) XReg) -(rule (pulley_xconst32 x) - (let ((reg WritableXReg (temp_writable_xreg)) - (_ Unit (emit (MInst.Xconst32 reg x)))) - reg)) - -(decl pulley_xconst64 (i64) XReg) -(rule (pulley_xconst64 x) - (let ((reg WritableXReg (temp_writable_xreg)) - (_ Unit (emit (MInst.Xconst64 reg x)))) - reg)) - (decl pulley_jump (MachLabel) SideEffectNoResult) (rule (pulley_jump label) (SideEffectNoResult.Inst (MInst.Jump label))) @@ -470,90 +400,6 @@ (rule (pulley_br_if_xulteq32 a b taken not_taken) (SideEffectNoResult.Inst (MInst.BrIfXulteq32 a b taken not_taken))) -(decl pulley_xadd32 (XReg XReg) XReg) -(rule (pulley_xadd32 a b) - (let ((dst WritableXReg (temp_writable_xreg)) - (_ Unit (emit (MInst.Xadd32 dst a b)))) - dst)) - -(decl pulley_xadd64 (XReg XReg) XReg) -(rule (pulley_xadd64 a b) - (let ((dst WritableXReg (temp_writable_xreg)) - (_ Unit (emit (MInst.Xadd64 dst a b)))) - dst)) - -(decl pulley_xeq64 (XReg XReg) XReg) -(rule (pulley_xeq64 a b) - (let ((dst WritableXReg (temp_writable_xreg)) - (_ Unit (emit (MInst.Xeq64 dst a b)))) - dst)) - -(decl pulley_xneq64 (XReg XReg) XReg) -(rule (pulley_xneq64 a b) - (let ((dst WritableXReg (temp_writable_xreg)) - (_ Unit (emit (MInst.Xneq64 dst a b)))) - dst)) - -(decl pulley_xslt64 (XReg XReg) XReg) -(rule (pulley_xslt64 a b) - (let ((dst WritableXReg 
(temp_writable_xreg)) - (_ Unit (emit (MInst.Xslt64 dst a b)))) - dst)) - -(decl pulley_xslteq64 (XReg XReg) XReg) -(rule (pulley_xslteq64 a b) - (let ((dst WritableXReg (temp_writable_xreg)) - (_ Unit (emit (MInst.Xslteq64 dst a b)))) - dst)) - -(decl pulley_xult64 (XReg XReg) XReg) -(rule (pulley_xult64 a b) - (let ((dst WritableXReg (temp_writable_xreg)) - (_ Unit (emit (MInst.Xult64 dst a b)))) - dst)) - -(decl pulley_xulteq64 (XReg XReg) XReg) -(rule (pulley_xulteq64 a b) - (let ((dst WritableXReg (temp_writable_xreg)) - (_ Unit (emit (MInst.Xulteq64 dst a b)))) - dst)) - -(decl pulley_xeq32 (XReg XReg) XReg) -(rule (pulley_xeq32 a b) - (let ((dst WritableXReg (temp_writable_xreg)) - (_ Unit (emit (MInst.Xeq32 dst a b)))) - dst)) - -(decl pulley_xneq32 (XReg XReg) XReg) -(rule (pulley_xneq32 a b) - (let ((dst WritableXReg (temp_writable_xreg)) - (_ Unit (emit (MInst.Xneq32 dst a b)))) - dst)) - -(decl pulley_xslt32 (XReg XReg) XReg) -(rule (pulley_xslt32 a b) - (let ((dst WritableXReg (temp_writable_xreg)) - (_ Unit (emit (MInst.Xslt32 dst a b)))) - dst)) - -(decl pulley_xslteq32 (XReg XReg) XReg) -(rule (pulley_xslteq32 a b) - (let ((dst WritableXReg (temp_writable_xreg)) - (_ Unit (emit (MInst.Xslteq32 dst a b)))) - dst)) - -(decl pulley_xult32 (XReg XReg) XReg) -(rule (pulley_xult32 a b) - (let ((dst WritableXReg (temp_writable_xreg)) - (_ Unit (emit (MInst.Xult32 dst a b)))) - dst)) - -(decl pulley_xulteq32 (XReg XReg) XReg) -(rule (pulley_xulteq32 a b) - (let ((dst WritableXReg (temp_writable_xreg)) - (_ Unit (emit (MInst.Xulteq32 dst a b)))) - dst)) - (decl pulley_load (Amode Type MemFlags ExtKind) Reg) (rule (pulley_load amode ty flags ext) (let ((dst WritableReg (temp_writable_reg ty)) @@ -564,82 +410,10 @@ (rule (pulley_store amode src ty flags) (SideEffectNoResult.Inst (MInst.Store amode src ty flags))) -(decl pulley_bitcast_float_from_int_32 (XReg) FReg) -(rule (pulley_bitcast_float_from_int_32 src) - (let ((dst WritableFReg (temp_writable_freg)) - 
(_ Unit (emit (MInst.BitcastFloatFromInt32 dst src)))) - dst)) - -(decl pulley_bitcast_float_from_int_64 (XReg) FReg) -(rule (pulley_bitcast_float_from_int_64 src) - (let ((dst WritableFReg (temp_writable_freg)) - (_ Unit (emit (MInst.BitcastFloatFromInt64 dst src)))) - dst)) - -(decl pulley_bitcast_int_from_float_32 (FReg) XReg) -(rule (pulley_bitcast_int_from_float_32 src) - (let ((dst WritableXReg (temp_writable_xreg)) - (_ Unit (emit (MInst.BitcastIntFromFloat32 dst src)))) - dst)) - -(decl pulley_bitcast_int_from_float_64 (FReg) XReg) -(rule (pulley_bitcast_int_from_float_64 src) - (let ((dst WritableXReg (temp_writable_xreg)) - (_ Unit (emit (MInst.BitcastIntFromFloat64 dst src)))) - dst)) - (decl gen_br_table (XReg MachLabel BoxVecMachLabel) Unit) (rule (gen_br_table idx default labels) (emit (MInst.BrTable idx default labels))) -(decl pulley_zext8 (XReg) XReg) -(rule (pulley_zext8 src) - (let ((dst WritableXReg (temp_writable_xreg)) - (_ Unit (emit (MInst.Zext8 dst src)))) - dst)) - -(decl pulley_zext16 (XReg) XReg) -(rule (pulley_zext16 src) - (let ((dst WritableXReg (temp_writable_xreg)) - (_ Unit (emit (MInst.Zext16 dst src)))) - dst)) - -(decl pulley_zext32 (XReg) XReg) -(rule (pulley_zext32 src) - (let ((dst WritableXReg (temp_writable_xreg)) - (_ Unit (emit (MInst.Zext32 dst src)))) - dst)) - -(decl pulley_sext8 (XReg) XReg) -(rule (pulley_sext8 src) - (let ((dst WritableXReg (temp_writable_xreg)) - (_ Unit (emit (MInst.Sext8 dst src)))) - dst)) - -(decl pulley_sext16 (XReg) XReg) -(rule (pulley_sext16 src) - (let ((dst WritableXReg (temp_writable_xreg)) - (_ Unit (emit (MInst.Sext16 dst src)))) - dst)) - -(decl pulley_sext32 (XReg) XReg) -(rule (pulley_sext32 src) - (let ((dst WritableXReg (temp_writable_xreg)) - (_ Unit (emit (MInst.Sext32 dst src)))) - dst)) - -(decl pulley_bswap32 (XReg) XReg) -(rule (pulley_bswap32 src) - (let ((dst WritableXReg (temp_writable_xreg)) - (_ Unit (emit (MInst.Bswap32 dst src)))) - dst)) - -(decl pulley_bswap64 
(XReg) XReg) -(rule (pulley_bswap64 src) - (let ((dst WritableXReg (temp_writable_xreg)) - (_ Unit (emit (MInst.Bswap64 dst src)))) - dst)) - ;;;; Helpers for Emitting Calls ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; (decl gen_call (SigRef ExternalName RelocDistance ValueSlice) InstOutput) diff --git a/cranelift/codegen/src/isa/pulley_shared/inst/emit.rs b/cranelift/codegen/src/isa/pulley_shared/inst/emit.rs index 78c13ed96e74..a4b862fad2ae 100644 --- a/cranelift/codegen/src/isa/pulley_shared/inst/emit.rs +++ b/cranelift/codegen/src/isa/pulley_shared/inst/emit.rs @@ -180,8 +180,6 @@ fn pulley_emit

( Inst::GetSpecial { dst, reg } => enc::xmov(sink, dst, reg), - Inst::Ret => enc::ret(sink), - Inst::LoadExtName { .. } => todo!(), Inst::Call { info } => { @@ -384,39 +382,6 @@ fn pulley_emit

( ); } - Inst::Xmov { dst, src } => enc::xmov(sink, dst, src), - Inst::Fmov { dst, src } => enc::fmov(sink, dst, src), - Inst::Vmov { dst, src } => enc::vmov(sink, dst, src), - - Inst::Xconst8 { dst, imm } => enc::xconst8(sink, dst, *imm), - Inst::Xconst16 { dst, imm } => enc::xconst16(sink, dst, *imm), - Inst::Xconst32 { dst, imm } => enc::xconst32(sink, dst, *imm), - Inst::Xconst64 { dst, imm } => enc::xconst64(sink, dst, *imm), - - Inst::Xadd32 { dst, src1, src2 } => enc::xadd32(sink, BinaryOperands::new(dst, src1, src2)), - Inst::Xadd64 { dst, src1, src2 } => enc::xadd64(sink, BinaryOperands::new(dst, src1, src2)), - Inst::Xeq64 { dst, src1, src2 } => enc::xeq64(sink, BinaryOperands::new(dst, src1, src2)), - Inst::Xneq64 { dst, src1, src2 } => enc::xneq64(sink, BinaryOperands::new(dst, src1, src2)), - Inst::Xslt64 { dst, src1, src2 } => enc::xslt64(sink, BinaryOperands::new(dst, src1, src2)), - Inst::Xslteq64 { dst, src1, src2 } => { - enc::xslteq64(sink, BinaryOperands::new(dst, src1, src2)) - } - Inst::Xult64 { dst, src1, src2 } => enc::xult64(sink, BinaryOperands::new(dst, src1, src2)), - Inst::Xulteq64 { dst, src1, src2 } => { - enc::xulteq64(sink, BinaryOperands::new(dst, src1, src2)) - } - - Inst::Xeq32 { dst, src1, src2 } => enc::xeq32(sink, BinaryOperands::new(dst, src1, src2)), - Inst::Xneq32 { dst, src1, src2 } => enc::xneq32(sink, BinaryOperands::new(dst, src1, src2)), - Inst::Xslt32 { dst, src1, src2 } => enc::xslt32(sink, BinaryOperands::new(dst, src1, src2)), - Inst::Xslteq32 { dst, src1, src2 } => { - enc::xslteq32(sink, BinaryOperands::new(dst, src1, src2)) - } - Inst::Xult32 { dst, src1, src2 } => enc::xult32(sink, BinaryOperands::new(dst, src1, src2)), - Inst::Xulteq32 { dst, src1, src2 } => { - enc::xulteq32(sink, BinaryOperands::new(dst, src1, src2)) - } - Inst::LoadAddr { dst, mem } => { let base = mem.get_base_register(); let offset = mem.get_offset_with_state(state); @@ -503,11 +468,6 @@ fn pulley_emit

( } } - Inst::BitcastIntFromFloat32 { dst, src } => enc::bitcast_int_from_float_32(sink, dst, src), - Inst::BitcastIntFromFloat64 { dst, src } => enc::bitcast_int_from_float_64(sink, dst, src), - Inst::BitcastFloatFromInt32 { dst, src } => enc::bitcast_float_from_int_32(sink, dst, src), - Inst::BitcastFloatFromInt64 { dst, src } => enc::bitcast_float_from_int_64(sink, dst, src), - Inst::BrTable { idx, default, @@ -549,25 +509,15 @@ fn pulley_emit

( *start_offset = sink.cur_offset(); } - Inst::PushFrame => { - sink.add_trap(ir::TrapCode::STACK_OVERFLOW); - enc::push_frame(sink); - } - Inst::PopFrame => enc::pop_frame(sink), - Inst::StackAlloc32 { amt } => { - sink.add_trap(ir::TrapCode::STACK_OVERFLOW); - enc::stack_alloc32(sink, *amt); + Inst::Raw { raw } => { + match raw { + RawInst::PushFrame | RawInst::StackAlloc32 { .. } => { + sink.add_trap(ir::TrapCode::STACK_OVERFLOW); + } + _ => {} + } + super::generated::emit(raw, sink) } - Inst::StackFree32 { amt } => enc::stack_free32(sink, *amt), - - Inst::Zext8 { dst, src } => enc::zext8(sink, dst, src), - Inst::Zext16 { dst, src } => enc::zext16(sink, dst, src), - Inst::Zext32 { dst, src } => enc::zext32(sink, dst, src), - Inst::Sext8 { dst, src } => enc::sext8(sink, dst, src), - Inst::Sext16 { dst, src } => enc::sext16(sink, dst, src), - Inst::Sext32 { dst, src } => enc::sext32(sink, dst, src), - Inst::Bswap32 { dst, src } => enc::bswap32(sink, dst, src), - Inst::Bswap64 { dst, src } => enc::bswap64(sink, dst, src), } } diff --git a/cranelift/codegen/src/isa/pulley_shared/inst/mod.rs b/cranelift/codegen/src/isa/pulley_shared/inst/mod.rs index 460ad9fe30db..dd71f31050bf 100644 --- a/cranelift/codegen/src/isa/pulley_shared/inst/mod.rs +++ b/cranelift/codegen/src/isa/pulley_shared/inst/mod.rs @@ -24,9 +24,23 @@ pub use self::emit::*; // Instructions (top level): definition pub use crate::isa::pulley_shared::lower::isle::generated_code::MInst as Inst; +pub use crate::isa::pulley_shared::lower::isle::generated_code::RawInst; + +impl From for Inst { + fn from(raw: RawInst) -> Inst { + Inst::Raw { raw } + } +} use super::PulleyTargetKind; +mod generated { + use super::*; + use crate::isa::pulley_shared::lower::isle::generated_code::RawInst; + + include!(concat!(env!("OUT_DIR"), "/pulley_inst_gen.rs")); +} + impl Inst { /// Generic constructor for a load (zero-extending where appropriate). 
pub fn gen_load(dst: Writable, mem: Amode, ty: Type, flags: MemFlags) -> Inst { @@ -62,9 +76,6 @@ fn pulley_get_operands(inst: &mut Inst, collector: &mut impl OperandVisitor) { collector.reg_fixed_use(vreg, *preg); } } - Inst::Ret => { - unreachable!("`ret` is only added after regalloc") - } Inst::Unwind { .. } | Inst::Trap { .. } | Inst::Nop => {} @@ -167,45 +178,6 @@ fn pulley_get_operands(inst: &mut Inst, collector: &mut impl OperandVisitor) { collector.reg_use(src2); } - Inst::Xmov { dst, src } => { - collector.reg_use(src); - collector.reg_def(dst); - } - Inst::Fmov { dst, src } => { - collector.reg_use(src); - collector.reg_def(dst); - } - Inst::Vmov { dst, src } => { - collector.reg_use(src); - collector.reg_def(dst); - } - - Inst::Xconst8 { dst, imm: _ } - | Inst::Xconst16 { dst, imm: _ } - | Inst::Xconst32 { dst, imm: _ } - | Inst::Xconst64 { dst, imm: _ } => { - collector.reg_def(dst); - } - - Inst::Xadd32 { dst, src1, src2 } - | Inst::Xadd64 { dst, src1, src2 } - | Inst::Xeq64 { dst, src1, src2 } - | Inst::Xneq64 { dst, src1, src2 } - | Inst::Xslt64 { dst, src1, src2 } - | Inst::Xslteq64 { dst, src1, src2 } - | Inst::Xult64 { dst, src1, src2 } - | Inst::Xulteq64 { dst, src1, src2 } - | Inst::Xeq32 { dst, src1, src2 } - | Inst::Xneq32 { dst, src1, src2 } - | Inst::Xslt32 { dst, src1, src2 } - | Inst::Xslteq32 { dst, src1, src2 } - | Inst::Xult32 { dst, src1, src2 } - | Inst::Xulteq32 { dst, src1, src2 } => { - collector.reg_use(src1); - collector.reg_use(src2); - collector.reg_def(dst); - } - Inst::LoadAddr { dst, mem } => { collector.reg_def(dst); mem.get_operands(collector); @@ -232,41 +204,11 @@ fn pulley_get_operands(inst: &mut Inst, collector: &mut impl OperandVisitor) { collector.reg_use(src); } - Inst::BitcastIntFromFloat32 { dst, src } => { - collector.reg_use(src); - collector.reg_def(dst); - } - Inst::BitcastIntFromFloat64 { dst, src } => { - collector.reg_use(src); - collector.reg_def(dst); - } - Inst::BitcastFloatFromInt32 { dst, src } => { - 
collector.reg_use(src); - collector.reg_def(dst); - } - Inst::BitcastFloatFromInt64 { dst, src } => { - collector.reg_use(src); - collector.reg_def(dst); - } - Inst::BrTable { idx, .. } => { collector.reg_use(idx); } - Inst::StackAlloc32 { .. } | Inst::StackFree32 { .. } | Inst::PushFrame | Inst::PopFrame => { - } - - Inst::Zext8 { dst, src } - | Inst::Zext16 { dst, src } - | Inst::Zext32 { dst, src } - | Inst::Sext8 { dst, src } - | Inst::Sext16 { dst, src } - | Inst::Sext32 { dst, src } - | Inst::Bswap32 { dst, src } - | Inst::Bswap64 { dst, src } => { - collector.reg_use(src); - collector.reg_def(dst); - } + Inst::Raw { raw } => generated::get_operands(raw, collector), } } @@ -296,6 +238,18 @@ where } } +impl

From for InstAndKind

+where + P: PulleyTargetKind, +{ + fn from(inst: RawInst) -> Self { + Self { + inst: inst.into(), + kind: PhantomData, + } + } +} + impl

From> for Inst where P: PulleyTargetKind, @@ -359,7 +313,9 @@ where fn is_move(&self) -> Option<(Writable, Reg)> { match self.inst { - Inst::Xmov { dst, src } => Some((Writable::from_reg(*dst.to_reg()), *src)), + Inst::Raw { + raw: RawInst::Xmov { dst, src }, + } => Some((Writable::from_reg(*dst.to_reg()), *src)), _ => None, } } @@ -384,7 +340,10 @@ where fn is_term(&self) -> MachTerminator { match self.inst { - Inst::Ret { .. } | Inst::Rets { .. } => MachTerminator::Ret, + Inst::Raw { + raw: RawInst::Ret { .. }, + } + | Inst::Rets { .. } => MachTerminator::Ret, Inst::Jump { .. } => MachTerminator::Uncond, Inst::BrIf { .. } | Inst::BrIfXeq32 { .. } @@ -404,17 +363,17 @@ where fn gen_move(to_reg: Writable, from_reg: Reg, ty: Type) -> Self { match ty { - ir::types::I8 | ir::types::I16 | ir::types::I32 | ir::types::I64 => Inst::Xmov { + ir::types::I8 | ir::types::I16 | ir::types::I32 | ir::types::I64 => RawInst::Xmov { dst: WritableXReg::try_from(to_reg).unwrap(), src: XReg::new(from_reg).unwrap(), } .into(), - ir::types::F32 | ir::types::F64 => Inst::Fmov { + ir::types::F32 | ir::types::F64 => RawInst::Fmov { dst: WritableFReg::try_from(to_reg).unwrap(), src: FReg::new(from_reg).unwrap(), } .into(), - _ if ty.is_vector() => Inst::Vmov { + _ if ty.is_vector() => RawInst::Vmov { dst: WritableVReg::try_from(to_reg).unwrap(), src: VReg::new(from_reg).unwrap(), } @@ -585,8 +544,6 @@ impl Inst { Inst::Nop => format!("nop"), - Inst::Ret => format!("ret"), - Inst::GetSpecial { dst, reg } => { let dst = format_reg(*dst.to_reg()); let reg = format_reg(**reg); @@ -697,125 +654,6 @@ impl Inst { format!("br_if_xulteq32 {src1}, {src2}, {taken}; jump {not_taken}") } - Inst::Xmov { dst, src } => { - let dst = format_reg(*dst.to_reg()); - let src = format_reg(**src); - format!("{dst} = xmov {src}") - } - Inst::Fmov { dst, src } => { - let dst = format_reg(*dst.to_reg()); - let src = format_reg(**src); - format!("{dst} = fmov {src}") - } - Inst::Vmov { dst, src } => { - let dst = 
format_reg(*dst.to_reg()); - let src = format_reg(**src); - format!("{dst} = vmov {src}") - } - - Inst::Xconst8 { dst, imm } => { - let dst = format_reg(*dst.to_reg()); - format!("{dst} = xconst8 {imm}") - } - Inst::Xconst16 { dst, imm } => { - let dst = format_reg(*dst.to_reg()); - format!("{dst} = xconst16 {imm}") - } - Inst::Xconst32 { dst, imm } => { - let dst = format_reg(*dst.to_reg()); - format!("{dst} = xconst32 {imm}") - } - Inst::Xconst64 { dst, imm } => { - let dst = format_reg(*dst.to_reg()); - format!("{dst} = xconst64 {imm}") - } - - Inst::Xadd32 { dst, src1, src2 } => format!( - "{} = xadd32 {}, {}", - format_reg(*dst.to_reg()), - format_reg(**src1), - format_reg(**src2) - ), - Inst::Xadd64 { dst, src1, src2 } => format!( - "{} = xadd64 {}, {}", - format_reg(*dst.to_reg()), - format_reg(**src1), - format_reg(**src2) - ), - - Inst::Xeq64 { dst, src1, src2 } => format!( - "{} = xeq64 {}, {}", - format_reg(*dst.to_reg()), - format_reg(**src1), - format_reg(**src2) - ), - Inst::Xneq64 { dst, src1, src2 } => format!( - "{} = xneq64 {}, {}", - format_reg(*dst.to_reg()), - format_reg(**src1), - format_reg(**src2) - ), - Inst::Xslt64 { dst, src1, src2 } => format!( - "{} = xslt64 {}, {}", - format_reg(*dst.to_reg()), - format_reg(**src1), - format_reg(**src2) - ), - Inst::Xslteq64 { dst, src1, src2 } => format!( - "{} = xslteq64 {}, {}", - format_reg(*dst.to_reg()), - format_reg(**src1), - format_reg(**src2) - ), - Inst::Xult64 { dst, src1, src2 } => format!( - "{} = xult64 {}, {}", - format_reg(*dst.to_reg()), - format_reg(**src1), - format_reg(**src2) - ), - Inst::Xulteq64 { dst, src1, src2 } => format!( - "{} = xulteq64 {}, {}", - format_reg(*dst.to_reg()), - format_reg(**src1), - format_reg(**src2) - ), - Inst::Xeq32 { dst, src1, src2 } => format!( - "{} = xeq32 {}, {}", - format_reg(*dst.to_reg()), - format_reg(**src1), - format_reg(**src2) - ), - Inst::Xneq32 { dst, src1, src2 } => format!( - "{} = xneq32 {}, {}", - format_reg(*dst.to_reg()), - 
format_reg(**src1), - format_reg(**src2) - ), - Inst::Xslt32 { dst, src1, src2 } => format!( - "{} = xslt32 {}, {}", - format_reg(*dst.to_reg()), - format_reg(**src1), - format_reg(**src2) - ), - Inst::Xslteq32 { dst, src1, src2 } => format!( - "{} = xslteq32 {}, {}", - format_reg(*dst.to_reg()), - format_reg(**src1), - format_reg(**src2) - ), - Inst::Xult32 { dst, src1, src2 } => format!( - "{} = xult32 {}, {}", - format_reg(*dst.to_reg()), - format_reg(**src1), - format_reg(**src2) - ), - Inst::Xulteq32 { dst, src1, src2 } => format!( - "{} = xulteq32 {}, {}", - format_reg(*dst.to_reg()), - format_reg(**src1), - format_reg(**src2) - ), - Inst::LoadAddr { dst, mem } => { let dst = format_reg(*dst.to_reg()); let mem = mem.to_string(); @@ -848,27 +686,6 @@ impl Inst { format!("store{ty} {mem}, {src} // flags = {flags}") } - Inst::BitcastIntFromFloat32 { dst, src } => { - let dst = format_reg(*dst.to_reg()); - let src = format_reg(**src); - format!("{dst} = bitcast_int_from_float32 {src}") - } - Inst::BitcastIntFromFloat64 { dst, src } => { - let dst = format_reg(*dst.to_reg()); - let src = format_reg(**src); - format!("{dst} = bitcast_int_from_float64 {src}") - } - Inst::BitcastFloatFromInt32 { dst, src } => { - let dst = format_reg(*dst.to_reg()); - let src = format_reg(**src); - format!("{dst} = bitcast_float_from_int32 {src}") - } - Inst::BitcastFloatFromInt64 { dst, src } => { - let dst = format_reg(*dst.to_reg()); - let src = format_reg(**src); - format!("{dst} = bitcast_float_from_int64 {src}") - } - Inst::BrTable { idx, default, @@ -877,56 +694,7 @@ impl Inst { let idx = format_reg(**idx); format!("br_table {idx} {default:?} {targets:?}") } - - Inst::StackAlloc32 { amt } => { - format!("stack_alloc32 {amt:#x}") - } - Inst::StackFree32 { amt } => { - format!("stack_free32 {amt:#x}") - } - Inst::PushFrame => format!("push_frame"), - Inst::PopFrame => format!("pop_frame"), - - Inst::Zext8 { dst, src } => { - let dst = format_reg(*dst.to_reg()); - let src = 
format_reg(**src); - format!("zext8 {dst}, {src}") - } - Inst::Zext16 { dst, src } => { - let dst = format_reg(*dst.to_reg()); - let src = format_reg(**src); - format!("zext16 {dst}, {src}") - } - Inst::Zext32 { dst, src } => { - let dst = format_reg(*dst.to_reg()); - let src = format_reg(**src); - format!("zext32 {dst}, {src}") - } - Inst::Sext8 { dst, src } => { - let dst = format_reg(*dst.to_reg()); - let src = format_reg(**src); - format!("sext8 {dst}, {src}") - } - Inst::Sext16 { dst, src } => { - let dst = format_reg(*dst.to_reg()); - let src = format_reg(**src); - format!("sext16 {dst}, {src}") - } - Inst::Sext32 { dst, src } => { - let dst = format_reg(*dst.to_reg()); - let src = format_reg(**src); - format!("sext32 {dst}, {src}") - } - Inst::Bswap32 { dst, src } => { - let dst = format_reg(*dst.to_reg()); - let src = format_reg(**src); - format!("bswap32 {dst}, {src}") - } - Inst::Bswap64 { dst, src } => { - let dst = format_reg(*dst.to_reg()); - let src = format_reg(**src); - format!("bswap64 {dst}, {src}") - } + Inst::Raw { raw } => generated::print(raw), } } } diff --git a/cranelift/filetests/filetests/isa/pulley32/br_table.clif b/cranelift/filetests/filetests/isa/pulley32/br_table.clif index 624b808192c1..c9f6844ad463 100644 --- a/cranelift/filetests/filetests/isa/pulley32/br_table.clif +++ b/cranelift/filetests/filetests/isa/pulley32/br_table.clif @@ -34,19 +34,19 @@ block5(v5: i32): ; block2: ; jump label4 ; block3: -; x5 = xconst8 3 +; xconst8 x5, 3 ; jump label7 ; block4: -; x5 = xconst8 2 +; xconst8 x5, 2 ; jump label7 ; block5: -; x5 = xconst8 1 +; xconst8 x5, 1 ; jump label7 ; block6: -; x5 = xconst8 4 +; xconst8 x5, 4 ; jump label7 ; block7: -; x0 = xadd32 x0, x5 +; xadd32 x0, x0, x5 ; ret ; ; Disassembled: diff --git a/cranelift/filetests/filetests/isa/pulley32/brif-icmp.clif b/cranelift/filetests/filetests/isa/pulley32/brif-icmp.clif index 5ccc545b7371..3e51993a7630 100644 --- a/cranelift/filetests/filetests/isa/pulley32/brif-icmp.clif +++ 
b/cranelift/filetests/filetests/isa/pulley32/brif-icmp.clif @@ -19,10 +19,10 @@ block2: ; block0: ; br_if_xeq32 x0, x1, label2; jump label1 ; block1: -; x0 = xconst8 1 +; xconst8 x0, 1 ; ret ; block2: -; x0 = xconst8 2 +; xconst8 x0, 2 ; ret ; ; Disassembled: @@ -50,10 +50,10 @@ block2: ; block0: ; br_if_xneq32 x0, x1, label2; jump label1 ; block1: -; x0 = xconst8 1 +; xconst8 x0, 1 ; ret ; block2: -; x0 = xconst8 2 +; xconst8 x0, 2 ; ret ; ; Disassembled: @@ -81,10 +81,10 @@ block2: ; block0: ; br_if_xult32 x0, x1, label2; jump label1 ; block1: -; x0 = xconst8 1 +; xconst8 x0, 1 ; ret ; block2: -; x0 = xconst8 2 +; xconst8 x0, 2 ; ret ; ; Disassembled: @@ -112,10 +112,10 @@ block2: ; block0: ; br_if_xulteq32 x0, x1, label2; jump label1 ; block1: -; x0 = xconst8 1 +; xconst8 x0, 1 ; ret ; block2: -; x0 = xconst8 2 +; xconst8 x0, 2 ; ret ; ; Disassembled: @@ -143,10 +143,10 @@ block2: ; block0: ; br_if_xslt32 x0, x1, label2; jump label1 ; block1: -; x0 = xconst8 1 +; xconst8 x0, 1 ; ret ; block2: -; x0 = xconst8 2 +; xconst8 x0, 2 ; ret ; ; Disassembled: @@ -174,10 +174,10 @@ block2: ; block0: ; br_if_xslteq32 x0, x1, label2; jump label1 ; block1: -; x0 = xconst8 1 +; xconst8 x0, 1 ; ret ; block2: -; x0 = xconst8 2 +; xconst8 x0, 2 ; ret ; ; Disassembled: @@ -205,10 +205,10 @@ block2: ; block0: ; br_if_xult32 x1, x0, label2; jump label1 ; block1: -; x0 = xconst8 1 +; xconst8 x0, 1 ; ret ; block2: -; x0 = xconst8 2 +; xconst8 x0, 2 ; ret ; ; Disassembled: @@ -236,10 +236,10 @@ block2: ; block0: ; br_if_xulteq32 x1, x0, label2; jump label1 ; block1: -; x0 = xconst8 1 +; xconst8 x0, 1 ; ret ; block2: -; x0 = xconst8 2 +; xconst8 x0, 2 ; ret ; ; Disassembled: @@ -267,10 +267,10 @@ block2: ; block0: ; br_if_xslt32 x1, x0, label2; jump label1 ; block1: -; x0 = xconst8 1 +; xconst8 x0, 1 ; ret ; block2: -; x0 = xconst8 2 +; xconst8 x0, 2 ; ret ; ; Disassembled: @@ -298,10 +298,10 @@ block2: ; block0: ; br_if_xslteq32 x1, x0, label2; jump label1 ; block1: -; x0 = xconst8 1 
+; xconst8 x0, 1 ; ret ; block2: -; x0 = xconst8 2 +; xconst8 x0, 2 ; ret ; ; Disassembled: @@ -330,10 +330,10 @@ block2: ; block0: ; br_if_xeq32 x0, x1, label2; jump label1 ; block1: -; x0 = xconst8 1 +; xconst8 x0, 1 ; ret ; block2: -; x0 = xconst8 2 +; xconst8 x0, 2 ; ret ; ; Disassembled: diff --git a/cranelift/filetests/filetests/isa/pulley32/brif.clif b/cranelift/filetests/filetests/isa/pulley32/brif.clif index 36c279c54d22..cc6bb012eb28 100644 --- a/cranelift/filetests/filetests/isa/pulley32/brif.clif +++ b/cranelift/filetests/filetests/isa/pulley32/brif.clif @@ -18,10 +18,10 @@ block2: ; block0: ; br_if x0, label2; jump label1 ; block1: -; x0 = xconst8 0 +; xconst8 x0, 0 ; ret ; block2: -; x0 = xconst8 1 +; xconst8 x0, 1 ; ret ; ; Disassembled: @@ -48,10 +48,10 @@ block2: ; block0: ; br_if x0, label2; jump label1 ; block1: -; x0 = xconst8 0 +; xconst8 x0, 0 ; ret ; block2: -; x0 = xconst8 1 +; xconst8 x0, 1 ; ret ; ; Disassembled: @@ -78,10 +78,10 @@ block2: ; block0: ; br_if x0, label2; jump label1 ; block1: -; x0 = xconst8 0 +; xconst8 x0, 0 ; ret ; block2: -; x0 = xconst8 1 +; xconst8 x0, 1 ; ret ; ; Disassembled: @@ -108,10 +108,10 @@ block2: ; block0: ; br_if x0, label2; jump label1 ; block1: -; x0 = xconst8 0 +; xconst8 x0, 0 ; ret ; block2: -; x0 = xconst8 1 +; xconst8 x0, 1 ; ret ; ; Disassembled: @@ -137,13 +137,13 @@ block2: ; VCode: ; block0: -; x5 = xeq32 x0, x1 +; xeq32 x5, x0, x1 ; br_if x5, label2; jump label1 ; block1: -; x0 = xconst8 0 +; xconst8 x0, 0 ; ret ; block2: -; x0 = xconst8 1 +; xconst8 x0, 1 ; ret ; ; Disassembled: @@ -170,13 +170,13 @@ block2: ; VCode: ; block0: -; x5 = xneq32 x0, x1 +; xneq32 x5, x0, x1 ; br_if x5, label2; jump label1 ; block1: -; x0 = xconst8 0 +; xconst8 x0, 0 ; ret ; block2: -; x0 = xconst8 1 +; xconst8 x0, 1 ; ret ; ; Disassembled: @@ -205,10 +205,10 @@ block2: ; block0: ; br_if_xslt32 x0, x1, label2; jump label1 ; block1: -; x0 = xconst8 0 +; xconst8 x0, 0 ; ret ; block2: -; x0 = xconst8 1 +; xconst8 x0, 1 
; ret ; ; Disassembled: @@ -234,13 +234,13 @@ block2: ; VCode: ; block0: -; x5 = xulteq64 x1, x0 +; xulteq64 x5, x1, x0 ; br_if x5, label2; jump label1 ; block1: -; x0 = xconst8 0 +; xconst8 x0, 0 ; ret ; block2: -; x0 = xconst8 1 +; xconst8 x0, 1 ; ret ; ; Disassembled: diff --git a/cranelift/filetests/filetests/isa/pulley32/call.clif b/cranelift/filetests/filetests/isa/pulley32/call.clif index 6a162d085b50..ab77f37ab5a5 100644 --- a/cranelift/filetests/filetests/isa/pulley32/call.clif +++ b/cranelift/filetests/filetests/isa/pulley32/call.clif @@ -15,9 +15,9 @@ block0: ; VCode: ; push_frame ; block0: -; x0 = xconst8 0 +; xconst8 x0, 0 ; call CallInfo { dest: TestCase(%g), uses: [CallArgPair { vreg: p0i, preg: p0i }], defs: [CallRetPair { vreg: Writable { reg: p0i }, preg: p0i }], clobbers: PRegSet { bits: [65534, 65279, 4294967295, 0] }, callee_conv: Fast, caller_conv: Fast, callee_pop_size: 0 } -; x0 = xconst8 1 +; xconst8 x0, 1 ; pop_frame ; ret ; @@ -42,9 +42,9 @@ block0: ; VCode: ; push_frame ; block0: -; x0 = xconst8 0 +; xconst8 x0, 0 ; call CallInfo { dest: TestCase(%g), uses: [CallArgPair { vreg: p0i, preg: p0i }], defs: [CallRetPair { vreg: Writable { reg: p0i }, preg: p0i }], clobbers: PRegSet { bits: [65534, 65279, 4294967295, 0] }, callee_conv: Fast, caller_conv: Fast, callee_pop_size: 0 } -; x0 = xconst8 1 +; xconst8 x0, 1 ; pop_frame ; ret ; @@ -71,10 +71,10 @@ block0: ; VCode: ; push_frame ; block0: -; x0 = xconst8 0 -; x1 = xconst8 1 -; x2 = xconst8 2 -; x3 = xconst8 3 +; xconst8 x0, 0 +; xconst8 x1, 1 +; xconst8 x2, 2 +; xconst8 x3, 3 ; call CallInfo { dest: TestCase(%g), uses: [CallArgPair { vreg: p0i, preg: p0i }, CallArgPair { vreg: p1i, preg: p1i }, CallArgPair { vreg: p2i, preg: p2i }, CallArgPair { vreg: p3i, preg: p3i }], defs: [], clobbers: PRegSet { bits: [65535, 65279, 4294967295, 0] }, callee_conv: Fast, caller_conv: Fast, callee_pop_size: 0 } ; pop_frame ; ret @@ -104,9 +104,9 @@ block0: ; push_frame ; block0: ; call CallInfo { dest: 
TestCase(%g), uses: [], defs: [CallRetPair { vreg: Writable { reg: p0i }, preg: p0i }, CallRetPair { vreg: Writable { reg: p1i }, preg: p1i }, CallRetPair { vreg: Writable { reg: p2i }, preg: p2i }, CallRetPair { vreg: Writable { reg: p3i }, preg: p3i }], clobbers: PRegSet { bits: [65520, 65279, 4294967295, 0] }, callee_conv: Fast, caller_conv: Fast, callee_pop_size: 0 } -; x4 = xadd64 x0, x2 -; x3 = xadd64 x1, x3 -; x0 = xadd64 x4, x3 +; xadd64 x4, x0, x2 +; xadd64 x3, x1, x3 +; xadd64 x0, x4, x3 ; pop_frame ; ret ; @@ -130,32 +130,32 @@ block0: ; VCode: ; push_frame -; stack_alloc32 0x30 +; stack_alloc32 48 ; block0: -; x15 = xconst8 0 +; xconst8 x15, 0 ; store64 OutgoingArg(0), x15 // flags = notrap aligned ; store64 OutgoingArg(8), x15 // flags = notrap aligned ; store64 OutgoingArg(16), x15 // flags = notrap aligned ; store64 OutgoingArg(24), x15 // flags = notrap aligned ; store64 OutgoingArg(32), x15 // flags = notrap aligned ; store64 OutgoingArg(40), x15 // flags = notrap aligned -; x0 = xmov x15 -; x1 = xmov x15 -; x2 = xmov x15 -; x3 = xmov x15 -; x4 = xmov x15 -; x5 = xmov x15 -; x6 = xmov x15 -; x7 = xmov x15 -; x8 = xmov x15 -; x9 = xmov x15 -; x10 = xmov x15 -; x11 = xmov x15 -; x12 = xmov x15 -; x13 = xmov x15 -; x14 = xmov x15 +; xmov x0, x15 +; xmov x1, x15 +; xmov x2, x15 +; xmov x3, x15 +; xmov x4, x15 +; xmov x5, x15 +; xmov x6, x15 +; xmov x7, x15 +; xmov x8, x15 +; xmov x9, x15 +; xmov x10, x15 +; xmov x11, x15 +; xmov x12, x15 +; xmov x13, x15 +; xmov x14, x15 ; call CallInfo { dest: TestCase(%g), uses: [CallArgPair { vreg: p0i, preg: p0i }, CallArgPair { vreg: p1i, preg: p1i }, CallArgPair { vreg: p2i, preg: p2i }, CallArgPair { vreg: p3i, preg: p3i }, CallArgPair { vreg: p4i, preg: p4i }, CallArgPair { vreg: p5i, preg: p5i }, CallArgPair { vreg: p6i, preg: p6i }, CallArgPair { vreg: p7i, preg: p7i }, CallArgPair { vreg: p8i, preg: p8i }, CallArgPair { vreg: p9i, preg: p9i }, CallArgPair { vreg: p10i, preg: p10i }, CallArgPair { vreg: p11i, 
preg: p11i }, CallArgPair { vreg: p12i, preg: p12i }, CallArgPair { vreg: p13i, preg: p13i }, CallArgPair { vreg: p14i, preg: p14i }, CallArgPair { vreg: p15i, preg: p15i }], defs: [], clobbers: PRegSet { bits: [65535, 65279, 4294967295, 0] }, callee_conv: Fast, caller_conv: Fast, callee_pop_size: 0 } -; stack_free32 0x30 +; stack_free32 48 ; pop_frame ; ret ; @@ -227,47 +227,47 @@ block0: ; VCode: ; push_frame -; stack_alloc32 0x40 +; stack_alloc32 64 ; store64 sp+56, x18 // flags = notrap aligned ; store64 sp+48, x20 // flags = notrap aligned ; block0: ; x0 = load_addr OutgoingArg(0) ; call CallInfo { dest: TestCase(%g), uses: [CallArgPair { vreg: p0i, preg: p0i }], defs: [CallRetPair { vreg: Writable { reg: p0i }, preg: p0i }, CallRetPair { vreg: Writable { reg: p1i }, preg: p1i }, CallRetPair { vreg: Writable { reg: p2i }, preg: p2i }, CallRetPair { vreg: Writable { reg: p3i }, preg: p3i }, CallRetPair { vreg: Writable { reg: p4i }, preg: p4i }, CallRetPair { vreg: Writable { reg: p5i }, preg: p5i }, CallRetPair { vreg: Writable { reg: p6i }, preg: p6i }, CallRetPair { vreg: Writable { reg: p7i }, preg: p7i }, CallRetPair { vreg: Writable { reg: p8i }, preg: p8i }, CallRetPair { vreg: Writable { reg: p9i }, preg: p9i }, CallRetPair { vreg: Writable { reg: p10i }, preg: p10i }, CallRetPair { vreg: Writable { reg: p11i }, preg: p11i }, CallRetPair { vreg: Writable { reg: p12i }, preg: p12i }, CallRetPair { vreg: Writable { reg: p13i }, preg: p13i }, CallRetPair { vreg: Writable { reg: p14i }, preg: p14i }, CallRetPair { vreg: Writable { reg: p15i }, preg: p15i }], clobbers: PRegSet { bits: [0, 65279, 4294967295, 0] }, callee_conv: Fast, caller_conv: Fast, callee_pop_size: 0 } -; x18 = xmov x13 -; x20 = xmov x11 +; xmov x18, x13 +; xmov x20, x11 ; x24 = load64_u OutgoingArg(0) // flags = notrap aligned ; x11 = load64_u OutgoingArg(8) // flags = notrap aligned ; x13 = load64_u OutgoingArg(16) // flags = notrap aligned ; x19 = load64_u OutgoingArg(24) // flags = 
notrap aligned ; x21 = load64_u OutgoingArg(32) // flags = notrap aligned -; x25 = xadd64 x0, x1 -; x23 = xadd64 x2, x3 -; x5 = xadd64 x4, x5 -; x6 = xadd64 x6, x7 -; x7 = xadd64 x8, x9 -; x0 = xmov x20 -; x4 = xadd64 x10, x0 -; x10 = xmov x18 -; x8 = xadd64 x12, x10 -; x14 = xadd64 x14, x15 -; x15 = xadd64 x24, x11 -; x13 = xadd64 x11, x13 -; x0 = xadd64 x19, x21 -; x1 = xadd64 x25, x23 -; x2 = xadd64 x5, x6 -; x3 = xadd64 x7, x4 -; x14 = xadd64 x8, x14 -; x13 = xadd64 x15, x13 -; x15 = xadd64 x0, x0 -; x0 = xadd64 x1, x2 -; x14 = xadd64 x3, x14 -; x13 = xadd64 x13, x15 -; x14 = xadd64 x0, x14 -; x13 = xadd64 x13, x13 -; x0 = xadd64 x14, x13 +; xadd64 x25, x0, x1 +; xadd64 x23, x2, x3 +; xadd64 x5, x4, x5 +; xadd64 x6, x6, x7 +; xadd64 x7, x8, x9 +; xmov x0, x20 +; xadd64 x4, x10, x0 +; xmov x10, x18 +; xadd64 x8, x12, x10 +; xadd64 x14, x14, x15 +; xadd64 x15, x24, x11 +; xadd64 x13, x11, x13 +; xadd64 x0, x19, x21 +; xadd64 x1, x25, x23 +; xadd64 x2, x5, x6 +; xadd64 x3, x7, x4 +; xadd64 x14, x8, x14 +; xadd64 x13, x15, x13 +; xadd64 x15, x0, x0 +; xadd64 x0, x1, x2 +; xadd64 x14, x3, x14 +; xadd64 x13, x13, x15 +; xadd64 x14, x0, x14 +; xadd64 x13, x13, x13 +; xadd64 x0, x14, x13 ; x18 = load64_u sp+56 // flags = notrap aligned ; x20 = load64_u sp+48 // flags = notrap aligned -; stack_free32 0x40 +; stack_free32 64 ; pop_frame ; ret ; diff --git a/cranelift/filetests/filetests/isa/pulley32/iadd.clif b/cranelift/filetests/filetests/isa/pulley32/iadd.clif index 1bde49304a87..908eb0662544 100644 --- a/cranelift/filetests/filetests/isa/pulley32/iadd.clif +++ b/cranelift/filetests/filetests/isa/pulley32/iadd.clif @@ -9,7 +9,7 @@ block0(v0: i8, v1: i8): ; VCode: ; block0: -; x0 = xadd32 x0, x1 +; xadd32 x0, x0, x1 ; ret ; ; Disassembled: @@ -24,7 +24,7 @@ block0(v0: i16, v1: i16): ; VCode: ; block0: -; x0 = xadd32 x0, x1 +; xadd32 x0, x0, x1 ; ret ; ; Disassembled: @@ -39,7 +39,7 @@ block0(v0: i32, v1: i32): ; VCode: ; block0: -; x0 = xadd32 x0, x1 +; xadd32 x0, x0, 
x1 ; ret ; ; Disassembled: @@ -54,7 +54,7 @@ block0(v0: i64, v1: i64): ; VCode: ; block0: -; x0 = xadd64 x0, x1 +; xadd64 x0, x0, x1 ; ret ; ; Disassembled: diff --git a/cranelift/filetests/filetests/isa/pulley32/icmp.clif b/cranelift/filetests/filetests/isa/pulley32/icmp.clif index 6926ca1400f7..8f2363f9e7db 100644 --- a/cranelift/filetests/filetests/isa/pulley32/icmp.clif +++ b/cranelift/filetests/filetests/isa/pulley32/icmp.clif @@ -9,7 +9,7 @@ block0(v0: i8, v1: i8): ; VCode: ; block0: -; x0 = xeq32 x0, x1 +; xeq32 x0, x0, x1 ; ret ; ; Disassembled: @@ -24,7 +24,7 @@ block0(v0: i16, v1: i16): ; VCode: ; block0: -; x0 = xeq32 x0, x1 +; xeq32 x0, x0, x1 ; ret ; ; Disassembled: @@ -39,7 +39,7 @@ block0(v0: i32, v1: i32): ; VCode: ; block0: -; x0 = xeq32 x0, x1 +; xeq32 x0, x0, x1 ; ret ; ; Disassembled: @@ -54,7 +54,7 @@ block0(v0: i64, v1: i64): ; VCode: ; block0: -; x0 = xeq64 x0, x1 +; xeq64 x0, x0, x1 ; ret ; ; Disassembled: @@ -69,7 +69,7 @@ block0(v0: i8, v1: i8): ; VCode: ; block0: -; x0 = xneq32 x0, x1 +; xneq32 x0, x0, x1 ; ret ; ; Disassembled: @@ -84,7 +84,7 @@ block0(v0: i16, v1: i16): ; VCode: ; block0: -; x0 = xneq32 x0, x1 +; xneq32 x0, x0, x1 ; ret ; ; Disassembled: @@ -99,7 +99,7 @@ block0(v0: i32, v1: i32): ; VCode: ; block0: -; x0 = xneq32 x0, x1 +; xneq32 x0, x0, x1 ; ret ; ; Disassembled: @@ -114,7 +114,7 @@ block0(v0: i64, v1: i64): ; VCode: ; block0: -; x0 = xneq64 x0, x1 +; xneq64 x0, x0, x1 ; ret ; ; Disassembled: @@ -129,7 +129,7 @@ block0(v0: i8, v1: i8): ; VCode: ; block0: -; x0 = xult32 x0, x1 +; xult32 x0, x0, x1 ; ret ; ; Disassembled: @@ -144,7 +144,7 @@ block0(v0: i16, v1: i16): ; VCode: ; block0: -; x0 = xult32 x0, x1 +; xult32 x0, x0, x1 ; ret ; ; Disassembled: @@ -159,7 +159,7 @@ block0(v0: i32, v1: i32): ; VCode: ; block0: -; x0 = xult32 x0, x1 +; xult32 x0, x0, x1 ; ret ; ; Disassembled: @@ -174,7 +174,7 @@ block0(v0: i64, v1: i64): ; VCode: ; block0: -; x0 = xult64 x0, x1 +; xult64 x0, x0, x1 ; ret ; ; Disassembled: @@ -189,7 
+189,7 @@ block0(v0: i8, v1: i8): ; VCode: ; block0: -; x0 = xulteq32 x0, x1 +; xulteq32 x0, x0, x1 ; ret ; ; Disassembled: @@ -204,7 +204,7 @@ block0(v0: i16, v1: i16): ; VCode: ; block0: -; x0 = xulteq32 x0, x1 +; xulteq32 x0, x0, x1 ; ret ; ; Disassembled: @@ -219,7 +219,7 @@ block0(v0: i32, v1: i32): ; VCode: ; block0: -; x0 = xulteq32 x0, x1 +; xulteq32 x0, x0, x1 ; ret ; ; Disassembled: @@ -234,7 +234,7 @@ block0(v0: i64, v1: i64): ; VCode: ; block0: -; x0 = xulteq64 x0, x1 +; xulteq64 x0, x0, x1 ; ret ; ; Disassembled: @@ -249,7 +249,7 @@ block0(v0: i8, v1: i8): ; VCode: ; block0: -; x0 = xslt32 x0, x1 +; xslt32 x0, x0, x1 ; ret ; ; Disassembled: @@ -264,7 +264,7 @@ block0(v0: i16, v1: i16): ; VCode: ; block0: -; x0 = xslt32 x0, x1 +; xslt32 x0, x0, x1 ; ret ; ; Disassembled: @@ -279,7 +279,7 @@ block0(v0: i32, v1: i32): ; VCode: ; block0: -; x0 = xslt32 x0, x1 +; xslt32 x0, x0, x1 ; ret ; ; Disassembled: @@ -294,7 +294,7 @@ block0(v0: i64, v1: i64): ; VCode: ; block0: -; x0 = xslt64 x0, x1 +; xslt64 x0, x0, x1 ; ret ; ; Disassembled: @@ -309,7 +309,7 @@ block0(v0: i8, v1: i8): ; VCode: ; block0: -; x0 = xslteq32 x0, x1 +; xslteq32 x0, x0, x1 ; ret ; ; Disassembled: @@ -324,7 +324,7 @@ block0(v0: i16, v1: i16): ; VCode: ; block0: -; x0 = xslteq32 x0, x1 +; xslteq32 x0, x0, x1 ; ret ; ; Disassembled: @@ -339,7 +339,7 @@ block0(v0: i32, v1: i32): ; VCode: ; block0: -; x0 = xslteq32 x0, x1 +; xslteq32 x0, x0, x1 ; ret ; ; Disassembled: @@ -354,7 +354,7 @@ block0(v0: i64, v1: i64): ; VCode: ; block0: -; x0 = xslteq64 x0, x1 +; xslteq64 x0, x0, x1 ; ret ; ; Disassembled: @@ -369,7 +369,7 @@ block0(v0: i8, v1: i8): ; VCode: ; block0: -; x0 = xult32 x1, x0 +; xult32 x0, x1, x0 ; ret ; ; Disassembled: @@ -384,7 +384,7 @@ block0(v0: i16, v1: i16): ; VCode: ; block0: -; x0 = xult32 x1, x0 +; xult32 x0, x1, x0 ; ret ; ; Disassembled: @@ -399,7 +399,7 @@ block0(v0: i32, v1: i32): ; VCode: ; block0: -; x0 = xult32 x1, x0 +; xult32 x0, x1, x0 ; ret ; ; Disassembled: @@ 
-414,7 +414,7 @@ block0(v0: i64, v1: i64): ; VCode: ; block0: -; x0 = xult64 x1, x0 +; xult64 x0, x1, x0 ; ret ; ; Disassembled: @@ -429,7 +429,7 @@ block0(v0: i8, v1: i8): ; VCode: ; block0: -; x0 = xslt32 x1, x0 +; xslt32 x0, x1, x0 ; ret ; ; Disassembled: @@ -444,7 +444,7 @@ block0(v0: i16, v1: i16): ; VCode: ; block0: -; x0 = xslt32 x1, x0 +; xslt32 x0, x1, x0 ; ret ; ; Disassembled: @@ -459,7 +459,7 @@ block0(v0: i32, v1: i32): ; VCode: ; block0: -; x0 = xslt32 x1, x0 +; xslt32 x0, x1, x0 ; ret ; ; Disassembled: @@ -474,7 +474,7 @@ block0(v0: i64, v1: i64): ; VCode: ; block0: -; x0 = xslt64 x1, x0 +; xslt64 x0, x1, x0 ; ret ; ; Disassembled: @@ -489,7 +489,7 @@ block0(v0: i8, v1: i8): ; VCode: ; block0: -; x0 = xulteq32 x1, x0 +; xulteq32 x0, x1, x0 ; ret ; ; Disassembled: @@ -504,7 +504,7 @@ block0(v0: i16, v1: i16): ; VCode: ; block0: -; x0 = xulteq32 x1, x0 +; xulteq32 x0, x1, x0 ; ret ; ; Disassembled: @@ -519,7 +519,7 @@ block0(v0: i32, v1: i32): ; VCode: ; block0: -; x0 = xulteq32 x1, x0 +; xulteq32 x0, x1, x0 ; ret ; ; Disassembled: @@ -534,7 +534,7 @@ block0(v0: i64, v1: i64): ; VCode: ; block0: -; x0 = xulteq64 x1, x0 +; xulteq64 x0, x1, x0 ; ret ; ; Disassembled: @@ -549,7 +549,7 @@ block0(v0: i8, v1: i8): ; VCode: ; block0: -; x0 = xslteq32 x1, x0 +; xslteq32 x0, x1, x0 ; ret ; ; Disassembled: @@ -564,7 +564,7 @@ block0(v0: i16, v1: i16): ; VCode: ; block0: -; x0 = xslteq32 x1, x0 +; xslteq32 x0, x1, x0 ; ret ; ; Disassembled: @@ -579,7 +579,7 @@ block0(v0: i32, v1: i32): ; VCode: ; block0: -; x0 = xslteq32 x1, x0 +; xslteq32 x0, x1, x0 ; ret ; ; Disassembled: @@ -594,7 +594,7 @@ block0(v0: i64, v1: i64): ; VCode: ; block0: -; x0 = xslteq64 x1, x0 +; xslteq64 x0, x1, x0 ; ret ; ; Disassembled: diff --git a/cranelift/filetests/filetests/isa/pulley32/iconst.clif b/cranelift/filetests/filetests/isa/pulley32/iconst.clif index 4ab8a5e48a7d..1aba8a5bf023 100644 --- a/cranelift/filetests/filetests/isa/pulley32/iconst.clif +++ 
b/cranelift/filetests/filetests/isa/pulley32/iconst.clif @@ -9,7 +9,7 @@ block0: ; VCode: ; block0: -; x0 = xconst16 255 +; xconst16 x0, 255 ; ret ; ; Disassembled: @@ -24,7 +24,7 @@ block0: ; VCode: ; block0: -; x0 = xconst32 65535 +; xconst32 x0, 65535 ; ret ; ; Disassembled: @@ -39,7 +39,7 @@ block0: ; VCode: ; block0: -; x0 = xconst32 -1 +; xconst32 x0, -1 ; ret ; ; Disassembled: @@ -54,7 +54,7 @@ block0: ; VCode: ; block0: -; x0 = xconst64 -1 +; xconst64 x0, -1 ; ret ; ; Disassembled: diff --git a/cranelift/filetests/filetests/isa/pulley32/jump.clif b/cranelift/filetests/filetests/isa/pulley32/jump.clif index 6475294e95d1..5523088ad538 100644 --- a/cranelift/filetests/filetests/isa/pulley32/jump.clif +++ b/cranelift/filetests/filetests/isa/pulley32/jump.clif @@ -21,10 +21,10 @@ block3(v3: i8): ; block0: ; br_if x0, label2; jump label1 ; block1: -; x0 = xconst8 0 +; xconst8 x0, 0 ; jump label3 ; block2: -; x0 = xconst8 1 +; xconst8 x0, 1 ; jump label3 ; block3: ; ret diff --git a/cranelift/filetests/filetests/isa/pulley32/stack_addr.clif b/cranelift/filetests/filetests/isa/pulley32/stack_addr.clif index ef6087e21c79..5c77917cf15c 100644 --- a/cranelift/filetests/filetests/isa/pulley32/stack_addr.clif +++ b/cranelift/filetests/filetests/isa/pulley32/stack_addr.clif @@ -10,10 +10,10 @@ block0(): ; VCode: ; push_frame -; stack_alloc32 0x10 +; stack_alloc32 16 ; block0: ; x0 = load_addr Slot(0) -; stack_free32 0x10 +; stack_free32 16 ; pop_frame ; ret ; diff --git a/cranelift/filetests/filetests/isa/pulley32/trap.clif b/cranelift/filetests/filetests/isa/pulley32/trap.clif index 03a23b970e5e..ab0757ad1eb5 100644 --- a/cranelift/filetests/filetests/isa/pulley32/trap.clif +++ b/cranelift/filetests/filetests/isa/pulley32/trap.clif @@ -23,7 +23,7 @@ block0(v0: i64): ; VCode: ; block0: -; x2 = xconst8 42 +; xconst8 x2, 42 ; trap_if eq, Size64, x0, x2 // code = TrapCode(1) ; ret ; @@ -43,7 +43,7 @@ block0(v0: i64): ; VCode: ; block0: -; x2 = xconst8 42 +; xconst8 x2, 42 ; 
trap_if ne, Size64, x0, x2 // code = TrapCode(1) ; ret ; @@ -63,7 +63,7 @@ block0(v0: i64): ; VCode: ; block0: -; x2 = xconst8 42 +; xconst8 x2, 42 ; trap_if eq, Size64, x0, x2 // code = TrapCode(1) ; ret ; @@ -83,7 +83,7 @@ block0(v0: i64): ; VCode: ; block0: -; x2 = xconst8 42 +; xconst8 x2, 42 ; trap_if ne, Size64, x0, x2 // code = TrapCode(1) ; ret ; @@ -114,8 +114,8 @@ block2: ; block1: ; ret ; block2: -; x5 = xconst8 42 -; x6 = xconst8 0 +; xconst8 x5, 42 +; xconst8 x6, 0 ; trap_if ne, Size64, x5, x6 // code = TrapCode(1) ; ret ; @@ -147,8 +147,8 @@ block2: ; block0: ; br_if x0, label2; jump label1 ; block1: -; x4 = xconst8 0 -; x5 = xconst8 0 +; xconst8 x4, 0 +; xconst8 x5, 0 ; trap_if eq, Size64, x4, x5 // code = TrapCode(1) ; ret ; block2: diff --git a/cranelift/filetests/filetests/isa/pulley64/br_table.clif b/cranelift/filetests/filetests/isa/pulley64/br_table.clif index 8c334abc9be4..3adf03c6fedd 100644 --- a/cranelift/filetests/filetests/isa/pulley64/br_table.clif +++ b/cranelift/filetests/filetests/isa/pulley64/br_table.clif @@ -34,19 +34,19 @@ block5(v5: i32): ; block2: ; jump label4 ; block3: -; x5 = xconst8 3 +; xconst8 x5, 3 ; jump label7 ; block4: -; x5 = xconst8 2 +; xconst8 x5, 2 ; jump label7 ; block5: -; x5 = xconst8 1 +; xconst8 x5, 1 ; jump label7 ; block6: -; x5 = xconst8 4 +; xconst8 x5, 4 ; jump label7 ; block7: -; x0 = xadd32 x0, x5 +; xadd32 x0, x0, x5 ; ret ; ; Disassembled: diff --git a/cranelift/filetests/filetests/isa/pulley64/brif-icmp.clif b/cranelift/filetests/filetests/isa/pulley64/brif-icmp.clif index eeb01081a103..8a7ab52dd562 100644 --- a/cranelift/filetests/filetests/isa/pulley64/brif-icmp.clif +++ b/cranelift/filetests/filetests/isa/pulley64/brif-icmp.clif @@ -19,10 +19,10 @@ block2: ; block0: ; br_if_xeq32 x0, x1, label2; jump label1 ; block1: -; x0 = xconst8 1 +; xconst8 x0, 1 ; ret ; block2: -; x0 = xconst8 2 +; xconst8 x0, 2 ; ret ; ; Disassembled: @@ -50,10 +50,10 @@ block2: ; block0: ; br_if_xneq32 x0, x1, label2; 
jump label1 ; block1: -; x0 = xconst8 1 +; xconst8 x0, 1 ; ret ; block2: -; x0 = xconst8 2 +; xconst8 x0, 2 ; ret ; ; Disassembled: @@ -81,10 +81,10 @@ block2: ; block0: ; br_if_xult32 x0, x1, label2; jump label1 ; block1: -; x0 = xconst8 1 +; xconst8 x0, 1 ; ret ; block2: -; x0 = xconst8 2 +; xconst8 x0, 2 ; ret ; ; Disassembled: @@ -112,10 +112,10 @@ block2: ; block0: ; br_if_xulteq32 x0, x1, label2; jump label1 ; block1: -; x0 = xconst8 1 +; xconst8 x0, 1 ; ret ; block2: -; x0 = xconst8 2 +; xconst8 x0, 2 ; ret ; ; Disassembled: @@ -143,10 +143,10 @@ block2: ; block0: ; br_if_xslt32 x0, x1, label2; jump label1 ; block1: -; x0 = xconst8 1 +; xconst8 x0, 1 ; ret ; block2: -; x0 = xconst8 2 +; xconst8 x0, 2 ; ret ; ; Disassembled: @@ -174,10 +174,10 @@ block2: ; block0: ; br_if_xslteq32 x0, x1, label2; jump label1 ; block1: -; x0 = xconst8 1 +; xconst8 x0, 1 ; ret ; block2: -; x0 = xconst8 2 +; xconst8 x0, 2 ; ret ; ; Disassembled: @@ -205,10 +205,10 @@ block2: ; block0: ; br_if_xult32 x1, x0, label2; jump label1 ; block1: -; x0 = xconst8 1 +; xconst8 x0, 1 ; ret ; block2: -; x0 = xconst8 2 +; xconst8 x0, 2 ; ret ; ; Disassembled: @@ -236,10 +236,10 @@ block2: ; block0: ; br_if_xulteq32 x1, x0, label2; jump label1 ; block1: -; x0 = xconst8 1 +; xconst8 x0, 1 ; ret ; block2: -; x0 = xconst8 2 +; xconst8 x0, 2 ; ret ; ; Disassembled: @@ -267,10 +267,10 @@ block2: ; block0: ; br_if_xslt32 x1, x0, label2; jump label1 ; block1: -; x0 = xconst8 1 +; xconst8 x0, 1 ; ret ; block2: -; x0 = xconst8 2 +; xconst8 x0, 2 ; ret ; ; Disassembled: @@ -298,10 +298,10 @@ block2: ; block0: ; br_if_xslteq32 x1, x0, label2; jump label1 ; block1: -; x0 = xconst8 1 +; xconst8 x0, 1 ; ret ; block2: -; x0 = xconst8 2 +; xconst8 x0, 2 ; ret ; ; Disassembled: @@ -330,10 +330,10 @@ block2: ; block0: ; br_if_xeq32 x0, x1, label2; jump label1 ; block1: -; x0 = xconst8 1 +; xconst8 x0, 1 ; ret ; block2: -; x0 = xconst8 2 +; xconst8 x0, 2 ; ret ; ; Disassembled: diff --git 
a/cranelift/filetests/filetests/isa/pulley64/brif.clif b/cranelift/filetests/filetests/isa/pulley64/brif.clif index a1b47f037589..0585a726bfe7 100644 --- a/cranelift/filetests/filetests/isa/pulley64/brif.clif +++ b/cranelift/filetests/filetests/isa/pulley64/brif.clif @@ -18,10 +18,10 @@ block2: ; block0: ; br_if x0, label2; jump label1 ; block1: -; x0 = xconst8 0 +; xconst8 x0, 0 ; ret ; block2: -; x0 = xconst8 1 +; xconst8 x0, 1 ; ret ; ; Disassembled: @@ -48,10 +48,10 @@ block2: ; block0: ; br_if x0, label2; jump label1 ; block1: -; x0 = xconst8 0 +; xconst8 x0, 0 ; ret ; block2: -; x0 = xconst8 1 +; xconst8 x0, 1 ; ret ; ; Disassembled: @@ -78,10 +78,10 @@ block2: ; block0: ; br_if x0, label2; jump label1 ; block1: -; x0 = xconst8 0 +; xconst8 x0, 0 ; ret ; block2: -; x0 = xconst8 1 +; xconst8 x0, 1 ; ret ; ; Disassembled: @@ -108,10 +108,10 @@ block2: ; block0: ; br_if x0, label2; jump label1 ; block1: -; x0 = xconst8 0 +; xconst8 x0, 0 ; ret ; block2: -; x0 = xconst8 1 +; xconst8 x0, 1 ; ret ; ; Disassembled: @@ -137,13 +137,13 @@ block2: ; VCode: ; block0: -; x5 = xeq32 x0, x1 +; xeq32 x5, x0, x1 ; br_if x5, label2; jump label1 ; block1: -; x0 = xconst8 0 +; xconst8 x0, 0 ; ret ; block2: -; x0 = xconst8 1 +; xconst8 x0, 1 ; ret ; ; Disassembled: @@ -170,13 +170,13 @@ block2: ; VCode: ; block0: -; x5 = xneq32 x0, x1 +; xneq32 x5, x0, x1 ; br_if x5, label2; jump label1 ; block1: -; x0 = xconst8 0 +; xconst8 x0, 0 ; ret ; block2: -; x0 = xconst8 1 +; xconst8 x0, 1 ; ret ; ; Disassembled: @@ -205,10 +205,10 @@ block2: ; block0: ; br_if_xslt32 x0, x1, label2; jump label1 ; block1: -; x0 = xconst8 0 +; xconst8 x0, 0 ; ret ; block2: -; x0 = xconst8 1 +; xconst8 x0, 1 ; ret ; ; Disassembled: @@ -234,13 +234,13 @@ block2: ; VCode: ; block0: -; x5 = xulteq64 x1, x0 +; xulteq64 x5, x1, x0 ; br_if x5, label2; jump label1 ; block1: -; x0 = xconst8 0 +; xconst8 x0, 0 ; ret ; block2: -; x0 = xconst8 1 +; xconst8 x0, 1 ; ret ; ; Disassembled: diff --git 
a/cranelift/filetests/filetests/isa/pulley64/call.clif b/cranelift/filetests/filetests/isa/pulley64/call.clif index 9470d04a4a02..e0b87760510a 100644 --- a/cranelift/filetests/filetests/isa/pulley64/call.clif +++ b/cranelift/filetests/filetests/isa/pulley64/call.clif @@ -15,9 +15,9 @@ block0: ; VCode: ; push_frame ; block0: -; x0 = xconst8 0 +; xconst8 x0, 0 ; call CallInfo { dest: TestCase(%g), uses: [CallArgPair { vreg: p0i, preg: p0i }], defs: [CallRetPair { vreg: Writable { reg: p0i }, preg: p0i }], clobbers: PRegSet { bits: [65534, 65279, 4294967295, 0] }, callee_conv: Fast, caller_conv: Fast, callee_pop_size: 0 } -; x0 = xconst8 1 +; xconst8 x0, 1 ; pop_frame ; ret ; @@ -42,9 +42,9 @@ block0: ; VCode: ; push_frame ; block0: -; x0 = xconst8 0 +; xconst8 x0, 0 ; call CallInfo { dest: TestCase(%g), uses: [CallArgPair { vreg: p0i, preg: p0i }], defs: [CallRetPair { vreg: Writable { reg: p0i }, preg: p0i }], clobbers: PRegSet { bits: [65534, 65279, 4294967295, 0] }, callee_conv: Fast, caller_conv: Fast, callee_pop_size: 0 } -; x0 = xconst8 1 +; xconst8 x0, 1 ; pop_frame ; ret ; @@ -71,10 +71,10 @@ block0: ; VCode: ; push_frame ; block0: -; x0 = xconst8 0 -; x1 = xconst8 1 -; x2 = xconst8 2 -; x3 = xconst8 3 +; xconst8 x0, 0 +; xconst8 x1, 1 +; xconst8 x2, 2 +; xconst8 x3, 3 ; call CallInfo { dest: TestCase(%g), uses: [CallArgPair { vreg: p0i, preg: p0i }, CallArgPair { vreg: p1i, preg: p1i }, CallArgPair { vreg: p2i, preg: p2i }, CallArgPair { vreg: p3i, preg: p3i }], defs: [], clobbers: PRegSet { bits: [65535, 65279, 4294967295, 0] }, callee_conv: Fast, caller_conv: Fast, callee_pop_size: 0 } ; pop_frame ; ret @@ -104,9 +104,9 @@ block0: ; push_frame ; block0: ; call CallInfo { dest: TestCase(%g), uses: [], defs: [CallRetPair { vreg: Writable { reg: p0i }, preg: p0i }, CallRetPair { vreg: Writable { reg: p1i }, preg: p1i }, CallRetPair { vreg: Writable { reg: p2i }, preg: p2i }, CallRetPair { vreg: Writable { reg: p3i }, preg: p3i }], clobbers: PRegSet { bits: 
[65520, 65279, 4294967295, 0] }, callee_conv: Fast, caller_conv: Fast, callee_pop_size: 0 } -; x4 = xadd64 x0, x2 -; x3 = xadd64 x1, x3 -; x0 = xadd64 x4, x3 +; xadd64 x4, x0, x2 +; xadd64 x3, x1, x3 +; xadd64 x0, x4, x3 ; pop_frame ; ret ; @@ -130,32 +130,32 @@ block0: ; VCode: ; push_frame -; stack_alloc32 0x30 +; stack_alloc32 48 ; block0: -; x15 = xconst8 0 +; xconst8 x15, 0 ; store64 OutgoingArg(0), x15 // flags = notrap aligned ; store64 OutgoingArg(8), x15 // flags = notrap aligned ; store64 OutgoingArg(16), x15 // flags = notrap aligned ; store64 OutgoingArg(24), x15 // flags = notrap aligned ; store64 OutgoingArg(32), x15 // flags = notrap aligned ; store64 OutgoingArg(40), x15 // flags = notrap aligned -; x0 = xmov x15 -; x1 = xmov x15 -; x2 = xmov x15 -; x3 = xmov x15 -; x4 = xmov x15 -; x5 = xmov x15 -; x6 = xmov x15 -; x7 = xmov x15 -; x8 = xmov x15 -; x9 = xmov x15 -; x10 = xmov x15 -; x11 = xmov x15 -; x12 = xmov x15 -; x13 = xmov x15 -; x14 = xmov x15 +; xmov x0, x15 +; xmov x1, x15 +; xmov x2, x15 +; xmov x3, x15 +; xmov x4, x15 +; xmov x5, x15 +; xmov x6, x15 +; xmov x7, x15 +; xmov x8, x15 +; xmov x9, x15 +; xmov x10, x15 +; xmov x11, x15 +; xmov x12, x15 +; xmov x13, x15 +; xmov x14, x15 ; call CallInfo { dest: TestCase(%g), uses: [CallArgPair { vreg: p0i, preg: p0i }, CallArgPair { vreg: p1i, preg: p1i }, CallArgPair { vreg: p2i, preg: p2i }, CallArgPair { vreg: p3i, preg: p3i }, CallArgPair { vreg: p4i, preg: p4i }, CallArgPair { vreg: p5i, preg: p5i }, CallArgPair { vreg: p6i, preg: p6i }, CallArgPair { vreg: p7i, preg: p7i }, CallArgPair { vreg: p8i, preg: p8i }, CallArgPair { vreg: p9i, preg: p9i }, CallArgPair { vreg: p10i, preg: p10i }, CallArgPair { vreg: p11i, preg: p11i }, CallArgPair { vreg: p12i, preg: p12i }, CallArgPair { vreg: p13i, preg: p13i }, CallArgPair { vreg: p14i, preg: p14i }, CallArgPair { vreg: p15i, preg: p15i }], defs: [], clobbers: PRegSet { bits: [65535, 65279, 4294967295, 0] }, callee_conv: Fast, caller_conv: Fast, 
callee_pop_size: 0 } -; stack_free32 0x30 +; stack_free32 48 ; pop_frame ; ret ; @@ -227,47 +227,47 @@ block0: ; VCode: ; push_frame -; stack_alloc32 0x40 +; stack_alloc32 64 ; store64 sp+56, x18 // flags = notrap aligned ; store64 sp+48, x20 // flags = notrap aligned ; block0: ; x0 = load_addr OutgoingArg(0) ; call CallInfo { dest: TestCase(%g), uses: [CallArgPair { vreg: p0i, preg: p0i }], defs: [CallRetPair { vreg: Writable { reg: p0i }, preg: p0i }, CallRetPair { vreg: Writable { reg: p1i }, preg: p1i }, CallRetPair { vreg: Writable { reg: p2i }, preg: p2i }, CallRetPair { vreg: Writable { reg: p3i }, preg: p3i }, CallRetPair { vreg: Writable { reg: p4i }, preg: p4i }, CallRetPair { vreg: Writable { reg: p5i }, preg: p5i }, CallRetPair { vreg: Writable { reg: p6i }, preg: p6i }, CallRetPair { vreg: Writable { reg: p7i }, preg: p7i }, CallRetPair { vreg: Writable { reg: p8i }, preg: p8i }, CallRetPair { vreg: Writable { reg: p9i }, preg: p9i }, CallRetPair { vreg: Writable { reg: p10i }, preg: p10i }, CallRetPair { vreg: Writable { reg: p11i }, preg: p11i }, CallRetPair { vreg: Writable { reg: p12i }, preg: p12i }, CallRetPair { vreg: Writable { reg: p13i }, preg: p13i }, CallRetPair { vreg: Writable { reg: p14i }, preg: p14i }, CallRetPair { vreg: Writable { reg: p15i }, preg: p15i }], clobbers: PRegSet { bits: [0, 65279, 4294967295, 0] }, callee_conv: Fast, caller_conv: Fast, callee_pop_size: 0 } -; x18 = xmov x13 -; x20 = xmov x11 +; xmov x18, x13 +; xmov x20, x11 ; x24 = load64_u OutgoingArg(0) // flags = notrap aligned ; x11 = load64_u OutgoingArg(8) // flags = notrap aligned ; x13 = load64_u OutgoingArg(16) // flags = notrap aligned ; x19 = load64_u OutgoingArg(24) // flags = notrap aligned ; x21 = load64_u OutgoingArg(32) // flags = notrap aligned -; x25 = xadd64 x0, x1 -; x23 = xadd64 x2, x3 -; x5 = xadd64 x4, x5 -; x6 = xadd64 x6, x7 -; x7 = xadd64 x8, x9 -; x0 = xmov x20 -; x4 = xadd64 x10, x0 -; x10 = xmov x18 -; x8 = xadd64 x12, x10 -; x14 = xadd64 
x14, x15 -; x15 = xadd64 x24, x11 -; x13 = xadd64 x11, x13 -; x0 = xadd64 x19, x21 -; x1 = xadd64 x25, x23 -; x2 = xadd64 x5, x6 -; x3 = xadd64 x7, x4 -; x14 = xadd64 x8, x14 -; x13 = xadd64 x15, x13 -; x15 = xadd64 x0, x0 -; x0 = xadd64 x1, x2 -; x14 = xadd64 x3, x14 -; x13 = xadd64 x13, x15 -; x14 = xadd64 x0, x14 -; x13 = xadd64 x13, x13 -; x0 = xadd64 x14, x13 +; xadd64 x25, x0, x1 +; xadd64 x23, x2, x3 +; xadd64 x5, x4, x5 +; xadd64 x6, x6, x7 +; xadd64 x7, x8, x9 +; xmov x0, x20 +; xadd64 x4, x10, x0 +; xmov x10, x18 +; xadd64 x8, x12, x10 +; xadd64 x14, x14, x15 +; xadd64 x15, x24, x11 +; xadd64 x13, x11, x13 +; xadd64 x0, x19, x21 +; xadd64 x1, x25, x23 +; xadd64 x2, x5, x6 +; xadd64 x3, x7, x4 +; xadd64 x14, x8, x14 +; xadd64 x13, x15, x13 +; xadd64 x15, x0, x0 +; xadd64 x0, x1, x2 +; xadd64 x14, x3, x14 +; xadd64 x13, x13, x15 +; xadd64 x14, x0, x14 +; xadd64 x13, x13, x13 +; xadd64 x0, x14, x13 ; x18 = load64_u sp+56 // flags = notrap aligned ; x20 = load64_u sp+48 // flags = notrap aligned -; stack_free32 0x40 +; stack_free32 64 ; pop_frame ; ret ; @@ -356,9 +356,9 @@ block0: ; VCode: ; push_frame -; stack_alloc32 0x40 +; stack_alloc32 64 ; block0: -; x15 = xconst8 0 +; xconst8 x15, 0 ; store64 OutgoingArg(0), x15 // flags = notrap aligned ; store64 OutgoingArg(8), x15 // flags = notrap aligned ; store64 OutgoingArg(16), x15 // flags = notrap aligned @@ -367,23 +367,23 @@ block0: ; store64 OutgoingArg(40), x15 // flags = notrap aligned ; store64 OutgoingArg(48), x15 // flags = notrap aligned ; store64 OutgoingArg(56), x15 // flags = notrap aligned -; x0 = xmov x15 -; x1 = xmov x15 -; x2 = xmov x15 -; x3 = xmov x15 -; x4 = xmov x15 -; x5 = xmov x15 -; x6 = xmov x15 -; x7 = xmov x15 -; x8 = xmov x15 -; x9 = xmov x15 -; x10 = xmov x15 -; x11 = xmov x15 -; x12 = xmov x15 -; x13 = xmov x15 -; x14 = xmov x15 +; xmov x0, x15 +; xmov x1, x15 +; xmov x2, x15 +; xmov x3, x15 +; xmov x4, x15 +; xmov x5, x15 +; xmov x6, x15 +; xmov x7, x15 +; xmov x8, x15 +; xmov 
x9, x15 +; xmov x10, x15 +; xmov x11, x15 +; xmov x12, x15 +; xmov x13, x15 +; xmov x14, x15 ; call CallInfo { dest: TestCase(%g), uses: [CallArgPair { vreg: p0i, preg: p0i }, CallArgPair { vreg: p1i, preg: p1i }, CallArgPair { vreg: p2i, preg: p2i }, CallArgPair { vreg: p3i, preg: p3i }, CallArgPair { vreg: p4i, preg: p4i }, CallArgPair { vreg: p5i, preg: p5i }, CallArgPair { vreg: p6i, preg: p6i }, CallArgPair { vreg: p7i, preg: p7i }, CallArgPair { vreg: p8i, preg: p8i }, CallArgPair { vreg: p9i, preg: p9i }, CallArgPair { vreg: p10i, preg: p10i }, CallArgPair { vreg: p11i, preg: p11i }, CallArgPair { vreg: p12i, preg: p12i }, CallArgPair { vreg: p13i, preg: p13i }, CallArgPair { vreg: p14i, preg: p14i }, CallArgPair { vreg: p15i, preg: p15i }], defs: [], clobbers: PRegSet { bits: [65535, 65279, 4294967295, 0] }, callee_conv: Fast, caller_conv: Fast, callee_pop_size: 0 } -; stack_free32 0x40 +; stack_free32 64 ; pop_frame ; ret ; diff --git a/cranelift/filetests/filetests/isa/pulley64/iadd.clif b/cranelift/filetests/filetests/isa/pulley64/iadd.clif index dd49ed0c3735..0ce8cc0122b4 100644 --- a/cranelift/filetests/filetests/isa/pulley64/iadd.clif +++ b/cranelift/filetests/filetests/isa/pulley64/iadd.clif @@ -9,7 +9,7 @@ block0(v0: i8, v1: i8): ; VCode: ; block0: -; x0 = xadd32 x0, x1 +; xadd32 x0, x0, x1 ; ret ; ; Disassembled: @@ -24,7 +24,7 @@ block0(v0: i16, v1: i16): ; VCode: ; block0: -; x0 = xadd32 x0, x1 +; xadd32 x0, x0, x1 ; ret ; ; Disassembled: @@ -39,7 +39,7 @@ block0(v0: i32, v1: i32): ; VCode: ; block0: -; x0 = xadd32 x0, x1 +; xadd32 x0, x0, x1 ; ret ; ; Disassembled: @@ -54,7 +54,7 @@ block0(v0: i64, v1: i64): ; VCode: ; block0: -; x0 = xadd64 x0, x1 +; xadd64 x0, x0, x1 ; ret ; ; Disassembled: diff --git a/cranelift/filetests/filetests/isa/pulley64/icmp.clif b/cranelift/filetests/filetests/isa/pulley64/icmp.clif index 3343d2f7d981..badfa73b3ceb 100644 --- a/cranelift/filetests/filetests/isa/pulley64/icmp.clif +++ 
b/cranelift/filetests/filetests/isa/pulley64/icmp.clif @@ -9,7 +9,7 @@ block0(v0: i8, v1: i8): ; VCode: ; block0: -; x0 = xeq32 x0, x1 +; xeq32 x0, x0, x1 ; ret ; ; Disassembled: @@ -24,7 +24,7 @@ block0(v0: i16, v1: i16): ; VCode: ; block0: -; x0 = xeq32 x0, x1 +; xeq32 x0, x0, x1 ; ret ; ; Disassembled: @@ -39,7 +39,7 @@ block0(v0: i32, v1: i32): ; VCode: ; block0: -; x0 = xeq32 x0, x1 +; xeq32 x0, x0, x1 ; ret ; ; Disassembled: @@ -54,7 +54,7 @@ block0(v0: i64, v1: i64): ; VCode: ; block0: -; x0 = xeq64 x0, x1 +; xeq64 x0, x0, x1 ; ret ; ; Disassembled: @@ -69,7 +69,7 @@ block0(v0: i8, v1: i8): ; VCode: ; block0: -; x0 = xneq32 x0, x1 +; xneq32 x0, x0, x1 ; ret ; ; Disassembled: @@ -84,7 +84,7 @@ block0(v0: i16, v1: i16): ; VCode: ; block0: -; x0 = xneq32 x0, x1 +; xneq32 x0, x0, x1 ; ret ; ; Disassembled: @@ -99,7 +99,7 @@ block0(v0: i32, v1: i32): ; VCode: ; block0: -; x0 = xneq32 x0, x1 +; xneq32 x0, x0, x1 ; ret ; ; Disassembled: @@ -114,7 +114,7 @@ block0(v0: i64, v1: i64): ; VCode: ; block0: -; x0 = xneq64 x0, x1 +; xneq64 x0, x0, x1 ; ret ; ; Disassembled: @@ -129,7 +129,7 @@ block0(v0: i8, v1: i8): ; VCode: ; block0: -; x0 = xult32 x0, x1 +; xult32 x0, x0, x1 ; ret ; ; Disassembled: @@ -144,7 +144,7 @@ block0(v0: i16, v1: i16): ; VCode: ; block0: -; x0 = xult32 x0, x1 +; xult32 x0, x0, x1 ; ret ; ; Disassembled: @@ -159,7 +159,7 @@ block0(v0: i32, v1: i32): ; VCode: ; block0: -; x0 = xult32 x0, x1 +; xult32 x0, x0, x1 ; ret ; ; Disassembled: @@ -174,7 +174,7 @@ block0(v0: i64, v1: i64): ; VCode: ; block0: -; x0 = xult64 x0, x1 +; xult64 x0, x0, x1 ; ret ; ; Disassembled: @@ -189,7 +189,7 @@ block0(v0: i8, v1: i8): ; VCode: ; block0: -; x0 = xulteq32 x0, x1 +; xulteq32 x0, x0, x1 ; ret ; ; Disassembled: @@ -204,7 +204,7 @@ block0(v0: i16, v1: i16): ; VCode: ; block0: -; x0 = xulteq32 x0, x1 +; xulteq32 x0, x0, x1 ; ret ; ; Disassembled: @@ -219,7 +219,7 @@ block0(v0: i32, v1: i32): ; VCode: ; block0: -; x0 = xulteq32 x0, x1 +; xulteq32 x0, x0, x1 ; ret ; 
; Disassembled: @@ -234,7 +234,7 @@ block0(v0: i64, v1: i64): ; VCode: ; block0: -; x0 = xulteq64 x0, x1 +; xulteq64 x0, x0, x1 ; ret ; ; Disassembled: @@ -249,7 +249,7 @@ block0(v0: i8, v1: i8): ; VCode: ; block0: -; x0 = xslt32 x0, x1 +; xslt32 x0, x0, x1 ; ret ; ; Disassembled: @@ -264,7 +264,7 @@ block0(v0: i16, v1: i16): ; VCode: ; block0: -; x0 = xslt32 x0, x1 +; xslt32 x0, x0, x1 ; ret ; ; Disassembled: @@ -279,7 +279,7 @@ block0(v0: i32, v1: i32): ; VCode: ; block0: -; x0 = xslt32 x0, x1 +; xslt32 x0, x0, x1 ; ret ; ; Disassembled: @@ -294,7 +294,7 @@ block0(v0: i64, v1: i64): ; VCode: ; block0: -; x0 = xslt64 x0, x1 +; xslt64 x0, x0, x1 ; ret ; ; Disassembled: @@ -309,7 +309,7 @@ block0(v0: i8, v1: i8): ; VCode: ; block0: -; x0 = xslteq32 x0, x1 +; xslteq32 x0, x0, x1 ; ret ; ; Disassembled: @@ -324,7 +324,7 @@ block0(v0: i16, v1: i16): ; VCode: ; block0: -; x0 = xslteq32 x0, x1 +; xslteq32 x0, x0, x1 ; ret ; ; Disassembled: @@ -339,7 +339,7 @@ block0(v0: i32, v1: i32): ; VCode: ; block0: -; x0 = xslteq32 x0, x1 +; xslteq32 x0, x0, x1 ; ret ; ; Disassembled: @@ -354,7 +354,7 @@ block0(v0: i64, v1: i64): ; VCode: ; block0: -; x0 = xslteq64 x0, x1 +; xslteq64 x0, x0, x1 ; ret ; ; Disassembled: @@ -369,7 +369,7 @@ block0(v0: i8, v1: i8): ; VCode: ; block0: -; x0 = xult32 x1, x0 +; xult32 x0, x1, x0 ; ret ; ; Disassembled: @@ -384,7 +384,7 @@ block0(v0: i16, v1: i16): ; VCode: ; block0: -; x0 = xult32 x1, x0 +; xult32 x0, x1, x0 ; ret ; ; Disassembled: @@ -399,7 +399,7 @@ block0(v0: i32, v1: i32): ; VCode: ; block0: -; x0 = xult32 x1, x0 +; xult32 x0, x1, x0 ; ret ; ; Disassembled: @@ -414,7 +414,7 @@ block0(v0: i64, v1: i64): ; VCode: ; block0: -; x0 = xult64 x1, x0 +; xult64 x0, x1, x0 ; ret ; ; Disassembled: @@ -429,7 +429,7 @@ block0(v0: i8, v1: i8): ; VCode: ; block0: -; x0 = xslt32 x1, x0 +; xslt32 x0, x1, x0 ; ret ; ; Disassembled: @@ -444,7 +444,7 @@ block0(v0: i16, v1: i16): ; VCode: ; block0: -; x0 = xslt32 x1, x0 +; xslt32 x0, x1, x0 ; ret ; ; 
Disassembled: @@ -459,7 +459,7 @@ block0(v0: i32, v1: i32): ; VCode: ; block0: -; x0 = xslt32 x1, x0 +; xslt32 x0, x1, x0 ; ret ; ; Disassembled: @@ -474,7 +474,7 @@ block0(v0: i64, v1: i64): ; VCode: ; block0: -; x0 = xslt64 x1, x0 +; xslt64 x0, x1, x0 ; ret ; ; Disassembled: @@ -489,7 +489,7 @@ block0(v0: i8, v1: i8): ; VCode: ; block0: -; x0 = xulteq32 x1, x0 +; xulteq32 x0, x1, x0 ; ret ; ; Disassembled: @@ -504,7 +504,7 @@ block0(v0: i16, v1: i16): ; VCode: ; block0: -; x0 = xulteq32 x1, x0 +; xulteq32 x0, x1, x0 ; ret ; ; Disassembled: @@ -519,7 +519,7 @@ block0(v0: i32, v1: i32): ; VCode: ; block0: -; x0 = xulteq32 x1, x0 +; xulteq32 x0, x1, x0 ; ret ; ; Disassembled: @@ -534,7 +534,7 @@ block0(v0: i64, v1: i64): ; VCode: ; block0: -; x0 = xulteq64 x1, x0 +; xulteq64 x0, x1, x0 ; ret ; ; Disassembled: @@ -549,7 +549,7 @@ block0(v0: i8, v1: i8): ; VCode: ; block0: -; x0 = xslteq32 x1, x0 +; xslteq32 x0, x1, x0 ; ret ; ; Disassembled: @@ -564,7 +564,7 @@ block0(v0: i16, v1: i16): ; VCode: ; block0: -; x0 = xslteq32 x1, x0 +; xslteq32 x0, x1, x0 ; ret ; ; Disassembled: @@ -579,7 +579,7 @@ block0(v0: i32, v1: i32): ; VCode: ; block0: -; x0 = xslteq32 x1, x0 +; xslteq32 x0, x1, x0 ; ret ; ; Disassembled: @@ -594,7 +594,7 @@ block0(v0: i64, v1: i64): ; VCode: ; block0: -; x0 = xslteq64 x1, x0 +; xslteq64 x0, x1, x0 ; ret ; ; Disassembled: diff --git a/cranelift/filetests/filetests/isa/pulley64/iconst.clif b/cranelift/filetests/filetests/isa/pulley64/iconst.clif index 5a143c3db352..8583ebf4d6f3 100644 --- a/cranelift/filetests/filetests/isa/pulley64/iconst.clif +++ b/cranelift/filetests/filetests/isa/pulley64/iconst.clif @@ -9,7 +9,7 @@ block0: ; VCode: ; block0: -; x0 = xconst16 255 +; xconst16 x0, 255 ; ret ; ; Disassembled: @@ -24,7 +24,7 @@ block0: ; VCode: ; block0: -; x0 = xconst32 65535 +; xconst32 x0, 65535 ; ret ; ; Disassembled: @@ -39,7 +39,7 @@ block0: ; VCode: ; block0: -; x0 = xconst32 -1 +; xconst32 x0, -1 ; ret ; ; Disassembled: @@ -54,7 +54,7 @@ 
block0: ; VCode: ; block0: -; x0 = xconst64 -1 +; xconst64 x0, -1 ; ret ; ; Disassembled: diff --git a/cranelift/filetests/filetests/isa/pulley64/jump.clif b/cranelift/filetests/filetests/isa/pulley64/jump.clif index 4c22b5acc953..fbdc7c988280 100644 --- a/cranelift/filetests/filetests/isa/pulley64/jump.clif +++ b/cranelift/filetests/filetests/isa/pulley64/jump.clif @@ -21,10 +21,10 @@ block3(v3: i8): ; block0: ; br_if x0, label2; jump label1 ; block1: -; x0 = xconst8 0 +; xconst8 x0, 0 ; jump label3 ; block2: -; x0 = xconst8 1 +; xconst8 x0, 1 ; jump label3 ; block3: ; ret diff --git a/cranelift/filetests/filetests/isa/pulley64/stack_addr.clif b/cranelift/filetests/filetests/isa/pulley64/stack_addr.clif index b9190587fc21..2f658f4b4802 100644 --- a/cranelift/filetests/filetests/isa/pulley64/stack_addr.clif +++ b/cranelift/filetests/filetests/isa/pulley64/stack_addr.clif @@ -10,10 +10,10 @@ block0(): ; VCode: ; push_frame -; stack_alloc32 0x10 +; stack_alloc32 16 ; block0: ; x0 = load_addr Slot(0) -; stack_free32 0x10 +; stack_free32 16 ; pop_frame ; ret ; diff --git a/cranelift/filetests/filetests/isa/pulley64/trap.clif b/cranelift/filetests/filetests/isa/pulley64/trap.clif index 23e569a23a23..fa458ec90486 100644 --- a/cranelift/filetests/filetests/isa/pulley64/trap.clif +++ b/cranelift/filetests/filetests/isa/pulley64/trap.clif @@ -23,7 +23,7 @@ block0(v0: i64): ; VCode: ; block0: -; x2 = xconst8 42 +; xconst8 x2, 42 ; trap_if eq, Size64, x0, x2 // code = TrapCode(1) ; ret ; @@ -43,7 +43,7 @@ block0(v0: i64): ; VCode: ; block0: -; x2 = xconst8 42 +; xconst8 x2, 42 ; trap_if ne, Size64, x0, x2 // code = TrapCode(1) ; ret ; @@ -63,7 +63,7 @@ block0(v0: i64): ; VCode: ; block0: -; x2 = xconst8 42 +; xconst8 x2, 42 ; trap_if eq, Size64, x0, x2 // code = TrapCode(1) ; ret ; @@ -83,7 +83,7 @@ block0(v0: i64): ; VCode: ; block0: -; x2 = xconst8 42 +; xconst8 x2, 42 ; trap_if ne, Size64, x0, x2 // code = TrapCode(1) ; ret ; @@ -114,8 +114,8 @@ block2: ; block1: ; ret ; 
block2: -; x5 = xconst8 42 -; x6 = xconst8 0 +; xconst8 x5, 42 +; xconst8 x6, 0 ; trap_if ne, Size64, x5, x6 // code = TrapCode(1) ; ret ; @@ -147,8 +147,8 @@ block2: ; block0: ; br_if x0, label2; jump label1 ; block1: -; x4 = xconst8 0 -; x5 = xconst8 0 +; xconst8 x4, 0 +; xconst8 x5, 0 ; trap_if eq, Size64, x4, x5 // code = TrapCode(1) ; ret ; block2: diff --git a/pulley/src/lib.rs b/pulley/src/lib.rs index 5a63e7067234..5574f5af9d4d 100644 --- a/pulley/src/lib.rs +++ b/pulley/src/lib.rs @@ -15,6 +15,7 @@ extern crate std; extern crate alloc; /// Calls the given macro with each opcode. +#[macro_export] macro_rules! for_each_op { ( $macro:ident ) => { $macro! { @@ -213,6 +214,7 @@ macro_rules! for_each_op { } /// Calls the given macro with each extended opcode. +#[macro_export] macro_rules! for_each_extended_op { ( $macro:ident ) => { $macro! { From f682433b214f81f555fdd5c1f9272c6a4bb07184 Mon Sep 17 00:00:00 2001 From: Alex Crichton Date: Tue, 10 Dec 2024 07:21:19 -0700 Subject: [PATCH 09/30] Avoid OOM with `MallocMemory` and fuzzing (#9772) Tune `memory_reservation_for_growth` automatically when malloc-based memories are used to avoid hitting OOM conditions. --- crates/fuzzing/src/generators/config.rs | 20 ++++++++++++++++++-- 1 file changed, 18 insertions(+), 2 deletions(-) diff --git a/crates/fuzzing/src/generators/config.rs b/crates/fuzzing/src/generators/config.rs index 8fbdb737aa2b..437c7c235ba0 100644 --- a/crates/fuzzing/src/generators/config.rs +++ b/crates/fuzzing/src/generators/config.rs @@ -685,10 +685,26 @@ impl WasmtimeConfig { /// the current state of the engine's development. fn make_internally_consistent(&mut self) { if !self.signals_based_traps { - // Spectre-based heap mitigations require signal handlers so this - // must always be disabled if signals-based traps are disabled. 
if let MemoryConfig::Normal(cfg) = &mut self.memory_config { + // Spectre-based heap mitigations require signal handlers so + // this must always be disabled if signals-based traps are + // disabled. cfg.cranelift_enable_heap_access_spectre_mitigations = None; + + // With configuration settings that match the use of malloc for + // linear memories cap the `memory_reservation_for_growth` value + // to something reasonable to avoid OOM in fuzzing. + if !cfg.memory_init_cow + && cfg.memory_guard_size == Some(0) + && cfg.memory_reservation == Some(0) + { + let min = 10 << 20; // 10 MiB + if let Some(val) = &mut cfg.memory_reservation_for_growth { + *val = (*val).min(min); + } else { + cfg.memory_reservation_for_growth = Some(min); + } + } } } } From 96f388d5f752ef05390c273146694b224f3bfc43 Mon Sep 17 00:00:00 2001 From: Alex Crichton Date: Tue, 10 Dec 2024 08:36:52 -0700 Subject: [PATCH 10/30] pulley: Remove hardcoded instruction sizes in interpreter (#9769) Use the new `Encode` trait to improve the `pc_rel_jump` helper. --- pulley/src/interp.rs | 50 +++++++++++++++++++++++++++----------------- 1 file changed, 31 insertions(+), 19 deletions(-) diff --git a/pulley/src/interp.rs b/pulley/src/interp.rs index 1783ec78b102..5d82af604e69 100644 --- a/pulley/src/interp.rs +++ b/pulley/src/interp.rs @@ -719,10 +719,17 @@ struct Interpreter<'a> { } impl Interpreter<'_> { + /// Performs a relative jump of `offset` bytes from the current instruction. + /// + /// This will jump from the start of the current instruction, identified by + /// `I`, `offset` bytes away. Note that the `self.pc` at the start of this + /// function actually points to the instruction after this one so `I` is + /// necessary to go back to ourselves after which we then go `offset` away. 
#[inline] - fn pc_rel_jump(&mut self, offset: PcRelOffset, inst_size: isize) -> ControlFlow { + fn pc_rel_jump(&mut self, offset: PcRelOffset) -> ControlFlow { let offset = isize::try_from(i32::from(offset)).unwrap(); - self.pc = unsafe { self.pc.offset(offset - inst_size) }; + let my_pc = self.current_pc::(); + self.pc = unsafe { UnsafeBytecodeStream::new(my_pc.offset(offset)) }; ControlFlow::Continue(()) } @@ -826,7 +833,7 @@ impl OpVisitor for Interpreter<'_> { fn call(&mut self, offset: PcRelOffset) -> ControlFlow { let return_addr = self.pc.as_ptr(); self.state[XReg::lr].set_ptr(return_addr.as_ptr()); - self.pc_rel_jump(offset, 5); + self.pc_rel_jump::(offset); ControlFlow::Continue(()) } @@ -843,14 +850,14 @@ impl OpVisitor for Interpreter<'_> { } fn jump(&mut self, offset: PcRelOffset) -> ControlFlow { - self.pc_rel_jump(offset, 5); + self.pc_rel_jump::(offset); ControlFlow::Continue(()) } fn br_if(&mut self, cond: XReg, offset: PcRelOffset) -> ControlFlow { let cond = self.state[cond].get_u64(); if cond != 0 { - self.pc_rel_jump(offset, 6) + self.pc_rel_jump::(offset) } else { ControlFlow::Continue(()) } @@ -859,7 +866,7 @@ impl OpVisitor for Interpreter<'_> { fn br_if_not(&mut self, cond: XReg, offset: PcRelOffset) -> ControlFlow { let cond = self.state[cond].get_u64(); if cond == 0 { - self.pc_rel_jump(offset, 6) + self.pc_rel_jump::(offset) } else { ControlFlow::Continue(()) } @@ -869,7 +876,7 @@ impl OpVisitor for Interpreter<'_> { let a = self.state[a].get_u32(); let b = self.state[b].get_u32(); if a == b { - self.pc_rel_jump(offset, 7) + self.pc_rel_jump::(offset) } else { ControlFlow::Continue(()) } @@ -879,7 +886,7 @@ impl OpVisitor for Interpreter<'_> { let a = self.state[a].get_u32(); let b = self.state[b].get_u32(); if a != b { - self.pc_rel_jump(offset, 7) + self.pc_rel_jump::(offset) } else { ControlFlow::Continue(()) } @@ -889,7 +896,7 @@ impl OpVisitor for Interpreter<'_> { let a = self.state[a].get_i32(); let b = self.state[b].get_i32(); if 
a < b { - self.pc_rel_jump(offset, 7) + self.pc_rel_jump::(offset) } else { ControlFlow::Continue(()) } @@ -899,7 +906,7 @@ impl OpVisitor for Interpreter<'_> { let a = self.state[a].get_i32(); let b = self.state[b].get_i32(); if a <= b { - self.pc_rel_jump(offset, 7) + self.pc_rel_jump::(offset) } else { ControlFlow::Continue(()) } @@ -909,7 +916,7 @@ impl OpVisitor for Interpreter<'_> { let a = self.state[a].get_u32(); let b = self.state[b].get_u32(); if a < b { - self.pc_rel_jump(offset, 7) + self.pc_rel_jump::(offset) } else { ControlFlow::Continue(()) } @@ -919,7 +926,7 @@ impl OpVisitor for Interpreter<'_> { let a = self.state[a].get_u32(); let b = self.state[b].get_u32(); if a <= b { - self.pc_rel_jump(offset, 7) + self.pc_rel_jump::(offset) } else { ControlFlow::Continue(()) } @@ -929,7 +936,7 @@ impl OpVisitor for Interpreter<'_> { let a = self.state[a].get_u64(); let b = self.state[b].get_u64(); if a == b { - self.pc_rel_jump(offset, 7) + self.pc_rel_jump::(offset) } else { ControlFlow::Continue(()) } @@ -939,7 +946,7 @@ impl OpVisitor for Interpreter<'_> { let a = self.state[a].get_u64(); let b = self.state[b].get_u64(); if a != b { - self.pc_rel_jump(offset, 7) + self.pc_rel_jump::(offset) } else { ControlFlow::Continue(()) } @@ -949,7 +956,7 @@ impl OpVisitor for Interpreter<'_> { let a = self.state[a].get_i64(); let b = self.state[b].get_i64(); if a < b { - self.pc_rel_jump(offset, 7) + self.pc_rel_jump::(offset) } else { ControlFlow::Continue(()) } @@ -959,7 +966,7 @@ impl OpVisitor for Interpreter<'_> { let a = self.state[a].get_i64(); let b = self.state[b].get_i64(); if a <= b { - self.pc_rel_jump(offset, 7) + self.pc_rel_jump::(offset) } else { ControlFlow::Continue(()) } @@ -969,7 +976,7 @@ impl OpVisitor for Interpreter<'_> { let a = self.state[a].get_u64(); let b = self.state[b].get_u64(); if a < b { - self.pc_rel_jump(offset, 7) + self.pc_rel_jump::(offset) } else { ControlFlow::Continue(()) } @@ -979,7 +986,7 @@ impl OpVisitor for 
Interpreter<'_> { let a = self.state[a].get_u64(); let b = self.state[b].get_u64(); if a <= b { - self.pc_rel_jump(offset, 7) + self.pc_rel_jump::(offset) } else { ControlFlow::Continue(()) } @@ -1384,9 +1391,14 @@ impl OpVisitor for Interpreter<'_> { // SAFETY: part of the contract of the interpreter is only dealing with // valid bytecode, so this offset should be safe. self.pc = unsafe { self.pc.offset(idx * 4) }; + + // Decode the `PcRelOffset` without tampering with `self.pc` as the + // jump is relative to `self.pc`. let mut tmp = self.pc; let rel = unwrap_uninhabited(PcRelOffset::decode(&mut tmp)); - self.pc_rel_jump(rel, 0) + let offset = isize::try_from(i32::from(rel)).unwrap(); + self.pc = unsafe { self.pc.offset(offset) }; + ControlFlow::Continue(()) } fn stack_alloc32(&mut self, amt: u32) -> ControlFlow { From 995ee8e09751921398d83d82063afef024606d37 Mon Sep 17 00:00:00 2001 From: Nick Fitzgerald Date: Tue, 10 Dec 2024 07:48:49 -0800 Subject: [PATCH 11/30] Update `arbitrary` to 1.4.1 (#9550) Nothing big here, just a bump to the latest version of `arbitrary` to stay up to date. 
--- Cargo.lock | 8 ++++---- Cargo.toml | 2 +- supply-chain/audits.toml | 5 +++++ supply-chain/imports.lock | 14 ++++++++++++++ 4 files changed, 24 insertions(+), 5 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index dc075f969e35..882e30c199b1 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -139,9 +139,9 @@ checksum = "4c95c10ba0b00a02636238b814946408b1322d5ac4760326e6fb8ec956d85775" [[package]] name = "arbitrary" -version = "1.3.1" +version = "1.4.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a2e1373abdaa212b704512ec2bd8b26bd0b7d5c3f70117411a5d9a451383c859" +checksum = "dde20b3d026af13f561bdd0f15edf01fc734f0dafcedbaf42bba506a9517f223" dependencies = [ "derive_arbitrary", ] @@ -1084,9 +1084,9 @@ dependencies = [ [[package]] name = "derive_arbitrary" -version = "1.3.1" +version = "1.4.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "53e0efad4403bfc52dc201159c4b842a246a14b98c64b55dfd0f2d89729dfeb8" +checksum = "d475dfebcb4854d596b17b09f477616f80f17a550517f2b3615d8c205d5c802b" dependencies = [ "proc-macro2", "quote", diff --git a/Cargo.toml b/Cargo.toml index 707507b408ee..e5d524207ad4 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -196,7 +196,7 @@ unnecessary_cast = 'warn' allow_attributes_without_reason = 'warn' [workspace.dependencies] -arbitrary = { version = "1.3.1" } +arbitrary = { version = "1.4.0" } wasmtime-wmemcheck = { path = "crates/wmemcheck", version = "=29.0.0" } wasmtime = { path = "crates/wasmtime", version = "29.0.0", default-features = false } wasmtime-c-api-macros = { path = "crates/c-api-macros", version = "=29.0.0" } diff --git a/supply-chain/audits.toml b/supply-chain/audits.toml index f945e03aa8a9..e6ce2a06b15b 100644 --- a/supply-chain/audits.toml +++ b/supply-chain/audits.toml @@ -992,6 +992,11 @@ criteria = "safe-to-deploy" version = "1.1.4" notes = "I am the author of this crate." 
+[[audits.arbitrary]] +who = "Nick Fitzgerald " +criteria = "safe-to-deploy" +version = "1.4.1" + [[audits.arrayref]] who = "Nick Fitzgerald " criteria = "safe-to-deploy" diff --git a/supply-chain/imports.lock b/supply-chain/imports.lock index a45a8f645c89..4261bd1ab592 100644 --- a/supply-chain/imports.lock +++ b/supply-chain/imports.lock @@ -437,6 +437,13 @@ user-id = 696 user-login = "fitzgen" user-name = "Nick Fitzgerald" +[[publisher.arbitrary]] +version = "1.4.0" +when = "2024-11-04" +user-id = 696 +user-login = "fitzgen" +user-name = "Nick Fitzgerald" + [[publisher.async-trait]] version = "0.1.71" when = "2023-07-05" @@ -686,6 +693,13 @@ user-id = 696 user-login = "fitzgen" user-name = "Nick Fitzgerald" +[[publisher.derive_arbitrary]] +version = "1.4.0" +when = "2024-11-04" +user-id = 696 +user-login = "fitzgen" +user-name = "Nick Fitzgerald" + [[publisher.dlmalloc]] version = "0.2.4" when = "2022-08-17" From 518ad13e519deeca0027f374704c116bd63b6df7 Mon Sep 17 00:00:00 2001 From: "moh.robati" <42697060+mohrobati@users.noreply.github.com> Date: Tue, 10 Dec 2024 11:06:30 -0500 Subject: [PATCH 12/30] Update ADOPTERS.md (#9773) Added Huawei Cloud as one of the adopters of Wasmtime. --- ADOPTERS.md | 1 + 1 file changed, 1 insertion(+) diff --git a/ADOPTERS.md b/ADOPTERS.md index e211ab5378d6..e8b74e428ef3 100644 --- a/ADOPTERS.md +++ b/ADOPTERS.md @@ -12,6 +12,7 @@ Wasmtime is used in many different production use-cases. This list has grown sig | [Embark Studios](https://www.embark-studios.com/) | [@repi](https://github.com/repi) | ![production](https://img.shields.io/badge/-production-blue?style=flat) | Rust game engine | | [Fastly](https://fastly.com/) | [@fitzgen](https://github.com/fitzgen) | ![production](https://img.shields.io/badge/-production-blue?style=flat) | The Compute@Edge platform helps you compile your custom code to WebAssembly and runs it at the Fastly edge using the WebAssembly System Interface for each compute request. 
| | [Fermyon](https://fermyon.com) | [@tschneidereit](https://github.com/tschneidereit) | ![production](https://img.shields.io/badge/-production-blue?style=flat) | Fermyon Cloud is a cloud application platform for WebAssembly-based serverless functions and microservices. | +| [Huawei](https://www.huawei.com) | [@mohrobati](https://github.com/mohrobati) | ![production](https://img.shields.io/badge/-production-blue?style=flat) | Huawei Cloud uses Wasmtime to run WebAssembly functions in both serverless cloud and on the edge. | | [InfinyOn](https://infinyon.com/) | [@sehz](https://github.com/sehz) | ![production](https://img.shields.io/badge/-production-blue?style=flat) | InfinyOn leverages the power of WebAssembly SmartModules to execute real-time data transformations. | | [Microsoft](https://microsoft.com/) | [@devigned](https://gist.github.com/devigned) | ![production](https://img.shields.io/badge/-production-blue?style=flat) | Microsoft has had Wasmtime in preview for its WebAssembly System Interface (WASI) node pools in Azure Kubernetes Service since October 2021. | | [Redpanda](https://redpanda.com/) | [@rockwotj](https://github.com/rockwotj) | ![beta](https://img.shields.io/badge/-production-blue?style=flat) | Redpanda Data Transforms allow developers to transform data directly in the message broker. | From 438c0329562d36acad1dbc270ee1b5ec96227703 Mon Sep 17 00:00:00 2001 From: Alex Crichton Date: Tue, 10 Dec 2024 09:46:59 -0700 Subject: [PATCH 13/30] Add a guide for adding instructions to Pulley (#9765) * Add a guide for adding instructions to Pulley In helping others contribute to Pulley I've written up the steps I took to make this commit itself. I've picked a random test in `misc_testsuite` that previously didn't work on Pulley and was relatively small. I implemented a few lowerings and some new instructions and now the test passes. 
I've added `pulley/CONTRIBUTING.md` to document the experience along the way in case anyone else is interested to help out in the future. * Fix fuzzer build * Review comments * Fix pulley test * Fix no_std build * Fix fuzz simple push pop in miri --- .../codegen/src/isa/pulley_shared/lower.isle | 10 + crates/wast-util/src/lib.rs | 1 + pulley/CONTRIBUTING.md | 278 ++++++++++++++++++ pulley/fuzz/src/interp.rs | 4 +- pulley/src/interp.rs | 101 ++++--- pulley/src/lib.rs | 6 + 6 files changed, 362 insertions(+), 38 deletions(-) create mode 100644 pulley/CONTRIBUTING.md diff --git a/cranelift/codegen/src/isa/pulley_shared/lower.isle b/cranelift/codegen/src/isa/pulley_shared/lower.isle index dbbd6fad8fb8..461a9dd239ab 100644 --- a/cranelift/codegen/src/isa/pulley_shared/lower.isle +++ b/cranelift/codegen/src/isa/pulley_shared/lower.isle @@ -167,6 +167,16 @@ (rule (lower (has_type $I64 (iadd a b))) (pulley_xadd64 a b)) +;;;; Rules for `idiv` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + +(rule (lower (has_type $I32 (sdiv a b))) + (pulley_xdiv32_s a b)) + +;;;; Rules for `band` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + +(rule (lower (has_type $I32 (band a b))) + (pulley_xand32 a b)) + ;;;; Rules for `icmp` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; (rule 1 (lower (icmp cc a b @ (value_type $I64))) diff --git a/crates/wast-util/src/lib.rs b/crates/wast-util/src/lib.rs index dad07addad5d..55bc58682298 100644 --- a/crates/wast-util/src/lib.rs +++ b/crates/wast-util/src/lib.rs @@ -425,6 +425,7 @@ impl WastTest { "misc_testsuite/stack_overflow.wast", "misc_testsuite/winch/misc.wast", "threads/exports.wast", + "misc_testsuite/control-flow.wast", ]; if supported.iter().any(|part| self.path.ends_with(part)) { diff --git a/pulley/CONTRIBUTING.md b/pulley/CONTRIBUTING.md new file mode 100644 index 000000000000..f103343443da --- /dev/null +++ b/pulley/CONTRIBUTING.md @@ -0,0 +1,278 @@ +# Contributing + +For general contribution to 
Wasmtime, see Wasmtime's [contributing docs][docs]. + +[docs]: https://docs.wasmtime.dev/contributing.html + +## Adding an instruction to Pulley + +So you want to add an instruction to Pulley. If you're reading this in the +not-so-distant future Pulley probably doesn't support all of WebAssembly yet and +you're interested in helping to improve the situation. This is intended to be a +small guide about how to add an instruction to Pulley through an early example +of doing so. + +#### Choose a test to get passing + +First off find a test in this repository, probably a `*.wast` test, which isn't +currently passing. At the time of this writing almost no tests are passing, but +for an up-to-date list check out the `WastTest::should_fail` method in +`crates/wast-util/src/lib.rs`. Here we're going to select +`./tests/misc_testsuite/control-flow.wast` as it's a reasonably small test. + +#### See the test failure + +Run this command: + +``` +$ cargo run --features pulley -- wast --target pulley64 ./tests/misc_testsuite/control-flow.wast +``` + +This builds the `wasmtime` CLI with Pulley support enabled (`--features +pulley`), runs the `wast` subcommand, executes with the pulley target +(`--target pulley64`), and then runs our test. 
As of now this shows: + +``` +$ cargo run --features pulley -- wast --target pulley64 ./tests/misc_testsuite/control-flow.wast + Finished `dev` profile [unoptimized + debuginfo] target(s) in 0.08s + Running `target/debug/wasmtime wast --target pulley64 ./tests/misc_testsuite/control-flow.wast` +Error: failed to run script file './tests/misc_testsuite/control-flow.wast' + +Caused by: + 0: failed directive on ./tests/misc_testsuite/control-flow.wast:77:1 + 1: Compilation error: Unsupported feature: should be implemented in ISLE: inst = `v5 = sdiv.i32 v2, v3`, type = `Some(types::I32)` +``` + +Note that if you run `cargo test --test wast control-flow.wast` it'll also run +the same test under a variety of configurations, but the test is expected to +fail under Pulley. You can update the `WastTest::should_fail` method in +`crates/wast-util/src/lib.rs` to say the test is expected to pass, and then you +can see a similar failure. + +#### Adding an instruction: Pulley + +Here the failure is what's the most common failure in Pulley right now -- the +Pulley Cranelift backend is not yet complete and is missing a lowering. This +means that there is CLIF that cannot be lowered to Pulley bytecode just yet. + +The first thing to do is to probably add a new opcode to Pulley itself as +Pulley probably can't execute this operation just yet. Here we're interested in +signed 32-bit division. + +Pull up `pulley/src/lib.rs` and you'll be editing the `for_each_op!` macro +definition. If this is a "rare" opcode you can edit the `for_each_extended_op!` +macro instead. The syntax is the same between the two macros. 
+ +Here this is a simple instruction, so let's add it directly: + +```rust +/// `dst = src1 / src2` (signed) +xdiv32_s = XDiv32S { operands: BinaryOperands<XReg> }; +``` + +This defines the snake-case name of the instruction (`xdiv32_s`) that is used +by the disassembler and visitor trait, the upper-camel-case name of the +instruction (`XDiv32S`) used for the Rust type and `enum` variant, and +immediates and operands in the instruction itself. In this case it's a binary +operation using integer ("x") registers. + +> Note: By convention, we tend to include the class ("x") and width ("32") of +> registers operated upon by an instruction in its name. This distinguishes +> between, for example, 32-bit integer addition (`xadd32`) and 64-bit floating +> point addition (`fadd64`). + +Rerun our test command and we see: + +``` +$ cargo run --features pulley -- wast --target pulley64 ./tests/misc_testsuite/control-flow.wast + Compiling pulley-interpreter v29.0.0 (/home/alex/code/wasmtime/pulley) +error[E0046]: not all trait items implemented, missing: `xdiv32_s` + --> pulley/src/interp.rs:807:1 + | +807 | impl OpVisitor for Interpreter<'_> { + | ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ missing `xdiv32_s` in implementation + | + ::: pulley/src/decode.rs:574:17 + | +574 | fn $snake_name(&mut self $( $( , $field : $field_ty )* )? ) -> Self::Return; + | ---------------------------------------------------------------------------- `xdiv32_s` from trait + + Compiling cranelift-codegen-meta v0.116.0 (/home/alex/code/wasmtime/cranelift/codegen/meta) +For more information about this error, try `rustc --explain E0046`. +``` + +This indicates that we need to actually implement the new opcode in the +interpreter. Open up `pulley/src/interp.rs` and append to `impl OpVisitor for +Interpreter` or `impl ExtendedOpVisitor for Interpreter` as appropriate. 
Here +we'll add: + +```rust +fn xdiv32_s(&mut self, operands: BinaryOperands<XReg>) -> ControlFlow<Done> { + let a = self.state[operands.src1].get_i32(); + let b = self.state[operands.src2].get_i32(); + match a.checked_div(b) { + Some(result) => { + self.state[operands.dst].set_i32(result); + ControlFlow::Continue(()) + } + None => self.done_trap::<crate::XDiv32S>(), + } +} +``` + +Note that division needs to handle the case that the divisor is 0 or causes an +overflow, hence the use of `checked_div` here. If that happens then a trap is +returned, otherwise interpretation will continue. Also note that the `get_i32` +method is used to specifically match the width and signedness of the +instruction itself, signed 32-bit division. Look around at other instructions +in `interp.rs` for inspiration of how to do various operations. + +Running our test again we get the same error as before! + +``` +$ cargo run --features pulley -- wast --target pulley64 ./tests/misc_testsuite/control-flow.wast + Finished `dev` profile [unoptimized + debuginfo] target(s) in 0.08s + Running `target/debug/wasmtime wast --target pulley64 ./tests/misc_testsuite/control-flow.wast` +Error: failed to run script file './tests/misc_testsuite/control-flow.wast' + +Caused by: + 0: failed directive on ./tests/misc_testsuite/control-flow.wast:77:1 + 1: Compilation error: Unsupported feature: should be implemented in ISLE: inst = `v5 = sdiv.i32 v2, v3`, type = `Some(types::I32)` +``` + +That leads us to the next part... + +#### Adding a Cranelift Lowering + +Next up we need to actually fix the error at hand, a new lowering rule needs to +be added to Cranelift. Here we'll be working in +`cranelift/codegen/src/isa/pulley_shared/lower.isle`. + +Our ISLE lowering rules are generally written like this: + +```lisp +(rule (lower <CLIF>) + <Pulley>) +``` + +This means "when lowering Cranelift's mid-level IR down to Pulley bytecode, and +we match the snippet `<CLIF>`, replace it with `<Pulley>`". 
(For more details, +see the [ISLE Language +Reference](https://github.com/bytecodealliance/wasmtime/blob/main/cranelift/isle/docs/language-reference.md) +and [How ISLE is Integrated with +Cranelift](https://github.com/bytecodealliance/wasmtime/blob/main/cranelift/docs/isle-integration.md)) + + +In our case, we need to match an `sdiv` CLIF instruction that is operating on +32-bit values (this is the `(has_type $I32 ...)` bit) and then replace it with +our new Pulley `xdiv32_s` instruction: + +``` +;;;; Rules for `idiv` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + +(rule (lower (has_type $I32 (sdiv a b))) + (pulley_xdiv32_s a b)) +``` + +Note that ISLE constructors for Pulley instructions, including the +`pulley_xdiv32_s` constructor for our new `xdiv32_s` Pulley instruction, are +automatically generated from the `for_each_op!` macro. + +Running our test again yields: + +``` +Error: failed to run script file './tests/misc_testsuite/control-flow.wast' + +Caused by: + 0: failed directive on ./tests/misc_testsuite/control-flow.wast:83:1 + 1: Compilation error: Unsupported feature: should be implemented in ISLE: inst = `v26 = band.i32 v2, v13 ; v13 = 3`, type = `Some(types::I32)` +``` + +Progress! This is a different error than before. Now it's time to rinse and +repeat these steps. Be sure to skim the rest of `lower.isle` for inspiration on +how to implement lowering rules. You can also look at `lower.isle` for other +architecture backends too for inspiration. 
+ +#### Flagging a test as passing + +After implementing a lowering for `band.i32` our test case is now passing: + +``` +$ cargo run --features pulley -- wast --target pulley64 ./tests/misc_testsuite/control-flow.wast + Finished `dev` profile [unoptimized + debuginfo] target(s) in 13.50s + Running `target/debug/wasmtime wast --target pulley64 ./tests/misc_testsuite/control-flow.wast` +``` + +If we run the test suite though we'll see: + +``` +$ cargo test --test wast control-flow.wast + Finished `test` profile [unoptimized + debuginfo] target(s) in 29.14s + Running tests/wast.rs (target/debug/deps/wast-f83a3ee5e5dbacde) + +running 6 tests +...F.F +failures: + +---- CraneliftPulley/./tests/misc_testsuite/control-flow.wast ---- +this test is flagged as should-fail but it succeeded + +---- CraneliftPulley/pooling/./tests/misc_testsuite/control-flow.wast ---- +this test is flagged as should-fail but it succeeded + + +failures: + CraneliftPulley/./tests/misc_testsuite/control-flow.wast + CraneliftPulley/pooling/./tests/misc_testsuite/control-flow.wast + +test result: FAILED. 4 passed; 2 failed; 0 ignored; 0 measured; 4086 filtered out; finished in 0.05s + +error: test failed, to rerun pass `--test wast` +``` + +This indicates that the test was previously flagged as "should fail", but that +assertion is no longer true! Update the `WastTest::should_fail` method in +`crates/wast-util/src/lib.rs` so that it expects the test to pass and we'll +see: + +``` +$ cargo test --test wast control-flow.wast + Finished `test` profile [unoptimized + debuginfo] target(s) in 0.74s + Running tests/wast.rs (target/debug/deps/wast-f83a3ee5e5dbacde) + +running 6 tests +...... +test result: ok. 6 passed; 0 failed; 0 ignored; 0 measured; 4086 filtered out; finished in 0.05s +``` + +Success! + +But we aren't quite done yet: the new lowerings we added might have made +additional tests that previously failed start passing as well. 
If so, then we +also want to mark those tests as expected to pass now. We can double check by +running the full `wast` test suite: + +``` +$ cargo test --test wast Pulley + Finished `test` profile [unoptimized + debuginfo] target(s) in 0.74s + Running tests/wast.rs (target/debug/deps/wast-f83a3ee5e5dbacde) + + +running 1364 teststest result: ok. 1364 passed; 0 failed; 0 ignored; 0 measured; 2728 filtered out; finished in 0.93s +``` + +Alas, maybe next time! + +#### Clean up and make a PR + +All that's left now is to clean things up, document anything necessary, and make +a pull request. + +To view the complete pull request that implemented `xdiv32_s` and `xand32` +Pulley instructions and got `./tests/misc_testsuite/control-flow.wast` passing +(and also introduced this documentation) check out +[#9765](https://github.com/bytecodealliance/wasmtime/pull/9765). + +Thanks for helping out! diff --git a/pulley/fuzz/src/interp.rs b/pulley/fuzz/src/interp.rs index be968d4ceeca..60efe05f5c61 100644 --- a/pulley/fuzz/src/interp.rs +++ b/pulley/fuzz/src/interp.rs @@ -112,7 +112,9 @@ fn op_is_safe_for_fuzzing(op: &Op) -> bool { | Op::Xslt32(Xslt32 { operands, .. }) | Op::Xslteq32(Xslteq32 { operands, .. }) | Op::Xult32(Xult32 { operands, .. }) - | Op::Xulteq32(Xulteq32 { operands, .. }) => !operands.dst.is_special(), + | Op::Xulteq32(Xulteq32 { operands, .. }) + | Op::XDiv32S(XDiv32S { operands, .. }) + | Op::XAnd32(XAnd32 { operands, .. }) => !operands.dst.is_special(), Op::PushFrame(_) | Op::PopFrame(_) => false, Op::XPush32(_) | Op::XPush64(_) => false, Op::XPop32(_) | Op::XPop64(_) => false, diff --git a/pulley/src/interp.rs b/pulley/src/interp.rs index 5d82af604e69..468f5cb7609c 100644 --- a/pulley/src/interp.rs +++ b/pulley/src/interp.rs @@ -650,7 +650,8 @@ impl MachineState { /// Inner private module to prevent creation of the `Done` structure outside of /// this module. 
mod done { - use super::{Interpreter, MachineState}; + use super::{Encode, Interpreter, MachineState}; + use core::ops::ControlFlow; use core::ptr::NonNull; /// Zero-sized sentinel indicating that pulley execution has halted. @@ -688,24 +689,31 @@ mod done { impl Interpreter<'_> { /// Finishes execution by recording `DoneReason::Trap`. - pub fn done_trap(&mut self, pc: NonNull) -> Done { + /// + /// This method takes an `I` generic parameter indicating which + /// instruction is executing this function and generating a trap. That's + /// used to go backwards from the current `pc` which is just beyond the + /// instruction to point to the instruction itself in the trap metadata + /// returned from the interpreter. + pub fn done_trap(&mut self) -> ControlFlow { + let pc = self.current_pc::(); self.state.done_reason = Some(DoneReason::Trap(pc)); - Done { _priv: () } + ControlFlow::Break(Done { _priv: () }) } /// Finishes execution by recording `DoneReason::CallIndirectHost`. - pub fn done_call_indirect_host(&mut self, id: u8) -> Done { + pub fn done_call_indirect_host(&mut self, id: u8) -> ControlFlow { self.state.done_reason = Some(DoneReason::CallIndirectHost { id, resume: self.pc.as_ptr(), }); - Done { _priv: () } + ControlFlow::Break(Done { _priv: () }) } /// Finishes execution by recording `DoneReason::ReturnToHost`. - pub fn done_return_to_host(&mut self) -> Done { + pub fn done_return_to_host(&mut self) -> ControlFlow { self.state.done_reason = Some(DoneReason::ReturnToHost(())); - Done { _priv: () } + ControlFlow::Break(Done { _priv: () }) } } } @@ -740,10 +748,13 @@ impl Interpreter<'_> { } /// `sp -= size_of::(); *sp = val;` + /// + /// Note that `I` is the instruction which is pushing data to use if a trap + /// is generated. 
#[must_use] - fn push(&mut self, val: T, pc: NonNull) -> ControlFlow { + fn push(&mut self, val: T) -> ControlFlow { let new_sp = self.state[XReg::sp].get_ptr::().wrapping_sub(1); - self.set_sp(new_sp, pc)?; + self.set_sp::(new_sp.cast())?; unsafe { new_sp.write_unaligned(val); } @@ -762,12 +773,16 @@ impl Interpreter<'_> { /// /// Returns a trap if this would result in stack overflow, or if `sp` is /// beneath the base pointer of `self.state.stack`. + /// + /// The `I` parameter here is the instruction that is setting the stack + /// pointer and is used to calculate this instruction's own `pc` if this + /// instruction traps. #[must_use] - fn set_sp(&mut self, sp: *mut T, pc: NonNull) -> ControlFlow { + fn set_sp(&mut self, sp: *mut u8) -> ControlFlow { let sp_raw = sp as usize; let base_raw = self.state.stack.as_ptr() as usize; if sp_raw < base_raw { - return ControlFlow::Break(self.done_trap(pc)); + return self.done_trap::(); } self.set_sp_unchecked(sp); ControlFlow::Continue(()) @@ -790,20 +805,20 @@ impl Interpreter<'_> { fn simple_push_pop() { let mut state = MachineState::with_stack(vec![0; 16]); unsafe { + let mut bytecode = [0; 10]; let mut i = Interpreter { state: &mut state, // this isn't actually read so just manufacture a dummy one - pc: UnsafeBytecodeStream::new((&mut 0).into()), + pc: UnsafeBytecodeStream::new(NonNull::new(bytecode.as_mut_ptr().offset(4)).unwrap()), }; - let pc = NonNull::from(&0); - assert!(i.push(0_i32, pc).is_continue()); + assert!(i.push::(0_i32).is_continue()); assert_eq!(i.pop::(), 0_i32); - assert!(i.push(1_i32, pc).is_continue()); - assert!(i.push(2_i32, pc).is_continue()); - assert!(i.push(3_i32, pc).is_continue()); - assert!(i.push(4_i32, pc).is_continue()); - assert!(i.push(5_i32, pc).is_break()); - assert!(i.push(6_i32, pc).is_break()); + assert!(i.push::(1_i32).is_continue()); + assert!(i.push::(2_i32).is_continue()); + assert!(i.push::(3_i32).is_continue()); + assert!(i.push::(4_i32).is_continue()); + 
assert!(i.push::(5_i32).is_break()); + assert!(i.push::(6_i32).is_break()); assert_eq!(i.pop::(), 4_i32); assert_eq!(i.pop::(), 3_i32); assert_eq!(i.pop::(), 2_i32); @@ -822,7 +837,7 @@ impl OpVisitor for Interpreter<'_> { fn ret(&mut self) -> ControlFlow { let lr = self.state[XReg::lr]; if lr == XRegVal::HOST_RETURN_ADDR { - ControlFlow::Break(self.done_return_to_host()) + self.done_return_to_host() } else { let return_addr = lr.get_ptr(); self.pc = unsafe { UnsafeBytecodeStream::new(NonNull::new_unchecked(return_addr)) }; @@ -1290,29 +1305,25 @@ impl OpVisitor for Interpreter<'_> { } fn xpush32(&mut self, src: XReg) -> ControlFlow { - let me = self.current_pc::(); - self.push(self.state[src].get_u32(), me)?; + self.push::(self.state[src].get_u32())?; ControlFlow::Continue(()) } fn xpush32_many(&mut self, srcs: RegSet) -> ControlFlow { - let me = self.current_pc::(); for src in srcs { - self.push(self.state[src].get_u32(), me)?; + self.push::(self.state[src].get_u32())?; } ControlFlow::Continue(()) } fn xpush64(&mut self, src: XReg) -> ControlFlow { - let me = self.current_pc::(); - self.push(self.state[src].get_u64(), me)?; + self.push::(self.state[src].get_u64())?; ControlFlow::Continue(()) } fn xpush64_many(&mut self, srcs: RegSet) -> ControlFlow { - let me = self.current_pc::(); for src in srcs { - self.push(self.state[src].get_u64(), me)?; + self.push::(self.state[src].get_u64())?; } ControlFlow::Continue(()) } @@ -1346,9 +1357,8 @@ impl OpVisitor for Interpreter<'_> { } fn push_frame(&mut self) -> ControlFlow { - let me = self.current_pc::(); - self.push(self.state[XReg::lr].get_ptr::(), me)?; - self.push(self.state[XReg::fp].get_ptr::(), me)?; + self.push::(self.state[XReg::lr].get_ptr::())?; + self.push::(self.state[XReg::fp].get_ptr::())?; self.state[XReg::fp] = self.state[XReg::sp]; ControlFlow::Continue(()) } @@ -1402,10 +1412,9 @@ impl OpVisitor for Interpreter<'_> { } fn stack_alloc32(&mut self, amt: u32) -> ControlFlow { - let me = 
self.current_pc::(); let amt = usize::try_from(amt).unwrap(); let new_sp = self.state[XReg::sp].get_ptr::().wrapping_sub(amt); - self.set_sp(new_sp, me)?; + self.set_sp::(new_sp)?; ControlFlow::Continue(()) } @@ -1451,6 +1460,25 @@ impl OpVisitor for Interpreter<'_> { self.state[dst].set_i64(src.into()); ControlFlow::Continue(()) } + + fn xdiv32_s(&mut self, operands: BinaryOperands) -> ControlFlow { + let a = self.state[operands.src1].get_i32(); + let b = self.state[operands.src2].get_i32(); + match a.checked_div(b) { + Some(result) => { + self.state[operands.dst].set_i32(result); + ControlFlow::Continue(()) + } + None => self.done_trap::(), + } + } + + fn xand32(&mut self, operands: BinaryOperands) -> ControlFlow { + let a = self.state[operands.src1].get_u32(); + let b = self.state[operands.src2].get_u32(); + self.state[operands.dst].set_u32(a & b); + ControlFlow::Continue(()) + } } impl ExtendedOpVisitor for Interpreter<'_> { @@ -1459,12 +1487,11 @@ impl ExtendedOpVisitor for Interpreter<'_> { } fn trap(&mut self) -> ControlFlow { - let trap_pc = self.current_pc::(); - ControlFlow::Break(self.done_trap(trap_pc)) + self.done_trap::() } fn call_indirect_host(&mut self, id: u8) -> ControlFlow { - ControlFlow::Break(self.done_call_indirect_host(id)) + self.done_call_indirect_host(id) } fn bswap32(&mut self, dst: XReg, src: XReg) -> ControlFlow { diff --git a/pulley/src/lib.rs b/pulley/src/lib.rs index 5574f5af9d4d..e0dbbe289c5b 100644 --- a/pulley/src/lib.rs +++ b/pulley/src/lib.rs @@ -209,6 +209,12 @@ macro_rules! 
for_each_op { sext16 = Sext16 { dst: XReg, src: XReg }; /// `dst = sext(low32(src))` sext32 = Sext32 { dst: XReg, src: XReg }; + + /// `dst = src1 / src2` (signed) + xdiv32_s = XDiv32S { operands: BinaryOperands }; + + /// `dst = src1 & src2` + xand32 = XAnd32 { operands: BinaryOperands }; } }; } From 23e8dcea5442c4f903817f2a5cea42d853bdd5c7 Mon Sep 17 00:00:00 2001 From: Alex Crichton Date: Tue, 10 Dec 2024 11:29:10 -0700 Subject: [PATCH 14/30] pulley: Implement more of loads/stores (#9775) * pulley: Implement more of loads/stores This commit gets the `address.wast` spec test working by filling out more load/store infrastructure in Pulley. In doing so I've done a refactoring of the existing load/store methods. Changes here are: * All load/stores are `*_offset32` now instead of optionally having no offset, an 8-bit offset, or a 64-bit offset. * All x-register loads/stores are prefixed with `x` now. * All loads/stores have "le" for little-endian in their name. * Loads/stores are refactored to have 8 and 16-bit variants. * Sign-extending loads now either extend to 32 or 64 depending on the opcode. * Float loads/stores are added. * Big-endian is handled with explicit big-endian loads/stores instead of `bswap` to handle this transparently in the backend (e.g. for stores not ISLE-generated) and to handle floats. * Remove pulley interpreter fuzz target This is a bit onerous to keep updated and is probably best subsumed by the fuzzing support we have in general for wasm. 
* Update pulley tests * Fixes from a rebase * Review comments * Update test expectations --- cranelift/codegen/meta/src/pulley.rs | 49 ++- .../codegen/src/isa/pulley_shared/abi.rs | 7 +- .../codegen/src/isa/pulley_shared/inst.isle | 48 ++- .../src/isa/pulley_shared/inst/args.rs | 41 +- .../src/isa/pulley_shared/inst/emit.rs | 173 +++++--- .../codegen/src/isa/pulley_shared/inst/mod.rs | 118 ++++-- .../codegen/src/isa/pulley_shared/lower.isle | 127 ++++-- .../src/isa/pulley_shared/lower/isle.rs | 4 - .../codegen/src/isa/pulley_shared/mod.rs | 9 - .../filetests/isa/pulley32/call.clif | 64 +-- .../filetests/isa/pulley32/load.clif | 16 +- .../filetests/isa/pulley32/store.clif | 16 +- .../filetests/isa/pulley32/trap.clif | 2 +- .../filetests/isa/pulley64/call.clif | 98 ++--- .../filetests/isa/pulley64/load.clif | 16 +- .../filetests/isa/pulley64/loadbe.clif | 24 +- .../filetests/isa/pulley64/store.clif | 16 +- .../filetests/isa/pulley64/storebe.clif | 24 +- .../filetests/isa/pulley64/trap.clif | 2 +- .../code_translator/bounds_checks.rs | 10 + crates/wasmtime/src/config.rs | 1 - crates/wasmtime/src/runtime/vm/interpreter.rs | 45 +- .../wasmtime/src/runtime/vm/traphandlers.rs | 1 + crates/wast-util/src/lib.rs | 9 +- fuzz/fuzz_targets/pulley.rs | 5 +- pulley/fuzz/src/interp.rs | 144 ------- pulley/fuzz/src/lib.rs | 3 - pulley/src/interp.rs | 313 +++++++++----- pulley/src/lib.rs | 192 +++++++-- pulley/tests/all/interp.rs | 389 +----------------- tests/disas/pulley/call.wat | 4 +- tests/disas/pulley/epoch-simple.wat | 16 +- 32 files changed, 975 insertions(+), 1011 deletions(-) delete mode 100644 pulley/fuzz/src/interp.rs diff --git a/cranelift/codegen/meta/src/pulley.rs b/cranelift/codegen/meta/src/pulley.rs index 90717fb0c7d9..6e9d3831b783 100644 --- a/cranelift/codegen/meta/src/pulley.rs +++ b/cranelift/codegen/meta/src/pulley.rs @@ -29,20 +29,31 @@ const EXTENDED_OPS: &[Inst<'_>] = pulley_interpreter::for_each_extended_op!(defi enum Operand<'a> { Normal { name: &'a str, 
ty: &'a str }, Writable { name: &'a str, ty: &'a str }, + TrapCode { name: &'a str, ty: &'a str }, Binop { reg: &'a str }, } impl Inst<'_> { fn operands(&self) -> impl Iterator> { - self.fields.iter().map(|(name, ty)| match (*name, *ty) { - ("operands", "BinaryOperands < XReg >") => Operand::Binop { reg: "XReg" }, - (name, "RegSet < XReg >") => Operand::Normal { - name, - ty: "VecXReg", - }, - ("dst", ty) => Operand::Writable { name, ty }, - (name, ty) => Operand::Normal { name, ty }, - }) + self.fields + .iter() + .map(|(name, ty)| match (*name, *ty) { + ("operands", "BinaryOperands < XReg >") => Operand::Binop { reg: "XReg" }, + (name, "RegSet < XReg >") => Operand::Normal { + name, + ty: "VecXReg", + }, + ("dst", ty) => Operand::Writable { name, ty }, + (name, ty) => Operand::Normal { name, ty }, + }) + .chain(if self.name.contains("Trap") { + Some(Operand::TrapCode { + name: "code", + ty: "TrapCode", + }) + } else { + None + }) } fn skip(&self) -> bool { @@ -54,9 +65,6 @@ impl Inst<'_> { // Skip special instructions not used in Cranelift. "XPush32Many" | "XPush64Many" | "XPop32Many" | "XPop64Many" => true, - // The pulley backend has its own trap-with-trap-code. - "Trap" => true, - // Skip more branching-related instructions. n => n.starts_with("Br"), } @@ -99,6 +107,11 @@ pub fn generate_rust(filename: &str, out_dir: &Path) -> Result<(), Error> { } } } + Operand::TrapCode { name, ty: _ } => { + pat.push_str(name); + pat.push_str(","); + format_string.push_str(&format!(" // trap={{{name}:?}}")); + } Operand::Binop { reg: _ } => { pat.push_str("dst, src1, src2,"); format_string.push_str(" {dst}, {src1}, {src2}"); @@ -150,6 +163,7 @@ pub fn generate_rust(filename: &str, out_dir: &Path) -> Result<(), Error> { pat.push_str(","); } } + Operand::TrapCode { .. 
} => {} Operand::Binop { reg: _ } => { pat.push_str("dst, src1, src2,"); uses.push("src1"); @@ -195,6 +209,7 @@ pub fn generate_rust(filename: &str, out_dir: &Path) -> Result<(), Error> { let mut pat = String::new(); let mut args = String::new(); + let mut trap = String::new(); for op in inst.operands() { match op { Operand::Normal { name, ty: _ } | Operand::Writable { name, ty: _ } => { @@ -204,6 +219,11 @@ pub fn generate_rust(filename: &str, out_dir: &Path) -> Result<(), Error> { args.push_str(name); args.push_str(","); } + Operand::TrapCode { name, ty: _ } => { + pat.push_str(name); + pat.push_str(","); + trap.push_str(&format!("sink.add_trap({name});\n")); + } Operand::Binop { reg: _ } => { pat.push_str("dst, src1, src2,"); args.push_str( @@ -216,6 +236,7 @@ pub fn generate_rust(filename: &str, out_dir: &Path) -> Result<(), Error> { rust.push_str(&format!( " RawInst::{name} {{ {pat} }} => {{ + {trap} pulley_interpreter::encode::{snake_name}(sink, {args}) }} " @@ -241,7 +262,7 @@ pub fn generate_isle(filename: &str, out_dir: &Path) -> Result<(), Error> { isle.push_str(inst.name); for op in inst.operands() { match op { - Operand::Normal { name, ty } => { + Operand::Normal { name, ty } | Operand::TrapCode { name, ty } => { isle.push_str(&format!("\n ({name} {ty})")); } Operand::Writable { name, ty } => { @@ -276,7 +297,7 @@ pub fn generate_isle(filename: &str, out_dir: &Path) -> Result<(), Error> { let mut ops = Vec::new(); for op in inst.operands() { match op { - Operand::Normal { name, ty } => { + Operand::Normal { name, ty } | Operand::TrapCode { name, ty } => { isle.push_str(ty); rule.push_str(name); ops.push(name); diff --git a/cranelift/codegen/src/isa/pulley_shared/abi.rs b/cranelift/codegen/src/isa/pulley_shared/abi.rs index f65ac69eec55..8ea8fb0c9116 100644 --- a/cranelift/codegen/src/isa/pulley_shared/abi.rs +++ b/cranelift/codegen/src/isa/pulley_shared/abi.rs @@ -243,7 +243,6 @@ where } fn gen_load_base_offset(into_reg: Writable, base: Reg, offset: 
i32, ty: Type) -> Self::I { - let offset = i64::from(offset); let base = XReg::try_from(base).unwrap(); let mem = Amode::RegOffset { base, offset }; Inst::gen_load(into_reg, mem, ty, MemFlags::trusted()).into() @@ -365,7 +364,7 @@ where Inst::gen_load( writable_fp_reg(), Amode::SpOffset { - offset: i64::from(incoming_args_diff), + offset: i32::try_from(incoming_args_diff).unwrap(), }, I64, MemFlags::trusted(), @@ -423,7 +422,7 @@ where insts.push( Inst::gen_store( Amode::SpOffset { - offset: i64::from(stack_size - cur_offset), + offset: i32::try_from(stack_size - cur_offset).unwrap(), }, Reg::from(reg.to_reg()), ty, @@ -474,7 +473,7 @@ where Inst::gen_load( reg.map(Reg::from), Amode::SpOffset { - offset: i64::from(stack_size - cur_offset), + offset: i32::try_from(stack_size - cur_offset).unwrap(), }, ty, MemFlags::trusted(), diff --git a/cranelift/codegen/src/isa/pulley_shared/inst.isle b/cranelift/codegen/src/isa/pulley_shared/inst.isle index 59c720a605b8..18661d64ddbd 100644 --- a/cranelift/codegen/src/isa/pulley_shared/inst.isle +++ b/cranelift/codegen/src/isa/pulley_shared/inst.isle @@ -30,9 +30,6 @@ ;;;; Actual Instructions ;;;; - ;; Raise a trap. - (Trap (code TrapCode)) - ;; Trap if `src1 cond src2`. (TrapIf (cond IntCC) (size OperandSize) (src1 XReg) (src2 XReg) (code TrapCode)) @@ -77,11 +74,16 @@ ;; Load the memory address referenced by `mem` into `dst`. (LoadAddr (dst WritableXReg) (mem Amode)) - ;; Loads. - (Load (dst WritableReg) (mem Amode) (ty Type) (flags MemFlags) (ext ExtKind)) + ;; Load `ty` bytes from memory pointed to by `mem` and store in `dst`. + ;; + ;; How much is written to the register is defined by `ExtKind`. The `flags` + ;; control behavior such as endianness. + (XLoad (dst WritableXReg) (mem Amode) (ty Type) (flags MemFlags) (ext ExtKind)) + (FLoad (dst WritableFReg) (mem Amode) (ty Type) (flags MemFlags)) ;; Stores. 
- (Store (mem Amode) (src Reg) (ty Type) (flags MemFlags)) + (XStore (mem Amode) (src XReg) (ty Type) (flags MemFlags)) + (FStore (mem Amode) (src FReg) (ty Type) (flags MemFlags)) ;; A raw pulley instruction generated at compile-time via Pulley's ;; `for_each_op!` macro. This variant has `pulley_*` constructors to @@ -104,13 +106,13 @@ (type Amode (enum - (SpOffset (offset i64)) - (RegOffset (base XReg) (offset i64)) + (SpOffset (offset i32)) + (RegOffset (base XReg) (offset i32)) (Stack (amode StackAMode)) ) ) -(type ExtKind (enum None Sign Zero)) +(type ExtKind (enum None Sign32 Sign64 Zero32 Zero64)) ;;;; Newtypes for Different Register Classes ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; @@ -345,10 +347,6 @@ ;;;; Instruction Constructors ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; -(decl pulley_trap (TrapCode) SideEffectNoResult) -(rule (pulley_trap code) - (SideEffectNoResult.Inst (MInst.Trap code))) - (decl pulley_trap_if (IntCC OperandSize XReg XReg TrapCode) SideEffectNoResult) (rule (pulley_trap_if cond size src1 src2 code) (SideEffectNoResult.Inst (MInst.TrapIf cond size src1 src2 code))) @@ -400,15 +398,25 @@ (rule (pulley_br_if_xulteq32 a b taken not_taken) (SideEffectNoResult.Inst (MInst.BrIfXulteq32 a b taken not_taken))) -(decl pulley_load (Amode Type MemFlags ExtKind) Reg) -(rule (pulley_load amode ty flags ext) - (let ((dst WritableReg (temp_writable_reg ty)) - (_ Unit (emit (MInst.Load dst amode ty flags ext)))) +(decl pulley_xload (Amode Type MemFlags ExtKind) XReg) +(rule (pulley_xload amode ty flags ext) + (let ((dst WritableXReg (temp_writable_xreg)) + (_ Unit (emit (MInst.XLoad dst amode ty flags ext)))) + dst)) + +(decl pulley_xstore (Amode XReg Type MemFlags) SideEffectNoResult) +(rule (pulley_xstore amode src ty flags) + (SideEffectNoResult.Inst (MInst.XStore amode src ty flags))) + +(decl pulley_fload (Amode Type MemFlags) FReg) +(rule (pulley_fload amode ty flags) + (let ((dst WritableFReg (temp_writable_freg)) + (_ Unit (emit 
(MInst.FLoad dst amode ty flags)))) dst)) -(decl pulley_store (Amode Reg Type MemFlags) SideEffectNoResult) -(rule (pulley_store amode src ty flags) - (SideEffectNoResult.Inst (MInst.Store amode src ty flags))) +(decl pulley_fstore (Amode FReg Type MemFlags) SideEffectNoResult) +(rule (pulley_fstore amode src ty flags) + (SideEffectNoResult.Inst (MInst.FStore amode src ty flags))) (decl gen_br_table (XReg MachLabel BoxVecMachLabel) Unit) (rule (gen_br_table idx default labels) diff --git a/cranelift/codegen/src/isa/pulley_shared/inst/args.rs b/cranelift/codegen/src/isa/pulley_shared/inst/args.rs index 0d6dc6161104..b00a0aa82b61 100644 --- a/cranelift/codegen/src/isa/pulley_shared/inst/args.rs +++ b/cranelift/codegen/src/isa/pulley_shared/inst/args.rs @@ -156,35 +156,38 @@ impl Amode { } } - pub(crate) fn get_base_register(&self) -> Option { + pub(crate) fn get_base_register(&self) -> Option { match self { Amode::RegOffset { base, offset: _ } => Some((*base).into()), - Amode::SpOffset { .. } | Amode::Stack { .. } => Some(stack_reg()), + Amode::SpOffset { .. } | Amode::Stack { .. } => Some(XReg::new(stack_reg()).unwrap()), } } - pub(crate) fn get_offset_with_state

(&self, state: &EmitState

) -> i64 + pub(crate) fn get_offset_with_state

(&self, state: &EmitState

) -> i32 where P: PulleyTargetKind, { match self { Amode::RegOffset { base: _, offset } | Amode::SpOffset { offset } => *offset, - Amode::Stack { amode } => match amode { - StackAMode::IncomingArg(offset, stack_args_size) => { - let offset = i64::from(*stack_args_size) - *offset; - let frame_layout = state.frame_layout(); - let sp_offset = frame_layout.tail_args_size - + frame_layout.setup_area_size - + frame_layout.clobber_size - + frame_layout.fixed_frame_storage_size - + frame_layout.outgoing_args_size; - i64::from(sp_offset) - offset - } - StackAMode::Slot(offset) => { - offset + i64::from(state.frame_layout().outgoing_args_size) - } - StackAMode::OutgoingArg(offset) => *offset, - }, + Amode::Stack { amode } => { + let offset64 = match amode { + StackAMode::IncomingArg(offset, stack_args_size) => { + let offset = i64::from(*stack_args_size) - *offset; + let frame_layout = state.frame_layout(); + let sp_offset = frame_layout.tail_args_size + + frame_layout.setup_area_size + + frame_layout.clobber_size + + frame_layout.fixed_frame_storage_size + + frame_layout.outgoing_args_size; + i64::from(sp_offset) - offset + } + StackAMode::Slot(offset) => { + offset + i64::from(state.frame_layout().outgoing_args_size) + } + StackAMode::OutgoingArg(offset) => *offset, + }; + i32::try_from(offset64).unwrap() + } } } } diff --git a/cranelift/codegen/src/isa/pulley_shared/inst/emit.rs b/cranelift/codegen/src/isa/pulley_shared/inst/emit.rs index a4b862fad2ae..fb868c2c60f5 100644 --- a/cranelift/codegen/src/isa/pulley_shared/inst/emit.rs +++ b/cranelift/codegen/src/isa/pulley_shared/inst/emit.rs @@ -1,14 +1,13 @@ //! Pulley binary code emission. 
use super::*; -use crate::ir; +use crate::ir::{self, Endianness}; use crate::isa::pulley_shared::abi::PulleyMachineDeps; use crate::isa::pulley_shared::PointerWidth; use core::marker::PhantomData; use cranelift_control::ControlPlane; use pulley_interpreter::encode as enc; use pulley_interpreter::regs::BinaryOperands; -use pulley_interpreter::regs::Reg as _; pub struct EmitInfo { #[allow(dead_code)] // Will get used as we fill out this backend. @@ -28,6 +27,15 @@ impl EmitInfo { isa_flags, } } + + fn endianness(&self, flags: MemFlags) -> Endianness { + let target_endianness = if self.isa_flags.big_endian() { + Endianness::Big + } else { + Endianness::Little + }; + flags.endianness(target_endianness) + } } /// State carried between emissions of a sequence of instructions. @@ -124,11 +132,6 @@ fn pulley_emit

( // Pseduo-instructions that don't actually encode to anything. Inst::Args { .. } | Inst::Rets { .. } | Inst::Unwind { .. } => {} - Inst::Trap { code } => { - sink.add_trap(*code); - enc::trap(sink); - } - Inst::TrapIf { cond, size, @@ -387,8 +390,6 @@ fn pulley_emit

( let offset = mem.get_offset_with_state(state); if let Some(base) = base { - let base = XReg::new(base).unwrap(); - if offset == 0 { enc::xmov(sink, dst, base); } else { @@ -396,10 +397,8 @@ fn pulley_emit

( enc::xconst8(sink, dst, offset); } else if let Ok(offset) = i16::try_from(offset) { enc::xconst16(sink, dst, offset); - } else if let Ok(offset) = i32::try_from(offset) { - enc::xconst32(sink, dst, offset); } else { - enc::xconst64(sink, dst, offset); + enc::xconst32(sink, dst, offset); } match P::pointer_width() { @@ -416,55 +415,141 @@ fn pulley_emit

( } } - Inst::Load { + Inst::XLoad { dst, mem, ty, - flags: _, + flags, ext, } => { + use Endianness as E; use ExtKind as X; let r = mem.get_base_register().unwrap(); - let r = reg_to_pulley_xreg(r); - let dst = reg_to_pulley_xreg(dst.to_reg()); let x = mem.get_offset_with_state(state); - match (*ext, *ty, i8::try_from(x)) { - (X::Sign, types::I32, Ok(0)) => enc::load32_s(sink, dst, r), - (X::Sign, types::I32, Ok(x)) => enc::load32_s_offset8(sink, dst, r, x), - (X::Sign, types::I32, Err(_)) => enc::load32_s_offset64(sink, dst, r, x), - - (X::Zero, types::I32, Ok(0)) => enc::load32_u(sink, dst, r), - (X::Zero, types::I32, Ok(x)) => enc::load32_u_offset8(sink, dst, r, x), - (X::Zero, types::I32, Err(_)) => enc::load32_u_offset64(sink, dst, r, x), - - (_, types::I64, Ok(0)) => enc::load64(sink, dst, r), - (_, types::I64, Ok(x)) => enc::load64_offset8(sink, dst, r, x), - (_, types::I64, Err(_)) => enc::load64_offset64(sink, dst, r, x), + let endian = emit_info.endianness(*flags); + match *ty { + I8 => match ext { + X::None | X::Zero32 => enc::xload8_u32_offset32(sink, dst, r, x), + X::Zero64 => enc::xload8_u64_offset32(sink, dst, r, x), + X::Sign32 => enc::xload8_s32_offset32(sink, dst, r, x), + X::Sign64 => enc::xload8_s64_offset32(sink, dst, r, x), + }, + I16 => match (ext, endian) { + (X::None | X::Zero32, E::Little) => { + enc::xload16le_u32_offset32(sink, dst, r, x); + } + (X::Sign32, E::Little) => { + enc::xload16le_s32_offset32(sink, dst, r, x); + } + (X::Zero64, E::Little) => { + enc::xload16le_u64_offset32(sink, dst, r, x); + } + (X::Sign64, E::Little) => { + enc::xload16le_s64_offset32(sink, dst, r, x); + } + (X::None | X::Zero32 | X::Zero64, E::Big) => { + enc::xload16be_u64_offset32(sink, dst, r, x); + } + (X::Sign32 | X::Sign64, E::Big) => { + enc::xload16be_s64_offset32(sink, dst, r, x); + } + }, + I32 => match (ext, endian) { + (X::None | X::Zero32 | X::Sign32, E::Little) => { + enc::xload32le_offset32(sink, dst, r, x); + } + (X::Zero64, E::Little) => { 
+ enc::xload32le_u64_offset32(sink, dst, r, x); + } + (X::Sign64, E::Little) => { + enc::xload32le_s64_offset32(sink, dst, r, x); + } + (X::None | X::Zero32 | X::Zero64, E::Big) => { + enc::xload32be_u64_offset32(sink, dst, r, x); + } + (X::Sign32 | X::Sign64, E::Big) => { + enc::xload32be_s64_offset32(sink, dst, r, x); + } + }, + I64 => match endian { + E::Little => enc::xload64le_offset32(sink, dst, r, x), + E::Big => enc::xload64be_offset32(sink, dst, r, x), + }, + _ => unimplemented!("xload ty={ty:?}"), + } + } - (..) => unimplemented!("load ext={ext:?} ty={ty}"), + Inst::FLoad { + dst, + mem, + ty, + flags, + } => { + use Endianness as E; + let r = mem.get_base_register().unwrap(); + let x = mem.get_offset_with_state(state); + let endian = emit_info.endianness(*flags); + match *ty { + F32 => match endian { + E::Little => enc::fload32le_offset32(sink, dst, r, x), + E::Big => enc::fload32be_offset32(sink, dst, r, x), + }, + F64 => match endian { + E::Little => enc::fload64le_offset32(sink, dst, r, x), + E::Big => enc::fload64be_offset32(sink, dst, r, x), + }, + _ => unimplemented!("fload ty={ty:?}"), } } - Inst::Store { + Inst::XStore { mem, src, ty, - flags: _, + flags, } => { + use Endianness as E; let r = mem.get_base_register().unwrap(); - let r = reg_to_pulley_xreg(r); - let src = reg_to_pulley_xreg(*src); let x = mem.get_offset_with_state(state); - match (*ty, i8::try_from(x)) { - (types::I32, Ok(0)) => enc::store32(sink, r, src), - (types::I32, Ok(x)) => enc::store32_offset8(sink, r, x, src), - (types::I32, Err(_)) => enc::store32_offset64(sink, r, x, src), - - (types::I64, Ok(0)) => enc::store64(sink, r, src), - (types::I64, Ok(x)) => enc::store64_offset8(sink, r, x, src), - (types::I64, Err(_)) => enc::store64_offset64(sink, r, x, src), + let endian = emit_info.endianness(*flags); + match *ty { + I8 => enc::xstore8_offset32(sink, r, x, src), + I16 => match endian { + E::Little => enc::xstore16le_offset32(sink, r, x, src), + E::Big => 
enc::xstore16be_offset32(sink, r, x, src), + }, + I32 => match endian { + E::Little => enc::xstore32le_offset32(sink, r, x, src), + E::Big => enc::xstore32be_offset32(sink, r, x, src), + }, + I64 => match endian { + E::Little => enc::xstore64le_offset32(sink, r, x, src), + E::Big => enc::xstore64be_offset32(sink, r, x, src), + }, + _ => unimplemented!("xstore ty={ty:?}"), + } + } - (..) => todo!(), + Inst::FStore { + mem, + src, + ty, + flags, + } => { + use Endianness as E; + let r = mem.get_base_register().unwrap(); + let x = mem.get_offset_with_state(state); + let endian = emit_info.endianness(*flags); + match *ty { + F32 => match endian { + E::Little => enc::fstore32le_offset32(sink, r, x, src), + E::Big => enc::fstore32be_offset32(sink, r, x, src), + }, + F64 => match endian { + E::Little => enc::fstore64le_offset32(sink, r, x, src), + E::Big => enc::fstore64be_offset32(sink, r, x, src), + }, + _ => unimplemented!("fstore ty={ty:?}"), } } @@ -557,7 +642,3 @@ fn br_if_cond_helper

( sink.add_uncond_branch(taken_end, not_taken_end, *not_taken); enc::jump(sink, 0x00000000); } - -fn reg_to_pulley_xreg(r: Reg) -> pulley_interpreter::XReg { - pulley_interpreter::XReg::new(r.to_real_reg().unwrap().hw_enc()).unwrap() -} diff --git a/cranelift/codegen/src/isa/pulley_shared/inst/mod.rs b/cranelift/codegen/src/isa/pulley_shared/inst/mod.rs index dd71f31050bf..0678a02c48d3 100644 --- a/cranelift/codegen/src/isa/pulley_shared/inst/mod.rs +++ b/cranelift/codegen/src/isa/pulley_shared/inst/mod.rs @@ -44,22 +44,40 @@ mod generated { impl Inst { /// Generic constructor for a load (zero-extending where appropriate). pub fn gen_load(dst: Writable, mem: Amode, ty: Type, flags: MemFlags) -> Inst { - Inst::Load { - dst, - mem, - ty, - flags, - ext: ExtKind::Zero, + if ty.is_int() { + Inst::XLoad { + dst: dst.map(|r| XReg::new(r).unwrap()), + mem, + ty, + flags, + ext: ExtKind::None, + } + } else { + Inst::FLoad { + dst: dst.map(|r| FReg::new(r).unwrap()), + mem, + ty, + flags, + } } } /// Generic constructor for a store. pub fn gen_store(mem: Amode, from_reg: Reg, ty: Type, flags: MemFlags) -> Inst { - Inst::Store { - mem, - src: from_reg, - ty, - flags, + if ty.is_int() { + Inst::XStore { + mem, + src: XReg::new(from_reg).unwrap(), + ty, + flags, + } + } else { + Inst::FStore { + mem, + src: FReg::new(from_reg).unwrap(), + ty, + flags, + } } } } @@ -77,7 +95,7 @@ fn pulley_get_operands(inst: &mut Inst, collector: &mut impl OperandVisitor) { } } - Inst::Unwind { .. } | Inst::Trap { .. } | Inst::Nop => {} + Inst::Unwind { .. 
} | Inst::Nop => {} Inst::TrapIf { cond: _, @@ -183,7 +201,7 @@ fn pulley_get_operands(inst: &mut Inst, collector: &mut impl OperandVisitor) { mem.get_operands(collector); } - Inst::Load { + Inst::XLoad { dst, mem, ty: _, @@ -194,7 +212,27 @@ fn pulley_get_operands(inst: &mut Inst, collector: &mut impl OperandVisitor) { mem.get_operands(collector); } - Inst::Store { + Inst::XStore { + mem, + src, + ty: _, + flags: _, + } => { + mem.get_operands(collector); + collector.reg_use(src); + } + + Inst::FLoad { + dst, + mem, + ty: _, + flags: _, + } => { + collector.reg_def(dst); + mem.get_operands(collector); + } + + Inst::FStore { mem, src, ty: _, @@ -302,7 +340,9 @@ where fn is_safepoint(&self) -> bool { match self.inst { - Inst::Trap { .. } => true, + Inst::Raw { + raw: RawInst::Trap { .. }, + } => true, _ => false, } } @@ -326,7 +366,9 @@ where fn is_trap(&self) -> bool { match self.inst { - Inst::Trap { .. } => true, + Inst::Raw { + raw: RawInst::Trap { .. }, + } => true, _ => false, } } @@ -501,8 +543,10 @@ impl Inst { let format_ext = |ext: ExtKind| -> &'static str { match ext { ExtKind::None => "", - ExtKind::Sign => "_s", - ExtKind::Zero => "_u", + ExtKind::Sign32 => "_s32", + ExtKind::Sign64 => "_s64", + ExtKind::Zero32 => "_u32", + ExtKind::Zero64 => "_u64", } }; @@ -528,8 +572,6 @@ impl Inst { Inst::Unwind { inst } => format!("unwind {inst:?}"), - Inst::Trap { code } => format!("trap // code = {code:?}"), - Inst::TrapIf { cond, size, @@ -660,21 +702,45 @@ impl Inst { format!("{dst} = load_addr {mem}") } - Inst::Load { + Inst::XLoad { dst, mem, ty, flags, ext, } => { - let dst = format_reg(dst.to_reg()); + let dst = format_reg(*dst.to_reg()); let ty = ty.bits(); let ext = format_ext(*ext); let mem = mem.to_string(); - format!("{dst} = load{ty}{ext} {mem} // flags ={flags}") + format!("{dst} = xload{ty}{ext} {mem} // flags ={flags}") + } + + Inst::XStore { + mem, + src, + ty, + flags, + } => { + let ty = ty.bits(); + let mem = mem.to_string(); + let src = 
format_reg(**src); + format!("xstore{ty} {mem}, {src} // flags = {flags}") + } + + Inst::FLoad { + dst, + mem, + ty, + flags, + } => { + let dst = format_reg(*dst.to_reg()); + let ty = ty.bits(); + let mem = mem.to_string(); + format!("{dst} = fload{ty} {mem} // flags ={flags}") } - Inst::Store { + Inst::FStore { mem, src, ty, @@ -682,8 +748,8 @@ impl Inst { } => { let ty = ty.bits(); let mem = mem.to_string(); - let src = format_reg(*src); - format!("store{ty} {mem}, {src} // flags = {flags}") + let src = format_reg(**src); + format!("fstore{ty} {mem}, {src} // flags = {flags}") } Inst::BrTable { diff --git a/cranelift/codegen/src/isa/pulley_shared/lower.isle b/cranelift/codegen/src/isa/pulley_shared/lower.isle index 461a9dd239ab..8e84c17058a6 100644 --- a/cranelift/codegen/src/isa/pulley_shared/lower.isle +++ b/cranelift/codegen/src/isa/pulley_shared/lower.isle @@ -167,6 +167,20 @@ (rule (lower (has_type $I64 (iadd a b))) (pulley_xadd64 a b)) +;;;; Rules for `isub` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + +(rule (lower (has_type $I8 (isub a b))) + (pulley_xsub32 a b)) + +(rule (lower (has_type $I16 (isub a b))) + (pulley_xsub32 a b)) + +(rule (lower (has_type $I32 (isub a b))) + (pulley_xsub32 a b)) + +(rule (lower (has_type $I64 (isub a b))) + (pulley_xsub64 a b)) + ;;;; Rules for `idiv` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; (rule (lower (has_type $I32 (sdiv a b))) @@ -233,38 +247,69 @@ (rule (lower_icmp ty (IntCC.UnsignedGreaterThanOrEqual) a b) (lower_icmp ty (IntCC.UnsignedLessThanOrEqual) b a)) -;;;; Rules for `load` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +;;;; Rules for `load` and friends ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + +(decl amode (Value Offset32) Amode) +(rule (amode addr (offset32 offset)) (Amode.RegOffset addr offset)) + +(rule (lower (has_type (ty_int ty) (load flags addr offset))) + (pulley_xload (amode addr offset) ty flags (ExtKind.None))) + +(rule 1 (lower (has_type 
(ty_scalar_float ty) (load flags addr offset))) + (pulley_fload (amode addr offset) ty flags)) + +(rule 0 (lower (has_type (ty_int (fits_in_32 _)) (uload8 flags addr offset))) + (pulley_xload (amode addr offset) $I8 flags (ExtKind.Zero32))) + +(rule 0 (lower (has_type (ty_int (fits_in_32 _)) (uload16 flags addr offset))) + (pulley_xload (amode addr offset) $I16 flags (ExtKind.Zero32))) + +(rule 0 (lower (has_type (ty_int (fits_in_32 _)) (uload32 flags addr offset))) + (pulley_xload (amode addr offset) $I32 flags (ExtKind.None))) + +(rule 1 (lower (has_type $I64 (uload8 flags addr offset))) + (pulley_xload (amode addr offset) $I8 flags (ExtKind.Zero64))) + +(rule 1 (lower (has_type $I64 (uload16 flags addr offset))) + (pulley_xload (amode addr offset) $I16 flags (ExtKind.Zero64))) + +(rule 1 (lower (has_type $I64 (uload32 flags addr offset))) + (pulley_xload (amode addr offset) $I32 flags (ExtKind.Zero64))) + +(rule 0 (lower (has_type (ty_int (fits_in_32 _)) (sload8 flags addr offset))) + (pulley_xload (amode addr offset) $I8 flags (ExtKind.Sign32))) + +(rule 0 (lower (has_type (ty_int (fits_in_32 _)) (sload16 flags addr offset))) + (pulley_xload (amode addr offset) $I16 flags (ExtKind.Sign32))) -(rule (lower (has_type ty (load flags addr (offset32 offset)))) - (let ((le Reg (pulley_load (Amode.RegOffset addr (i32_as_i64 offset)) - ty - flags - (ExtKind.Zero)))) - (xswap_if_be le ty flags))) +(rule 0 (lower (has_type (ty_int (fits_in_32 _)) (sload32 flags addr offset))) + (pulley_xload (amode addr offset) $I32 flags (ExtKind.None))) +(rule 1 (lower (has_type $I64 (sload8 flags addr offset))) + (pulley_xload (amode addr offset) $I8 flags (ExtKind.Sign64))) -;;;; Rules for `store` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +(rule 1 (lower (has_type $I64 (sload16 flags addr offset))) + (pulley_xload (amode addr offset) $I16 flags (ExtKind.Sign64))) -(type Endianness extern (enum Big Little)) -(decl pure endianness (MemFlags) Endianness) -(extern 
constructor endianness endianness) +(rule 1 (lower (has_type $I64 (sload32 flags addr offset))) + (pulley_xload (amode addr offset) $I32 flags (ExtKind.Sign64))) -(rule (lower (store flags src @ (value_type ty) addr (offset32 offset))) - (side_effect (pulley_store (Amode.RegOffset addr (i32_as_i64 offset)) - (xswap_if_be src ty flags) - ty - flags))) +;;;; Rules for `store` and friends ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; -(decl xswap_if_be (XReg Type MemFlags) XReg) -(rule (xswap_if_be val _ty flags) - (if-let (Endianness.Little) (endianness flags)) - val) -(rule (xswap_if_be val $I32 flags) - (if-let (Endianness.Big) (endianness flags)) - (pulley_bswap32 val)) -(rule (xswap_if_be val $I64 flags) - (if-let (Endianness.Big) (endianness flags)) - (pulley_bswap64 val)) +(rule (lower (store flags src @ (value_type ty) addr offset)) + (side_effect (pulley_xstore (amode addr offset) src ty flags))) + +(rule 1 (lower (store flags src @ (value_type (ty_scalar_float ty)) addr offset)) + (side_effect (pulley_fstore (amode addr offset) src ty flags))) + +(rule (lower (istore8 flags src addr offset)) + (side_effect (pulley_xstore (amode addr offset) src $I8 flags))) + +(rule (lower (istore16 flags src addr offset)) + (side_effect (pulley_xstore (amode addr offset) src $I16 flags))) + +(rule (lower (istore32 flags src addr offset)) + (side_effect (pulley_xstore (amode addr offset) src $I32 flags))) ;;;; Rules for `stack_addr` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; @@ -276,3 +321,33 @@ (let ((dst WritableXReg (temp_writable_xreg)) (_ Unit (emit (abi_stackslot_addr dst stack_slot offset)))) dst)) + +;;;; Rules for `uextend` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + +(rule (lower (has_type (fits_in_64 _) (uextend val @ (value_type $I8)))) + (pulley_zext8 val)) + +(rule (lower (has_type (fits_in_64 _) (uextend val @ (value_type $I16)))) + (pulley_zext16 val)) + +(rule (lower (has_type (fits_in_64 _) (uextend val @ (value_type $I32)))) + 
(pulley_zext32 val)) + +;;;; Rules for `sextend` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + +(rule (lower (has_type (fits_in_64 _) (sextend val @ (value_type $I8)))) + (pulley_sext8 val)) + +(rule (lower (has_type (fits_in_64 _) (sextend val @ (value_type $I16)))) + (pulley_sext16 val)) + +(rule (lower (has_type (fits_in_64 _) (sextend val @ (value_type $I32)))) + (pulley_sext32 val)) + +;;;; Rules for `uadd_overflow_trap` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + +(rule (lower (has_type $I32 (uadd_overflow_trap a b tc))) + (pulley_xadd32_uoverflow_trap a b tc)) + +(rule (lower (has_type $I64 (uadd_overflow_trap a b tc))) + (pulley_xadd64_uoverflow_trap a b tc)) diff --git a/cranelift/codegen/src/isa/pulley_shared/lower/isle.rs b/cranelift/codegen/src/isa/pulley_shared/lower/isle.rs index cbe78d7d31d0..ae61bbc18fbc 100644 --- a/cranelift/codegen/src/isa/pulley_shared/lower/isle.rs +++ b/cranelift/codegen/src/isa/pulley_shared/lower/isle.rs @@ -114,10 +114,6 @@ where fn lr_reg(&mut self) -> XReg { XReg::new(regs::lr_reg()).unwrap() } - - fn endianness(&mut self, flags: MemFlags) -> Endianness { - flags.endianness(self.backend.target_endianness()) - } } /// The main entry point for lowering with ISLE. 
diff --git a/cranelift/codegen/src/isa/pulley_shared/mod.rs b/cranelift/codegen/src/isa/pulley_shared/mod.rs index faf3a3ca72ef..5387fb3ae01c 100644 --- a/cranelift/codegen/src/isa/pulley_shared/mod.rs +++ b/cranelift/codegen/src/isa/pulley_shared/mod.rs @@ -7,7 +7,6 @@ mod settings; use self::inst::EmitInfo; use super::{Builder as IsaBuilder, FunctionAlignment}; -use crate::ir::Endianness; use crate::{ dominator_tree::DominatorTree, ir, @@ -125,14 +124,6 @@ where let abi = abi::PulleyCallee::new(func, self, &self.isa_flags, &sigs)?; machinst::compile::(func, domtree, self, abi, emit_info, sigs, ctrl_plane) } - - pub fn target_endianness(&self) -> Endianness { - if self.isa_flags.big_endian() { - Endianness::Big - } else { - Endianness::Little - } - } } impl

TargetIsa for PulleyBackend

diff --git a/cranelift/filetests/filetests/isa/pulley32/call.clif b/cranelift/filetests/filetests/isa/pulley32/call.clif index ab77f37ab5a5..6386273c1078 100644 --- a/cranelift/filetests/filetests/isa/pulley32/call.clif +++ b/cranelift/filetests/filetests/isa/pulley32/call.clif @@ -133,12 +133,12 @@ block0: ; stack_alloc32 48 ; block0: ; xconst8 x15, 0 -; store64 OutgoingArg(0), x15 // flags = notrap aligned -; store64 OutgoingArg(8), x15 // flags = notrap aligned -; store64 OutgoingArg(16), x15 // flags = notrap aligned -; store64 OutgoingArg(24), x15 // flags = notrap aligned -; store64 OutgoingArg(32), x15 // flags = notrap aligned -; store64 OutgoingArg(40), x15 // flags = notrap aligned +; xstore64 OutgoingArg(0), x15 // flags = notrap aligned +; xstore64 OutgoingArg(8), x15 // flags = notrap aligned +; xstore64 OutgoingArg(16), x15 // flags = notrap aligned +; xstore64 OutgoingArg(24), x15 // flags = notrap aligned +; xstore64 OutgoingArg(32), x15 // flags = notrap aligned +; xstore64 OutgoingArg(40), x15 // flags = notrap aligned ; xmov x0, x15 ; xmov x1, x15 ; xmov x2, x15 @@ -163,12 +163,12 @@ block0: ; push_frame ; stack_alloc32 48 ; xconst8 x15, 0 -; store64 sp, x15 -; store64_offset8 sp, 8, x15 -; store64_offset8 sp, 16, x15 -; store64_offset8 sp, 24, x15 -; store64_offset8 sp, 32, x15 -; store64_offset8 sp, 40, x15 +; xstore64le_offset32 sp, 0, x15 +; xstore64le_offset32 sp, 8, x15 +; xstore64le_offset32 sp, 16, x15 +; xstore64le_offset32 sp, 24, x15 +; xstore64le_offset32 sp, 32, x15 +; xstore64le_offset32 sp, 40, x15 ; xmov x0, x15 ; xmov x1, x15 ; xmov x2, x15 @@ -184,7 +184,7 @@ block0: ; xmov x12, x15 ; xmov x13, x15 ; xmov x14, x15 -; call 0x0 // target = 0x4d +; call 0x0 // target = 0x60 ; stack_free32 48 ; pop_frame ; ret @@ -228,18 +228,18 @@ block0: ; VCode: ; push_frame ; stack_alloc32 64 -; store64 sp+56, x18 // flags = notrap aligned -; store64 sp+48, x20 // flags = notrap aligned +; xstore64 sp+56, x18 // flags = notrap aligned +; 
xstore64 sp+48, x20 // flags = notrap aligned ; block0: ; x0 = load_addr OutgoingArg(0) ; call CallInfo { dest: TestCase(%g), uses: [CallArgPair { vreg: p0i, preg: p0i }], defs: [CallRetPair { vreg: Writable { reg: p0i }, preg: p0i }, CallRetPair { vreg: Writable { reg: p1i }, preg: p1i }, CallRetPair { vreg: Writable { reg: p2i }, preg: p2i }, CallRetPair { vreg: Writable { reg: p3i }, preg: p3i }, CallRetPair { vreg: Writable { reg: p4i }, preg: p4i }, CallRetPair { vreg: Writable { reg: p5i }, preg: p5i }, CallRetPair { vreg: Writable { reg: p6i }, preg: p6i }, CallRetPair { vreg: Writable { reg: p7i }, preg: p7i }, CallRetPair { vreg: Writable { reg: p8i }, preg: p8i }, CallRetPair { vreg: Writable { reg: p9i }, preg: p9i }, CallRetPair { vreg: Writable { reg: p10i }, preg: p10i }, CallRetPair { vreg: Writable { reg: p11i }, preg: p11i }, CallRetPair { vreg: Writable { reg: p12i }, preg: p12i }, CallRetPair { vreg: Writable { reg: p13i }, preg: p13i }, CallRetPair { vreg: Writable { reg: p14i }, preg: p14i }, CallRetPair { vreg: Writable { reg: p15i }, preg: p15i }], clobbers: PRegSet { bits: [0, 65279, 4294967295, 0] }, callee_conv: Fast, caller_conv: Fast, callee_pop_size: 0 } ; xmov x18, x13 ; xmov x20, x11 -; x24 = load64_u OutgoingArg(0) // flags = notrap aligned -; x11 = load64_u OutgoingArg(8) // flags = notrap aligned -; x13 = load64_u OutgoingArg(16) // flags = notrap aligned -; x19 = load64_u OutgoingArg(24) // flags = notrap aligned -; x21 = load64_u OutgoingArg(32) // flags = notrap aligned +; x24 = xload64 OutgoingArg(0) // flags = notrap aligned +; x11 = xload64 OutgoingArg(8) // flags = notrap aligned +; x13 = xload64 OutgoingArg(16) // flags = notrap aligned +; x19 = xload64 OutgoingArg(24) // flags = notrap aligned +; x21 = xload64 OutgoingArg(32) // flags = notrap aligned ; xadd64 x25, x0, x1 ; xadd64 x23, x2, x3 ; xadd64 x5, x4, x5 @@ -265,8 +265,8 @@ block0: ; xadd64 x14, x0, x14 ; xadd64 x13, x13, x13 ; xadd64 x0, x14, x13 -; x18 = load64_u 
sp+56 // flags = notrap aligned -; x20 = load64_u sp+48 // flags = notrap aligned +; x18 = xload64 sp+56 // flags = notrap aligned +; x20 = xload64 sp+48 // flags = notrap aligned ; stack_free32 64 ; pop_frame ; ret @@ -274,17 +274,17 @@ block0: ; Disassembled: ; push_frame ; stack_alloc32 64 -; store64_offset8 sp, 56, x18 -; store64_offset8 sp, 48, x20 +; xstore64le_offset32 sp, 56, x18 +; xstore64le_offset32 sp, 48, x20 ; xmov x0, sp -; call 0x0 // target = 0x11 +; call 0x0 // target = 0x17 ; xmov x18, x13 ; xmov x20, x11 -; load64 x24, sp -; load64_offset8 x11, sp, 8 -; load64_offset8 x13, sp, 16 -; load64_offset8 x19, sp, 24 -; load64_offset8 x21, sp, 32 +; xload64le_offset32 x24, sp, 0 +; xload64le_offset32 x11, sp, 8 +; xload64le_offset32 x13, sp, 16 +; xload64le_offset32 x19, sp, 24 +; xload64le_offset32 x21, sp, 32 ; xadd64 x25, x0, x1 ; xadd64 x23, x2, x3 ; xadd64 x5, x4, x5 @@ -310,8 +310,8 @@ block0: ; xadd64 x14, x0, x14 ; xadd64 x13, x13, x13 ; xadd64 x0, x14, x13 -; load64_offset8 x18, sp, 56 -; load64_offset8 x20, sp, 48 +; xload64le_offset32 x18, sp, 56 +; xload64le_offset32 x20, sp, 48 ; stack_free32 64 ; pop_frame ; ret diff --git a/cranelift/filetests/filetests/isa/pulley32/load.clif b/cranelift/filetests/filetests/isa/pulley32/load.clif index 3df82d3cbd03..82cc4c52aac8 100644 --- a/cranelift/filetests/filetests/isa/pulley32/load.clif +++ b/cranelift/filetests/filetests/isa/pulley32/load.clif @@ -9,11 +9,11 @@ block0(v0: i32): ; VCode: ; block0: -; x0 = load32_u x0+0 // flags = +; x0 = xload32 x0+0 // flags = ; ret ; ; Disassembled: -; load32_u x0, x0 +; xload32le_offset32 x0, x0, 0 ; ret function %load_i64(i32) -> i64 { @@ -24,11 +24,11 @@ block0(v0: i32): ; VCode: ; block0: -; x0 = load64_u x0+0 // flags = +; x0 = xload64 x0+0 // flags = ; ret ; ; Disassembled: -; load64 x0, x0 +; xload64le_offset32 x0, x0, 0 ; ret function %load_i32_with_offset(i32) -> i32 { @@ -39,11 +39,11 @@ block0(v0: i32): ; VCode: ; block0: -; x0 = load32_u x0+4 // flags 
= +; x0 = xload32 x0+4 // flags = ; ret ; ; Disassembled: -; load32_u_offset8 x0, x0, 4 +; xload32le_offset32 x0, x0, 4 ; ret function %load_i64_with_offset(i32) -> i64 { @@ -54,10 +54,10 @@ block0(v0: i32): ; VCode: ; block0: -; x0 = load64_u x0+8 // flags = +; x0 = xload64 x0+8 // flags = ; ret ; ; Disassembled: -; load64_offset8 x0, x0, 8 +; xload64le_offset32 x0, x0, 8 ; ret diff --git a/cranelift/filetests/filetests/isa/pulley32/store.clif b/cranelift/filetests/filetests/isa/pulley32/store.clif index abefe343ca0e..5f87a2c2491d 100644 --- a/cranelift/filetests/filetests/isa/pulley32/store.clif +++ b/cranelift/filetests/filetests/isa/pulley32/store.clif @@ -9,11 +9,11 @@ block0(v0: i32, v1: i32): ; VCode: ; block0: -; store32 x1+0, x0 // flags = +; xstore32 x1+0, x0 // flags = ; ret ; ; Disassembled: -; store32 x1, x0 +; xstore32le_offset32 x1, 0, x0 ; ret function %store_i64(i64, i32) { @@ -24,11 +24,11 @@ block0(v0: i64, v1: i32): ; VCode: ; block0: -; store64 x1+0, x0 // flags = +; xstore64 x1+0, x0 // flags = ; ret ; ; Disassembled: -; store64 x1, x0 +; xstore64le_offset32 x1, 0, x0 ; ret function %store_i32_with_offset(i32, i32) { @@ -39,11 +39,11 @@ block0(v0: i32, v1: i32): ; VCode: ; block0: -; store32 x1+4, x0 // flags = +; xstore32 x1+4, x0 // flags = ; ret ; ; Disassembled: -; store32_offset8 x1, 4, x0 +; xstore32le_offset32 x1, 4, x0 ; ret function %store_i64_with_offset(i64, i32) { @@ -54,10 +54,10 @@ block0(v0: i64, v1: i32): ; VCode: ; block0: -; store64 x1+8, x0 // flags = +; xstore64 x1+8, x0 // flags = ; ret ; ; Disassembled: -; store64_offset8 x1, 8, x0 +; xstore64le_offset32 x1, 8, x0 ; ret diff --git a/cranelift/filetests/filetests/isa/pulley32/trap.clif b/cranelift/filetests/filetests/isa/pulley32/trap.clif index ab0757ad1eb5..99af0f918c83 100644 --- a/cranelift/filetests/filetests/isa/pulley32/trap.clif +++ b/cranelift/filetests/filetests/isa/pulley32/trap.clif @@ -8,7 +8,7 @@ block0: ; VCode: ; block0: -; trap // code = TrapCode(1) +; 
trap // trap=TrapCode(1) ; ; Disassembled: ; trap diff --git a/cranelift/filetests/filetests/isa/pulley64/call.clif b/cranelift/filetests/filetests/isa/pulley64/call.clif index e0b87760510a..0f47c4e765b8 100644 --- a/cranelift/filetests/filetests/isa/pulley64/call.clif +++ b/cranelift/filetests/filetests/isa/pulley64/call.clif @@ -133,12 +133,12 @@ block0: ; stack_alloc32 48 ; block0: ; xconst8 x15, 0 -; store64 OutgoingArg(0), x15 // flags = notrap aligned -; store64 OutgoingArg(8), x15 // flags = notrap aligned -; store64 OutgoingArg(16), x15 // flags = notrap aligned -; store64 OutgoingArg(24), x15 // flags = notrap aligned -; store64 OutgoingArg(32), x15 // flags = notrap aligned -; store64 OutgoingArg(40), x15 // flags = notrap aligned +; xstore64 OutgoingArg(0), x15 // flags = notrap aligned +; xstore64 OutgoingArg(8), x15 // flags = notrap aligned +; xstore64 OutgoingArg(16), x15 // flags = notrap aligned +; xstore64 OutgoingArg(24), x15 // flags = notrap aligned +; xstore64 OutgoingArg(32), x15 // flags = notrap aligned +; xstore64 OutgoingArg(40), x15 // flags = notrap aligned ; xmov x0, x15 ; xmov x1, x15 ; xmov x2, x15 @@ -163,12 +163,12 @@ block0: ; push_frame ; stack_alloc32 48 ; xconst8 x15, 0 -; store64 sp, x15 -; store64_offset8 sp, 8, x15 -; store64_offset8 sp, 16, x15 -; store64_offset8 sp, 24, x15 -; store64_offset8 sp, 32, x15 -; store64_offset8 sp, 40, x15 +; xstore64le_offset32 sp, 0, x15 +; xstore64le_offset32 sp, 8, x15 +; xstore64le_offset32 sp, 16, x15 +; xstore64le_offset32 sp, 24, x15 +; xstore64le_offset32 sp, 32, x15 +; xstore64le_offset32 sp, 40, x15 ; xmov x0, x15 ; xmov x1, x15 ; xmov x2, x15 @@ -184,7 +184,7 @@ block0: ; xmov x12, x15 ; xmov x13, x15 ; xmov x14, x15 -; call 0x0 // target = 0x4d +; call 0x0 // target = 0x60 ; stack_free32 48 ; pop_frame ; ret @@ -228,18 +228,18 @@ block0: ; VCode: ; push_frame ; stack_alloc32 64 -; store64 sp+56, x18 // flags = notrap aligned -; store64 sp+48, x20 // flags = notrap aligned +; 
xstore64 sp+56, x18 // flags = notrap aligned +; xstore64 sp+48, x20 // flags = notrap aligned ; block0: ; x0 = load_addr OutgoingArg(0) ; call CallInfo { dest: TestCase(%g), uses: [CallArgPair { vreg: p0i, preg: p0i }], defs: [CallRetPair { vreg: Writable { reg: p0i }, preg: p0i }, CallRetPair { vreg: Writable { reg: p1i }, preg: p1i }, CallRetPair { vreg: Writable { reg: p2i }, preg: p2i }, CallRetPair { vreg: Writable { reg: p3i }, preg: p3i }, CallRetPair { vreg: Writable { reg: p4i }, preg: p4i }, CallRetPair { vreg: Writable { reg: p5i }, preg: p5i }, CallRetPair { vreg: Writable { reg: p6i }, preg: p6i }, CallRetPair { vreg: Writable { reg: p7i }, preg: p7i }, CallRetPair { vreg: Writable { reg: p8i }, preg: p8i }, CallRetPair { vreg: Writable { reg: p9i }, preg: p9i }, CallRetPair { vreg: Writable { reg: p10i }, preg: p10i }, CallRetPair { vreg: Writable { reg: p11i }, preg: p11i }, CallRetPair { vreg: Writable { reg: p12i }, preg: p12i }, CallRetPair { vreg: Writable { reg: p13i }, preg: p13i }, CallRetPair { vreg: Writable { reg: p14i }, preg: p14i }, CallRetPair { vreg: Writable { reg: p15i }, preg: p15i }], clobbers: PRegSet { bits: [0, 65279, 4294967295, 0] }, callee_conv: Fast, caller_conv: Fast, callee_pop_size: 0 } ; xmov x18, x13 ; xmov x20, x11 -; x24 = load64_u OutgoingArg(0) // flags = notrap aligned -; x11 = load64_u OutgoingArg(8) // flags = notrap aligned -; x13 = load64_u OutgoingArg(16) // flags = notrap aligned -; x19 = load64_u OutgoingArg(24) // flags = notrap aligned -; x21 = load64_u OutgoingArg(32) // flags = notrap aligned +; x24 = xload64 OutgoingArg(0) // flags = notrap aligned +; x11 = xload64 OutgoingArg(8) // flags = notrap aligned +; x13 = xload64 OutgoingArg(16) // flags = notrap aligned +; x19 = xload64 OutgoingArg(24) // flags = notrap aligned +; x21 = xload64 OutgoingArg(32) // flags = notrap aligned ; xadd64 x25, x0, x1 ; xadd64 x23, x2, x3 ; xadd64 x5, x4, x5 @@ -265,8 +265,8 @@ block0: ; xadd64 x14, x0, x14 ; xadd64 x13, 
x13, x13 ; xadd64 x0, x14, x13 -; x18 = load64_u sp+56 // flags = notrap aligned -; x20 = load64_u sp+48 // flags = notrap aligned +; x18 = xload64 sp+56 // flags = notrap aligned +; x20 = xload64 sp+48 // flags = notrap aligned ; stack_free32 64 ; pop_frame ; ret @@ -274,17 +274,17 @@ block0: ; Disassembled: ; push_frame ; stack_alloc32 64 -; store64_offset8 sp, 56, x18 -; store64_offset8 sp, 48, x20 +; xstore64le_offset32 sp, 56, x18 +; xstore64le_offset32 sp, 48, x20 ; xmov x0, sp -; call 0x0 // target = 0x11 +; call 0x0 // target = 0x17 ; xmov x18, x13 ; xmov x20, x11 -; load64 x24, sp -; load64_offset8 x11, sp, 8 -; load64_offset8 x13, sp, 16 -; load64_offset8 x19, sp, 24 -; load64_offset8 x21, sp, 32 +; xload64le_offset32 x24, sp, 0 +; xload64le_offset32 x11, sp, 8 +; xload64le_offset32 x13, sp, 16 +; xload64le_offset32 x19, sp, 24 +; xload64le_offset32 x21, sp, 32 ; xadd64 x25, x0, x1 ; xadd64 x23, x2, x3 ; xadd64 x5, x4, x5 @@ -310,8 +310,8 @@ block0: ; xadd64 x14, x0, x14 ; xadd64 x13, x13, x13 ; xadd64 x0, x14, x13 -; load64_offset8 x18, sp, 56 -; load64_offset8 x20, sp, 48 +; xload64le_offset32 x18, sp, 56 +; xload64le_offset32 x20, sp, 48 ; stack_free32 64 ; pop_frame ; ret @@ -359,14 +359,14 @@ block0: ; stack_alloc32 64 ; block0: ; xconst8 x15, 0 -; store64 OutgoingArg(0), x15 // flags = notrap aligned -; store64 OutgoingArg(8), x15 // flags = notrap aligned -; store64 OutgoingArg(16), x15 // flags = notrap aligned -; store64 OutgoingArg(24), x15 // flags = notrap aligned -; store64 OutgoingArg(32), x15 // flags = notrap aligned -; store64 OutgoingArg(40), x15 // flags = notrap aligned -; store64 OutgoingArg(48), x15 // flags = notrap aligned -; store64 OutgoingArg(56), x15 // flags = notrap aligned +; xstore64 OutgoingArg(0), x15 // flags = notrap aligned +; xstore64 OutgoingArg(8), x15 // flags = notrap aligned +; xstore64 OutgoingArg(16), x15 // flags = notrap aligned +; xstore64 OutgoingArg(24), x15 // flags = notrap aligned +; xstore64 
OutgoingArg(32), x15 // flags = notrap aligned +; xstore64 OutgoingArg(40), x15 // flags = notrap aligned +; xstore64 OutgoingArg(48), x15 // flags = notrap aligned +; xstore64 OutgoingArg(56), x15 // flags = notrap aligned ; xmov x0, x15 ; xmov x1, x15 ; xmov x2, x15 @@ -391,14 +391,14 @@ block0: ; push_frame ; stack_alloc32 64 ; xconst8 x15, 0 -; store64 sp, x15 -; store64_offset8 sp, 8, x15 -; store64_offset8 sp, 16, x15 -; store64_offset8 sp, 24, x15 -; store64_offset8 sp, 32, x15 -; store64_offset8 sp, 40, x15 -; store64_offset8 sp, 48, x15 -; store64_offset8 sp, 56, x15 +; xstore64le_offset32 sp, 0, x15 +; xstore64le_offset32 sp, 8, x15 +; xstore64le_offset32 sp, 16, x15 +; xstore64le_offset32 sp, 24, x15 +; xstore64le_offset32 sp, 32, x15 +; xstore64le_offset32 sp, 40, x15 +; xstore64le_offset32 sp, 48, x15 +; xstore64le_offset32 sp, 56, x15 ; xmov x0, x15 ; xmov x1, x15 ; xmov x2, x15 @@ -414,7 +414,7 @@ block0: ; xmov x12, x15 ; xmov x13, x15 ; xmov x14, x15 -; call 0x0 // target = 0x55 +; call 0x0 // target = 0x6e ; stack_free32 64 ; pop_frame ; ret diff --git a/cranelift/filetests/filetests/isa/pulley64/load.clif b/cranelift/filetests/filetests/isa/pulley64/load.clif index e39daa3ababc..3482e2c7d280 100644 --- a/cranelift/filetests/filetests/isa/pulley64/load.clif +++ b/cranelift/filetests/filetests/isa/pulley64/load.clif @@ -9,11 +9,11 @@ block0(v0: i64): ; VCode: ; block0: -; x0 = load32_u x0+0 // flags = +; x0 = xload32 x0+0 // flags = ; ret ; ; Disassembled: -; load32_u x0, x0 +; xload32le_offset32 x0, x0, 0 ; ret function %load_i64(i64) -> i64 { @@ -24,11 +24,11 @@ block0(v0: i64): ; VCode: ; block0: -; x0 = load64_u x0+0 // flags = +; x0 = xload64 x0+0 // flags = ; ret ; ; Disassembled: -; load64 x0, x0 +; xload64le_offset32 x0, x0, 0 ; ret function %load_i32_with_offset(i64) -> i32 { @@ -39,11 +39,11 @@ block0(v0: i64): ; VCode: ; block0: -; x0 = load32_u x0+4 // flags = +; x0 = xload32 x0+4 // flags = ; ret ; ; Disassembled: -; load32_u_offset8 
x0, x0, 4 +; xload32le_offset32 x0, x0, 4 ; ret function %load_i64_with_offset(i64) -> i64 { @@ -54,10 +54,10 @@ block0(v0: i64): ; VCode: ; block0: -; x0 = load64_u x0+8 // flags = +; x0 = xload64 x0+8 // flags = ; ret ; ; Disassembled: -; load64_offset8 x0, x0, 8 +; xload64le_offset32 x0, x0, 8 ; ret diff --git a/cranelift/filetests/filetests/isa/pulley64/loadbe.clif b/cranelift/filetests/filetests/isa/pulley64/loadbe.clif index ef8c6e0f2f4c..a0ad10694a85 100644 --- a/cranelift/filetests/filetests/isa/pulley64/loadbe.clif +++ b/cranelift/filetests/filetests/isa/pulley64/loadbe.clif @@ -9,13 +9,11 @@ block0(v0: i64): ; VCode: ; block0: -; x2 = load32_u x0+0 // flags = -; bswap32 x0, x2 +; x0 = xload32 x0+0 // flags = ; ret ; ; Disassembled: -; load32_u x2, x0 -; bswap32 x0, x2 +; xload32be_u64_offset32 x0, x0, 0 ; ret function %load_i64(i64) -> i64 { @@ -26,13 +24,11 @@ block0(v0: i64): ; VCode: ; block0: -; x2 = load64_u x0+0 // flags = -; bswap64 x0, x2 +; x0 = xload64 x0+0 // flags = ; ret ; ; Disassembled: -; load64 x2, x0 -; bswap64 x0, x2 +; xload64be_offset32 x0, x0, 0 ; ret function %load_i32_with_offset(i64) -> i32 { @@ -43,13 +39,11 @@ block0(v0: i64): ; VCode: ; block0: -; x2 = load32_u x0+4 // flags = -; bswap32 x0, x2 +; x0 = xload32 x0+4 // flags = ; ret ; ; Disassembled: -; load32_u_offset8 x2, x0, 4 -; bswap32 x0, x2 +; xload32be_u64_offset32 x0, x0, 4 ; ret function %load_i64_with_offset(i64) -> i64 { @@ -60,12 +54,10 @@ block0(v0: i64): ; VCode: ; block0: -; x2 = load64_u x0+8 // flags = -; bswap64 x0, x2 +; x0 = xload64 x0+8 // flags = ; ret ; ; Disassembled: -; load64_offset8 x2, x0, 8 -; bswap64 x0, x2 +; xload64be_offset32 x0, x0, 8 ; ret diff --git a/cranelift/filetests/filetests/isa/pulley64/store.clif b/cranelift/filetests/filetests/isa/pulley64/store.clif index 966ee460658e..67cdf9763aa4 100644 --- a/cranelift/filetests/filetests/isa/pulley64/store.clif +++ b/cranelift/filetests/filetests/isa/pulley64/store.clif @@ -9,11 +9,11 @@ 
block0(v0: i32, v1: i64): ; VCode: ; block0: -; store32 x1+0, x0 // flags = +; xstore32 x1+0, x0 // flags = ; ret ; ; Disassembled: -; store32 x1, x0 +; xstore32le_offset32 x1, 0, x0 ; ret function %store_i64(i64, i64) { @@ -24,11 +24,11 @@ block0(v0: i64, v1: i64): ; VCode: ; block0: -; store64 x1+0, x0 // flags = +; xstore64 x1+0, x0 // flags = ; ret ; ; Disassembled: -; store64 x1, x0 +; xstore64le_offset32 x1, 0, x0 ; ret function %store_i32_with_offset(i32, i64) { @@ -39,11 +39,11 @@ block0(v0: i32, v1: i64): ; VCode: ; block0: -; store32 x1+4, x0 // flags = +; xstore32 x1+4, x0 // flags = ; ret ; ; Disassembled: -; store32_offset8 x1, 4, x0 +; xstore32le_offset32 x1, 4, x0 ; ret function %store_i64_with_offset(i64, i64) { @@ -54,10 +54,10 @@ block0(v0: i64, v1: i64): ; VCode: ; block0: -; store64 x1+8, x0 // flags = +; xstore64 x1+8, x0 // flags = ; ret ; ; Disassembled: -; store64_offset8 x1, 8, x0 +; xstore64le_offset32 x1, 8, x0 ; ret diff --git a/cranelift/filetests/filetests/isa/pulley64/storebe.clif b/cranelift/filetests/filetests/isa/pulley64/storebe.clif index d9d59f5c5a94..5f8b1270c152 100644 --- a/cranelift/filetests/filetests/isa/pulley64/storebe.clif +++ b/cranelift/filetests/filetests/isa/pulley64/storebe.clif @@ -9,13 +9,11 @@ block0(v0: i32, v1: i64): ; VCode: ; block0: -; bswap32 x3, x0 -; store32 x1+0, x3 // flags = +; xstore32 x1+0, x0 // flags = ; ret ; ; Disassembled: -; bswap32 x3, x0 -; store32 x1, x3 +; xstore32be_offset32 x1, 0, x0 ; ret function %store_i64(i64, i64) { @@ -26,13 +24,11 @@ block0(v0: i64, v1: i64): ; VCode: ; block0: -; bswap64 x3, x0 -; store64 x1+0, x3 // flags = +; xstore64 x1+0, x0 // flags = ; ret ; ; Disassembled: -; bswap64 x3, x0 -; store64 x1, x3 +; xstore64be_offset32 x1, 0, x0 ; ret function %store_i32_with_offset(i32, i64) { @@ -43,13 +39,11 @@ block0(v0: i32, v1: i64): ; VCode: ; block0: -; bswap32 x3, x0 -; store32 x1+4, x3 // flags = +; xstore32 x1+4, x0 // flags = ; ret ; ; Disassembled: -; bswap32 x3, 
x0 -; store32_offset8 x1, 4, x3 +; xstore32be_offset32 x1, 4, x0 ; ret function %store_i64_with_offset(i64, i64) { @@ -60,12 +54,10 @@ block0(v0: i64, v1: i64): ; VCode: ; block0: -; bswap64 x3, x0 -; store64 x1+8, x3 // flags = +; xstore64 x1+8, x0 // flags = ; ret ; ; Disassembled: -; bswap64 x3, x0 -; store64_offset8 x1, 8, x3 +; xstore64be_offset32 x1, 8, x0 ; ret diff --git a/cranelift/filetests/filetests/isa/pulley64/trap.clif b/cranelift/filetests/filetests/isa/pulley64/trap.clif index fa458ec90486..34811b40efd7 100644 --- a/cranelift/filetests/filetests/isa/pulley64/trap.clif +++ b/cranelift/filetests/filetests/isa/pulley64/trap.clif @@ -8,7 +8,7 @@ block0: ; VCode: ; block0: -; trap // code = TrapCode(1) +; trap // trap=TrapCode(1) ; ; Disassembled: ; trap diff --git a/crates/cranelift/src/translate/code_translator/bounds_checks.rs b/crates/cranelift/src/translate/code_translator/bounds_checks.rs index b2379e335c1f..9993410fbe6b 100644 --- a/crates/cranelift/src/translate/code_translator/bounds_checks.rs +++ b/crates/cranelift/src/translate/code_translator/bounds_checks.rs @@ -155,6 +155,16 @@ pub fn bounds_check_and_compute_addr( return Ok(Unreachable); } + // Special case: if this is a 32-bit platform and the `offset_and_size` + // overflows the 32-bit address space then there's no hope of this ever + // being in-bounds. We can't represent `offset_and_size` in CLIF as the + // native pointer type anyway, so this is an unconditional trap. + if pointer_bit_width < 64 && offset_and_size >= (1 << pointer_bit_width) { + env.before_unconditionally_trapping_memory_access(builder)?; + env.trap(builder, ir::TrapCode::HEAP_OUT_OF_BOUNDS); + return Ok(Unreachable); + } + // Special case for when we can completely omit explicit // bounds checks for 32-bit memories. 
// diff --git a/crates/wasmtime/src/config.rs b/crates/wasmtime/src/config.rs index 4ad821ac9591..a99d422d1b4c 100644 --- a/crates/wasmtime/src/config.rs +++ b/crates/wasmtime/src/config.rs @@ -1935,7 +1935,6 @@ impl Config { return WasmFeatures::SIMD | WasmFeatures::RELAXED_SIMD | WasmFeatures::TAIL_CALL - | WasmFeatures::FLOATS | WasmFeatures::MEMORY64 | WasmFeatures::GC_TYPES; } diff --git a/crates/wasmtime/src/runtime/vm/interpreter.rs b/crates/wasmtime/src/runtime/vm/interpreter.rs index c12c21e74efa..6d0349b9d504 100644 --- a/crates/wasmtime/src/runtime/vm/interpreter.rs +++ b/crates/wasmtime/src/runtime/vm/interpreter.rs @@ -96,9 +96,17 @@ impl InterpreterRef<'_> { // If the VM wants to call out to the host then dispatch that // here based on `sig`. Once that returns we can resume // execution at `resume`. + // + // Note that the `raise` libcall is handled specially here since + // longjmp/setjmp is handled differently than on the host. DoneReason::CallIndirectHost { id, resume } => { - self.call_indirect_host(id); - bytecode = resume; + if u32::from(id) == HostCall::Builtin(BuiltinFunctionIndex::raise()).index() { + self.longjmp(setjmp); + break false; + } else { + self.call_indirect_host(id); + bytecode = resume; + } } // If the VM trapped then process that here and return `false`. DoneReason::Trap(pc) => { @@ -141,22 +149,25 @@ impl InterpreterRef<'_> { // Trap was handled, yay! We don't use `jmp_buf`. TrapTest::Trap { jmp_buf: _ } => {} } + self.longjmp(setjmp); + } - // Perform a "longjmp" by restoring the "setjmp" context saved when this - // started. - // - // FIXME: this is not restoring callee-save state. For example if - // there's more than one Pulley activation on the stack that means that - // the previous one is expecting the callee (the host) to preserve all - // callee-save registers. That's not restored here which means with - // multiple activations we're effectively corrupting callee-save - // registers. 
- // - // One fix for this is to possibly update the `SystemV` ABI on pulley to - // have no callee-saved registers and make everything caller-saved. That - // would force all trampolines to save all state which is basically - // what we want as they'll naturally restore state if we later return to - // them. + /// Perform a "longjmp" by restoring the "setjmp" context saved when this + /// started. + /// + /// FIXME: this is not restoring callee-save state. For example if + /// there's more than one Pulley activation on the stack that means that + /// the previous one is expecting the callee (the host) to preserve all + /// callee-save registers. That's not restored here which means with + /// multiple activations we're effectively corrupting callee-save + /// registers. + /// + /// One fix for this is to possibly update the `SystemV` ABI on pulley to + /// have no callee-saved registers and make everything caller-saved. That + /// would force all trampolines to save all state which is basically + /// what we want as they'll naturally restore state if we later return to + /// them. + fn longjmp(&mut self, setjmp: Setjmp) { let Setjmp { sp, fp, lr } = setjmp; self.0[XReg::sp].set_ptr(sp); self.0[XReg::fp].set_ptr(fp); diff --git a/crates/wasmtime/src/runtime/vm/traphandlers.rs b/crates/wasmtime/src/runtime/vm/traphandlers.rs index 026d55d189b7..9e12cdc9716f 100644 --- a/crates/wasmtime/src/runtime/vm/traphandlers.rs +++ b/crates/wasmtime/src/runtime/vm/traphandlers.rs @@ -599,6 +599,7 @@ impl CallThreadState { /// destructors on the stack, if there are any. unsafe fn unwind(&self) -> ! 
{ debug_assert!(!self.jmp_buf.get().is_null()); + debug_assert!(self.jmp_buf.get() != CallThreadState::JMP_BUF_INTERPRETER_SENTINEL); traphandlers::wasmtime_longjmp(self.jmp_buf.get()); } diff --git a/crates/wast-util/src/lib.rs b/crates/wast-util/src/lib.rs index 55bc58682298..a39e50b1669f 100644 --- a/crates/wast-util/src/lib.rs +++ b/crates/wast-util/src/lib.rs @@ -414,18 +414,25 @@ impl WastTest { "misc_testsuite/component-model/linking.wast", "misc_testsuite/component-model/nested.wast", "misc_testsuite/component-model/types.wast", + "misc_testsuite/control-flow.wast", "misc_testsuite/elem-ref-null.wast", "misc_testsuite/elem_drop.wast", "misc_testsuite/empty.wast", + "misc_testsuite/export-large-signature.wast", "misc_testsuite/fib.wast", "misc_testsuite/func-400-params.wast", "misc_testsuite/gc/more-rec-groups-than-types.wast", "misc_testsuite/gc/rec-group-funcs.wast", + "misc_testsuite/imported-memory-copy.wast", + "misc_testsuite/issue4857.wast", + "misc_testsuite/memory-copy.wast", + "misc_testsuite/partial-init-memory-segment.wast", "misc_testsuite/rs2wasm-add-func.wast", "misc_testsuite/stack_overflow.wast", "misc_testsuite/winch/misc.wast", + "misc_testsuite/winch/oob.wast", + "spec_testsuite/proposals/exception-handling/binary.wast", "threads/exports.wast", - "misc_testsuite/control-flow.wast", ]; if supported.iter().any(|part| self.path.ends_with(part)) { diff --git a/fuzz/fuzz_targets/pulley.rs b/fuzz/fuzz_targets/pulley.rs index d3bebbff4ee3..35b03494802c 100644 --- a/fuzz/fuzz_targets/pulley.rs +++ b/fuzz/fuzz_targets/pulley.rs @@ -1,7 +1,7 @@ #![no_main] use libfuzzer_sys::{arbitrary::*, fuzz_target}; -use pulley_interpreter_fuzz::{interp, roundtrip}; +use pulley_interpreter_fuzz::roundtrip; fuzz_target!(|data| { let _ = fuzz(data); @@ -11,9 +11,8 @@ fn fuzz(data: &[u8]) -> Result<()> { let _ = env_logger::try_init(); let mut u = Unstructured::new(data); - match u.int_in_range(0..=1)? { + match u.int_in_range(0..=0)? 
{ 0 => roundtrip(Arbitrary::arbitrary_take_rest(u)?), - 1 => interp(Arbitrary::arbitrary_take_rest(u)?), _ => unreachable!(), } diff --git a/pulley/fuzz/src/interp.rs b/pulley/fuzz/src/interp.rs deleted file mode 100644 index 60efe05f5c61..000000000000 --- a/pulley/fuzz/src/interp.rs +++ /dev/null @@ -1,144 +0,0 @@ -use pulley_interpreter::{ - interp::{DoneReason, Vm}, - op::{self, ExtendedOp, Op}, - *, -}; -use std::ptr::NonNull; - -pub fn interp(ops: Vec) { - let _ = env_logger::try_init(); - - log::trace!("input: {ops:#?}"); - - let mut ops = ops; - ops.retain(|op| op_is_safe_for_fuzzing(op)); - // Make sure that we end with a `ret` so that the interpreter returns - // control to us instead of continuing off the end of the ops and into - // undefined memory. - ops.push(Op::Ret(op::Ret {})); - - log::trace!("filtered to only safe ops: {ops:#?}"); - - let mut encoded = vec![]; - for op in &ops { - op.encode(&mut encoded); - } - log::trace!("encoded: {encoded:?}"); - - let mut vm = Vm::new(); - unsafe { - let args = &[]; - let rets = &[]; - match vm.call(NonNull::from(&encoded[0]), args, rets.into_iter().copied()) { - DoneReason::ReturnToHost(rets) => assert_eq!(rets.count(), 0), - DoneReason::Trap(pc) => { - let pc = pc.as_ptr() as usize; - - let start = &encoded[0] as *const u8 as usize; - let end = encoded.last().unwrap() as *const u8 as usize; - assert!( - start <= pc && pc < end, - "pc should be in range {start:#018x}..{end:#018x}, got {pc:#018x}" - ); - - let index = pc - start; - assert_eq!(encoded[index], Opcode::ExtendedOp as u8); - let [a, b] = (ExtendedOpcode::Trap as u16).to_le_bytes(); - assert_eq!(encoded[index + 1], a); - assert_eq!(encoded[index + 2], b); - } - DoneReason::CallIndirectHost { .. 
} => unreachable!(), - }; - } -} - -fn op_is_safe_for_fuzzing(op: &Op) -> bool { - match op { - Op::Ret(_) => true, - Op::Jump(_) => false, - Op::BrIf(_) => false, - Op::BrIfNot(_) => false, - Op::BrIfXeq32(_) => false, - Op::BrIfXneq32(_) => false, - Op::BrIfXult32(_) => false, - Op::BrIfXulteq32(_) => false, - Op::BrIfXslt32(_) => false, - Op::BrIfXslteq32(_) => false, - Op::BrIfXeq64(_) => false, - Op::BrIfXneq64(_) => false, - Op::BrIfXult64(_) => false, - Op::BrIfXulteq64(_) => false, - Op::BrIfXslt64(_) => false, - Op::BrIfXslteq64(_) => false, - Op::Xmov(op::Xmov { dst, .. }) => !dst.is_special(), - Op::Fmov(_) => true, - Op::Vmov(_) => true, - Op::Xconst8(op::Xconst8 { dst, .. }) => !dst.is_special(), - Op::Xconst16(op::Xconst16 { dst, .. }) => !dst.is_special(), - Op::Xconst32(op::Xconst32 { dst, .. }) => !dst.is_special(), - Op::Xconst64(op::Xconst64 { dst, .. }) => !dst.is_special(), - Op::Load32U(_) => false, - Op::Load32S(_) => false, - Op::Load64(_) => false, - Op::Load32UOffset8(_) => false, - Op::Load32SOffset8(_) => false, - Op::Load32UOffset64(_) => false, - Op::Load32SOffset64(_) => false, - Op::Load64Offset8(_) => false, - Op::Load64Offset64(_) => false, - Op::Store32(_) => false, - Op::Store64(_) => false, - Op::Store32SOffset8(_) => false, - Op::Store32SOffset64(_) => false, - Op::Store64Offset8(_) => false, - Op::Store64Offset64(_) => false, - Op::BitcastIntFromFloat32(op::BitcastIntFromFloat32 { dst, .. }) => !dst.is_special(), - Op::BitcastIntFromFloat64(op::BitcastIntFromFloat64 { dst, .. }) => !dst.is_special(), - Op::BitcastFloatFromInt32(_) => true, - Op::BitcastFloatFromInt64(_) => true, - Op::ExtendedOp(op) => extended_op_is_safe_for_fuzzing(op), - Op::Call(_) => false, - Op::CallIndirect(_) => false, - Op::Xadd32(Xadd32 { operands, .. }) - | Op::Xadd64(Xadd64 { operands, .. }) - | Op::Xeq64(Xeq64 { operands, .. }) - | Op::Xneq64(Xneq64 { operands, .. }) - | Op::Xslt64(Xslt64 { operands, .. }) - | Op::Xslteq64(Xslteq64 { operands, .. 
}) - | Op::Xult64(Xult64 { operands, .. }) - | Op::Xulteq64(Xulteq64 { operands, .. }) - | Op::Xeq32(Xeq32 { operands, .. }) - | Op::Xneq32(Xneq32 { operands, .. }) - | Op::Xslt32(Xslt32 { operands, .. }) - | Op::Xslteq32(Xslteq32 { operands, .. }) - | Op::Xult32(Xult32 { operands, .. }) - | Op::Xulteq32(Xulteq32 { operands, .. }) - | Op::XDiv32S(XDiv32S { operands, .. }) - | Op::XAnd32(XAnd32 { operands, .. }) => !operands.dst.is_special(), - Op::PushFrame(_) | Op::PopFrame(_) => false, - Op::XPush32(_) | Op::XPush64(_) => false, - Op::XPop32(_) | Op::XPop64(_) => false, - Op::XPush32Many(_) | Op::XPush64Many(_) => false, - Op::XPop32Many(_) | Op::XPop64Many(_) => false, - Op::BrTable32(_) => false, - Op::StackAlloc32(_) => false, - Op::StackFree32(_) => false, - Op::Zext8(Zext8 { dst, .. }) - | Op::Zext16(Zext16 { dst, .. }) - | Op::Zext32(Zext32 { dst, .. }) - | Op::Sext8(Sext8 { dst, .. }) - | Op::Sext32(Sext32 { dst, .. }) - | Op::Sext16(Sext16 { dst, .. }) => !dst.is_special(), - } -} - -fn extended_op_is_safe_for_fuzzing(op: &ExtendedOp) -> bool { - match op { - ExtendedOp::Trap(_) => true, - ExtendedOp::Nop(_) => true, - ExtendedOp::CallIndirectHost(_) => false, - ExtendedOp::Bswap32(Bswap32 { dst, .. }) | ExtendedOp::Bswap64(Bswap64 { dst, .. 
}) => { - !dst.is_special() - } - } -} diff --git a/pulley/fuzz/src/lib.rs b/pulley/fuzz/src/lib.rs index b041d1676312..d42468227ad8 100644 --- a/pulley/fuzz/src/lib.rs +++ b/pulley/fuzz/src/lib.rs @@ -1,5 +1,2 @@ mod roundtrip; pub use roundtrip::*; - -mod interp; -pub use interp::*; diff --git a/pulley/src/interp.rs b/pulley/src/interp.rs index 468f5cb7609c..d5e872e2b24e 100644 --- a/pulley/src/interp.rs +++ b/pulley/src/interp.rs @@ -10,7 +10,7 @@ use core::fmt; use core::mem; use core::ops::ControlFlow; use core::ops::{Index, IndexMut}; -use core::ptr::{self, NonNull}; +use core::ptr::NonNull; use sptr::Strict; #[cfg(not(pulley_tail_calls))] @@ -799,6 +799,22 @@ impl Interpreter<'_> { } self.state[XReg::sp].set_ptr(sp); } + + unsafe fn load(&self, ptr: XReg, offset: i32) -> T { + unsafe { + self.state[ptr] + .get_ptr::() + .byte_offset(offset as isize) + .read_unaligned() + } + } + + unsafe fn store(&self, ptr: XReg, offset: i32, val: T) { + self.state[ptr] + .get_ptr::() + .byte_offset(offset as isize) + .write_unaligned(val) + } } #[test] @@ -1059,6 +1075,44 @@ impl OpVisitor for Interpreter<'_> { ControlFlow::Continue(()) } + fn xadd32_uoverflow_trap(&mut self, operands: BinaryOperands) -> ControlFlow { + let a = self.state[operands.src1].get_u32(); + let b = self.state[operands.src2].get_u32(); + match a.checked_add(b) { + Some(c) => { + self.state[operands.dst].set_u32(c); + ControlFlow::Continue(()) + } + None => self.done_trap::(), + } + } + + fn xadd64_uoverflow_trap(&mut self, operands: BinaryOperands) -> ControlFlow { + let a = self.state[operands.src1].get_u64(); + let b = self.state[operands.src2].get_u64(); + match a.checked_add(b) { + Some(c) => { + self.state[operands.dst].set_u64(c); + ControlFlow::Continue(()) + } + None => self.done_trap::(), + } + } + + fn xsub32(&mut self, operands: BinaryOperands) -> ControlFlow { + let a = self.state[operands.src1].get_u32(); + let b = self.state[operands.src2].get_u32(); + 
self.state[operands.dst].set_u32(a.wrapping_sub(b)); + ControlFlow::Continue(()) + } + + fn xsub64(&mut self, operands: BinaryOperands) -> ControlFlow { + let a = self.state[operands.src1].get_u64(); + let b = self.state[operands.src2].get_u64(); + self.state[operands.dst].set_u64(a.wrapping_sub(b)); + ControlFlow::Continue(()) + } + fn xeq64(&mut self, operands: BinaryOperands) -> ControlFlow { let a = self.state[operands.src1].get_u64(); let b = self.state[operands.src2].get_u64(); @@ -1143,163 +1197,134 @@ impl OpVisitor for Interpreter<'_> { ControlFlow::Continue(()) } - fn load32_u(&mut self, dst: XReg, ptr: XReg) -> ControlFlow { - let ptr = self.state[ptr].get_ptr::(); - let val = unsafe { u32::from_le(ptr::read_unaligned(ptr)) }; - self.state[dst].set_u64(u64::from(val)); + fn xload8_u32_offset32(&mut self, dst: XReg, ptr: XReg, offset: i32) -> ControlFlow { + let val = unsafe { self.load::(ptr, offset) }; + self.state[dst].set_u32(val.into()); ControlFlow::Continue(()) } - fn load32_s(&mut self, dst: XReg, ptr: XReg) -> ControlFlow { - let ptr = self.state[ptr].get_ptr::(); - let val = unsafe { i32::from_le(ptr::read_unaligned(ptr)) }; - self.state[dst].set_i64(i64::from(val)); + fn xload8_s32_offset32(&mut self, dst: XReg, ptr: XReg, offset: i32) -> ControlFlow { + let val = unsafe { self.load::(ptr, offset) }; + self.state[dst].set_i32(val.into()); ControlFlow::Continue(()) } - fn load64(&mut self, dst: XReg, ptr: XReg) -> ControlFlow { - let ptr = self.state[ptr].get_ptr::(); - let val = unsafe { u64::from_le(ptr::read_unaligned(ptr)) }; - self.state[dst].set_u64(val); + fn xload16le_u32_offset32(&mut self, dst: XReg, ptr: XReg, offset: i32) -> ControlFlow { + let val = unsafe { self.load::(ptr, offset) }; + self.state[dst].set_u32(u16::from_le(val).into()); ControlFlow::Continue(()) } - fn load32_u_offset8(&mut self, dst: XReg, ptr: XReg, offset: i8) -> ControlFlow { - let val = unsafe { - u32::from_le( - self.state[ptr] - .get_ptr::() - 
.byte_offset(offset.into()) - .read_unaligned(), - ) - }; - self.state[dst].set_u64(u64::from(val)); + fn xload16le_s32_offset32(&mut self, dst: XReg, ptr: XReg, offset: i32) -> ControlFlow { + let val = unsafe { self.load::(ptr, offset) }; + self.state[dst].set_i32(i16::from_le(val).into()); ControlFlow::Continue(()) } - fn load32_s_offset8(&mut self, dst: XReg, ptr: XReg, offset: i8) -> ControlFlow { - let val = unsafe { - i32::from_le( - self.state[ptr] - .get_ptr::() - .byte_offset(offset.into()) - .read_unaligned(), - ) - }; - self.state[dst].set_i64(i64::from(val)); + fn xload32le_offset32(&mut self, dst: XReg, ptr: XReg, offset: i32) -> ControlFlow { + let val = unsafe { self.load::(ptr, offset) }; + self.state[dst].set_i32(i32::from_le(val)); ControlFlow::Continue(()) } - fn load32_u_offset64(&mut self, dst: XReg, ptr: XReg, offset: i64) -> ControlFlow { - let val = unsafe { - u32::from_le( - self.state[ptr] - .get_ptr::() - .byte_offset(offset as isize) - .read_unaligned(), - ) - }; - self.state[dst].set_u64(u64::from(val)); + fn xload8_u64_offset32(&mut self, dst: XReg, ptr: XReg, offset: i32) -> ControlFlow { + let val = unsafe { self.load::(ptr, offset) }; + self.state[dst].set_u64(val.into()); ControlFlow::Continue(()) } - fn load32_s_offset64(&mut self, dst: XReg, ptr: XReg, offset: i64) -> ControlFlow { - let val = unsafe { - i32::from_le( - self.state[ptr] - .get_ptr::() - .byte_offset(offset as isize) - .read_unaligned(), - ) - }; - self.state[dst].set_i64(i64::from(val)); + fn xload8_s64_offset32(&mut self, dst: XReg, ptr: XReg, offset: i32) -> ControlFlow { + let val = unsafe { self.load::(ptr, offset) }; + self.state[dst].set_i64(val.into()); ControlFlow::Continue(()) } - fn load64_offset8(&mut self, dst: XReg, ptr: XReg, offset: i8) -> ControlFlow { - let val = unsafe { - u64::from_le( - self.state[ptr] - .get_ptr::() - .byte_offset(offset.into()) - .read_unaligned(), - ) - }; - self.state[dst].set_u64(val); + fn xload16le_u64_offset32(&mut 
self, dst: XReg, ptr: XReg, offset: i32) -> ControlFlow { + let val = unsafe { self.load::(ptr, offset) }; + self.state[dst].set_u64(u16::from_le(val).into()); ControlFlow::Continue(()) } - fn load64_offset64(&mut self, dst: XReg, ptr: XReg, offset: i64) -> ControlFlow { - let val = unsafe { - u64::from_le( - self.state[ptr] - .get_ptr::() - .byte_offset(offset as isize) - .read_unaligned(), - ) - }; - self.state[dst].set_u64(val); + fn xload16le_s64_offset32(&mut self, dst: XReg, ptr: XReg, offset: i32) -> ControlFlow { + let val = unsafe { self.load::(ptr, offset) }; + self.state[dst].set_i64(i16::from_le(val).into()); ControlFlow::Continue(()) } - fn store32(&mut self, ptr: XReg, src: XReg) -> ControlFlow { - let ptr = self.state[ptr].get_ptr::(); - let val = self.state[src].get_u32(); + fn xload32le_u64_offset32(&mut self, dst: XReg, ptr: XReg, offset: i32) -> ControlFlow { + let val = unsafe { self.load::(ptr, offset) }; + self.state[dst].set_u64(u32::from_le(val).into()); + ControlFlow::Continue(()) + } + + fn xload32le_s64_offset32(&mut self, dst: XReg, ptr: XReg, offset: i32) -> ControlFlow { + let val = unsafe { self.load::(ptr, offset) }; + self.state[dst].set_i64(i32::from_le(val).into()); + ControlFlow::Continue(()) + } + + fn xload64le_offset32(&mut self, dst: XReg, ptr: XReg, offset: i32) -> ControlFlow { + let val = unsafe { self.load::(ptr, offset) }; + self.state[dst].set_i64(i64::from_le(val)); + ControlFlow::Continue(()) + } + + fn xstore8_offset32(&mut self, ptr: XReg, offset: i32, src: XReg) -> ControlFlow { + let val = self.state[src].get_u32() as u8; unsafe { - ptr::write_unaligned(ptr, val.to_le()); + self.store(ptr, offset, val); } ControlFlow::Continue(()) } - fn store64(&mut self, ptr: XReg, src: XReg) -> ControlFlow { - let ptr = self.state[ptr].get_ptr::(); - let val = self.state[src].get_u64(); + fn xstore16le_offset32(&mut self, ptr: XReg, offset: i32, src: XReg) -> ControlFlow { + let val = self.state[src].get_u32() as u16; unsafe { 
- ptr::write_unaligned(ptr, val.to_le()); + self.store(ptr, offset, val.to_le()); } ControlFlow::Continue(()) } - fn store32_offset8(&mut self, ptr: XReg, offset: i8, src: XReg) -> ControlFlow { + fn xstore32le_offset32(&mut self, ptr: XReg, offset: i32, src: XReg) -> ControlFlow { let val = self.state[src].get_u32(); unsafe { - self.state[ptr] - .get_ptr::() - .byte_offset(offset.into()) - .write_unaligned(val.to_le()); + self.store(ptr, offset, val.to_le()); } ControlFlow::Continue(()) } - fn store64_offset8(&mut self, ptr: XReg, offset: i8, src: XReg) -> ControlFlow { + fn xstore64le_offset32(&mut self, ptr: XReg, offset: i32, src: XReg) -> ControlFlow { let val = self.state[src].get_u64(); unsafe { - self.state[ptr] - .get_ptr::() - .byte_offset(offset.into()) - .write_unaligned(val.to_le()); + self.store(ptr, offset, val.to_le()); } ControlFlow::Continue(()) } - fn store32_offset64(&mut self, ptr: XReg, offset: i64, src: XReg) -> ControlFlow { - let val = self.state[src].get_u32(); + fn fload32le_offset32(&mut self, dst: FReg, ptr: XReg, offset: i32) -> ControlFlow { + let val = unsafe { self.load::(ptr, offset) }; + self.state[dst].set_f32(f32::from_bits(u32::from_le(val))); + ControlFlow::Continue(()) + } + + fn fload64le_offset32(&mut self, dst: FReg, ptr: XReg, offset: i32) -> ControlFlow { + let val = unsafe { self.load::(ptr, offset) }; + self.state[dst].set_f64(f64::from_bits(u64::from_le(val))); + ControlFlow::Continue(()) + } + + fn fstore32le_offset32(&mut self, ptr: XReg, offset: i32, src: FReg) -> ControlFlow { + let val = self.state[src].get_f32(); unsafe { - self.state[ptr] - .get_ptr::() - .byte_offset(offset as isize) - .write_unaligned(val.to_le()); + self.store(ptr, offset, val.to_bits().to_le()); } ControlFlow::Continue(()) } - fn store64_offset64(&mut self, ptr: XReg, offset: i64, src: XReg) -> ControlFlow { - let val = self.state[src].get_u64(); + fn fstore64le_offset32(&mut self, ptr: XReg, offset: i32, src: FReg) -> ControlFlow { + let 
val = self.state[src].get_f64(); unsafe { - self.state[ptr] - .get_ptr::() - .byte_offset(offset as isize) - .write_unaligned(val.to_le()); + self.store(ptr, offset, val.to_bits().to_le()); } ControlFlow::Continue(()) } @@ -1505,4 +1530,86 @@ impl ExtendedOpVisitor for Interpreter<'_> { self.state[dst].set_u64(src.swap_bytes()); ControlFlow::Continue(()) } + + fn xload16be_u64_offset32(&mut self, dst: XReg, ptr: XReg, offset: i32) -> ControlFlow { + let val = unsafe { self.load::(ptr, offset) }; + self.state[dst].set_u64(u16::from_be(val).into()); + ControlFlow::Continue(()) + } + + fn xload16be_s64_offset32(&mut self, dst: XReg, ptr: XReg, offset: i32) -> ControlFlow { + let val = unsafe { self.load::(ptr, offset) }; + self.state[dst].set_i64(i16::from_be(val).into()); + ControlFlow::Continue(()) + } + + fn xload32be_u64_offset32(&mut self, dst: XReg, ptr: XReg, offset: i32) -> ControlFlow { + let val = unsafe { self.load::(ptr, offset) }; + self.state[dst].set_u64(u32::from_be(val).into()); + ControlFlow::Continue(()) + } + + fn xload32be_s64_offset32(&mut self, dst: XReg, ptr: XReg, offset: i32) -> ControlFlow { + let val = unsafe { self.load::(ptr, offset) }; + self.state[dst].set_i64(i32::from_be(val).into()); + ControlFlow::Continue(()) + } + + fn xload64be_offset32(&mut self, dst: XReg, ptr: XReg, offset: i32) -> ControlFlow { + let val = unsafe { self.load::(ptr, offset) }; + self.state[dst].set_i64(i64::from_be(val)); + ControlFlow::Continue(()) + } + + fn xstore16be_offset32(&mut self, ptr: XReg, offset: i32, src: XReg) -> ControlFlow { + let val = self.state[src].get_u32() as u16; + unsafe { + self.store(ptr, offset, val.to_be()); + } + ControlFlow::Continue(()) + } + + fn xstore32be_offset32(&mut self, ptr: XReg, offset: i32, src: XReg) -> ControlFlow { + let val = self.state[src].get_u32(); + unsafe { + self.store(ptr, offset, val.to_be()); + } + ControlFlow::Continue(()) + } + + fn xstore64be_offset32(&mut self, ptr: XReg, offset: i32, src: XReg) -> 
ControlFlow { + let val = self.state[src].get_u64(); + unsafe { + self.store(ptr, offset, val.to_be()); + } + ControlFlow::Continue(()) + } + + fn fload32be_offset32(&mut self, dst: FReg, ptr: XReg, offset: i32) -> ControlFlow { + let val = unsafe { self.load::(ptr, offset) }; + self.state[dst].set_f32(f32::from_bits(u32::from_be(val))); + ControlFlow::Continue(()) + } + + fn fload64be_offset32(&mut self, dst: FReg, ptr: XReg, offset: i32) -> ControlFlow { + let val = unsafe { self.load::(ptr, offset) }; + self.state[dst].set_f64(f64::from_bits(u64::from_be(val))); + ControlFlow::Continue(()) + } + + fn fstore32be_offset32(&mut self, ptr: XReg, offset: i32, src: FReg) -> ControlFlow { + let val = self.state[src].get_f32(); + unsafe { + self.store(ptr, offset, val.to_bits().to_be()); + } + ControlFlow::Continue(()) + } + + fn fstore64be_offset32(&mut self, ptr: XReg, offset: i32, src: FReg) -> ControlFlow { + let val = self.state[src].get_f64(); + unsafe { + self.store(ptr, offset, val.to_bits().to_be()); + } + ControlFlow::Continue(()) + } } diff --git a/pulley/src/lib.rs b/pulley/src/lib.rs index e0dbbe289c5b..112a93b4d04e 100644 --- a/pulley/src/lib.rs +++ b/pulley/src/lib.rs @@ -15,6 +15,74 @@ extern crate std; extern crate alloc; /// Calls the given macro with each opcode. +/// +/// # Instruction Guidelines +/// +/// We're inventing an instruction set here which naturally brings a whole set +/// of design questions. Note that this is explicitly intended to be only ever +/// used for Pulley where there is a different set of design constraints than +/// other instruction sets (e.g. general-purpose CPU ISAs). Some examples of +/// constraints for Pulley are: +/// +/// * Instructions must be portable to many architectures. +/// * The Pulley ISA is mostly target-independent as the compilation target is +/// currently only parameterized on pointer width and endianness. +/// * Pulley instructions should be a balance of time-to-decode and code size.
For +/// example super fancy bit-packing tricks might be tough to decode in +/// software but might be worthwhile if it's quite common and greatly reduces +/// the size of bytecode. There's not a hard-and-fast answer here, but a +/// balance to be made. +/// * Many "macro ops" are present to reduce the size of compiled bytecode so +/// there is a wide set of duplicate functionality between opcodes (and this +/// is expected). +/// +/// Given all this it's also useful to have a set of guidelines used to name and +/// develop Pulley instructions. As of the time of this writing it's still +/// pretty early days for Pulley so some of these guidelines may change over +/// time. Additionally instructions don't necessarily all follow these +/// conventions and that may also change over time. With that in mind, here's a +/// rough set of guidelines: +/// +/// * Most instructions are prefixed with `x`, `f`, or `v`, indicating which +/// type of register they're operating on. (e.g. `xadd32` operates on the `x` +/// integer registers and `fadd32` operates on the `f` float registers). +/// +/// * Most instructions are suffixed or otherwise contain the bit width they're +/// operating on. For example `xadd32` is a 32-bit addition. +/// +/// * If an instruction operates on signed or unsigned data (such as division +/// and remainder), then the instruction is suffixed with `_s` or `_u`. +/// +/// * Instructions operate on either 32 or 64-bit parts of a register. +/// Instructions modifying only 32-bits of a register always modify the "low" +/// part of a register and leave the upper part unmodified. This is intended +/// to help 32-bit platforms where if most operations are 32-bit there's no +/// need for extra instructions to sign or zero extend and modify the upper +/// half of the register. +/// +/// * Binops use `BinaryOperands` for the destination and argument registers. 
+/// +/// * Instructions operating on memory contain a few pieces of information: +/// +/// ```text +/// xload16le_u32_offset32 +/// │└─┬┘└┤└┤ └┬┘ └──┬───┘ +/// │ │ │ │ │ ▼ +/// │ │ │ │ │ addressing mode +/// │ │ │ │ ▼ +/// │ │ │ │ width of register modified + sign-extension (optional) +/// │ │ │ ▼ +/// │ │ │ endianness of the operation (le/be) +/// │ │ ▼ +/// │ │ bit-width of the operation +/// │ ▼ +/// │ what's happening (load/store) +/// ▼ +/// register being operated on (x/f/z) +/// ``` +/// +/// More guidelines might get added here over time, and if you have any +/// questions feel free to raise them and we can try to add them here as well! #[macro_export] macro_rules! for_each_op { ( $macro:ident ) => { @@ -98,6 +166,24 @@ macro_rules! for_each_op { /// 64-bit wrapping addition: `dst = src1 + src2`. xadd64 = Xadd64 { operands: BinaryOperands }; + /// 32-bit checked unsigned addition: `low32(dst) = low32(src1) + + /// low32(src2)`. + /// + /// The upper 32-bits of `dst` are unmodified. Traps if the addition + /// overflows. + xadd32_uoverflow_trap = Xadd32UoverflowTrap { operands: BinaryOperands }; + + /// 64-bit checked unsigned addition: `dst = src1 + src2`. + xadd64_uoverflow_trap = Xadd64UoverflowTrap { operands: BinaryOperands }; + + /// 32-bit wrapping subtraction: `low32(dst) = low32(src1) - low32(src2)`. + /// + /// The upper 32-bits of `dst` are unmodified. + xsub32 = Xsub32 { operands: BinaryOperands }; + + /// 64-bit wrapping subtraction: `dst = src1 - src2`. + xsub64 = Xsub64 { operands: BinaryOperands }; + /// 64-bit equality. xeq64 = Xeq64 { operands: BinaryOperands }; /// 64-bit inequality. @@ -123,41 +209,49 @@ macro_rules! for_each_op { /// 32-bit unsigned less-than-equal. 
xulteq32 = Xulteq32 { operands: BinaryOperands }; - /// `dst = zero_extend(load32_le(ptr))` - load32_u = Load32U { dst: XReg, ptr: XReg }; - /// `dst = sign_extend(load32_le(ptr))` - load32_s = Load32S { dst: XReg, ptr: XReg }; - /// `dst = load64_le(ptr)` - load64 = Load64 { dst: XReg, ptr: XReg }; - - /// `dst = zero_extend(load32_le(ptr + offset8))` - load32_u_offset8 = Load32UOffset8 { dst: XReg, ptr: XReg, offset: i8 }; - /// `dst = sign_extend(load32_le(ptr + offset8))` - load32_s_offset8 = Load32SOffset8 { dst: XReg, ptr: XReg, offset: i8 }; - /// `dst = load64_le(ptr + offset8)` - load64_offset8 = Load64Offset8 { dst: XReg, ptr: XReg, offset: i8 }; - - /// `dst = zero_extend(load32_le(ptr + offset64))` - load32_u_offset64 = Load32UOffset64 { dst: XReg, ptr: XReg, offset: i64 }; - /// `dst = sign_extend(load32_le(ptr + offset64))` - load32_s_offset64 = Load32SOffset64 { dst: XReg, ptr: XReg, offset: i64 }; - /// `dst = load64_le(ptr + offset64)` - load64_offset64 = Load64Offset64 { dst: XReg, ptr: XReg, offset: i64 }; - - /// `*ptr = low32(src.to_le())` - store32 = Store32 { ptr: XReg, src: XReg }; - /// `*ptr = src.to_le()` - store64 = Store64 { ptr: XReg, src: XReg }; - - /// `*(ptr + sign_extend(offset8)) = low32(src).to_le()` - store32_offset8 = Store32SOffset8 { ptr: XReg, offset: i8, src: XReg }; - /// `*(ptr + sign_extend(offset8)) = src.to_le()` - store64_offset8 = Store64Offset8 { ptr: XReg, offset: i8, src: XReg }; - - /// `*(ptr + sign_extend(offset64)) = low32(src).to_le()` - store32_offset64 = Store32SOffset64 { ptr: XReg, offset: i64, src: XReg }; - /// `*(ptr + sign_extend(offset64)) = src.to_le()` - store64_offset64 = Store64Offset64 { ptr: XReg, offset: i64, src: XReg }; + /// `low32(dst) = zext(*(ptr + offset))` + xload8_u32_offset32 = XLoad8U32Offset32 { dst: XReg, ptr: XReg, offset: i32 }; + /// `low32(dst) = sext(*(ptr + offset))` + xload8_s32_offset32 = XLoad8S32Offset32 { dst: XReg, ptr: XReg, offset: i32 }; + /// `low32(dst) = 
zext(*(ptr + offset))` + xload16le_u32_offset32 = XLoad16LeU32Offset32 { dst: XReg, ptr: XReg, offset: i32 }; + /// `low32(dst) = sext(*(ptr + offset))` + xload16le_s32_offset32 = XLoad16LeS32Offset32 { dst: XReg, ptr: XReg, offset: i32 }; + /// `low32(dst) = *(ptr + offset)` + xload32le_offset32 = XLoad32LeOffset32 { dst: XReg, ptr: XReg, offset: i32 }; + + /// `dst = zext(*(ptr + offset))` + xload8_u64_offset32 = XLoad8U64Offset32 { dst: XReg, ptr: XReg, offset: i32 }; + /// `dst = sext(*(ptr + offset))` + xload8_s64_offset32 = XLoad8S64Offset32 { dst: XReg, ptr: XReg, offset: i32 }; + /// `dst = zext(*(ptr + offset))` + xload16le_u64_offset32 = XLoad16LeU64Offset32 { dst: XReg, ptr: XReg, offset: i32 }; + /// `dst = sext(*(ptr + offset))` + xload16le_s64_offset32 = XLoad16LeS64Offset32 { dst: XReg, ptr: XReg, offset: i32 }; + /// `dst = zext(*(ptr + offset))` + xload32le_u64_offset32 = XLoad32LeU64Offset32 { dst: XReg, ptr: XReg, offset: i32 }; + /// `dst = sext(*(ptr + offset))` + xload32le_s64_offset32 = XLoad32LeS64Offset32 { dst: XReg, ptr: XReg, offset: i32 }; + /// `dst = *(ptr + offset)` + xload64le_offset32 = XLoad64LeOffset32 { dst: XReg, ptr: XReg, offset: i32 }; + + /// `*(ptr + offset) = low8(src)` + xstore8_offset32 = XStore8Offset32 { ptr: XReg, offset: i32, src: XReg }; + /// `*(ptr + offset) = low16(src)` + xstore16le_offset32 = XStore16LeOffset32 { ptr: XReg, offset: i32, src: XReg }; + /// `*(ptr + offset) = low32(src)` + xstore32le_offset32 = XStore32LeOffset32 { ptr: XReg, offset: i32, src: XReg }; + /// `*(ptr + offset) = low64(src)` + xstore64le_offset32 = XStore64LeOffset32 { ptr: XReg, offset: i32, src: XReg }; + + /// `low32(dst) = zext(*(ptr + offset))` + fload32le_offset32 = Fload32LeOffset32 { dst: FReg, ptr: XReg, offset: i32 }; + /// `dst = *(ptr + offset)` + fload64le_offset32 = Fload64LeOffset32 { dst: FReg, ptr: XReg, offset: i32 }; + /// `*(ptr + offset) = low32(src)` + fstore32le_offset32 = Fstore32LeOffset32 { ptr: XReg, 
offset: i32, src: FReg }; + /// `*(ptr + offset) = src` + fstore64le_offset32 = Fstore64LeOffset32 { ptr: XReg, offset: i32, src: FReg }; /// `push lr; push fp; fp = sp` push_frame = PushFrame ; @@ -254,6 +348,34 @@ macro_rules! for_each_extended_op { bswap32 = Bswap32 { dst: XReg, src: XReg }; /// `dst = byteswap(src)` bswap64 = Bswap64 { dst: XReg, src: XReg }; + + + /// `dst = zext(*(ptr + offset))` + xload16be_u64_offset32 = XLoad16BeU64Offset32 { dst: XReg, ptr: XReg, offset: i32 }; + /// `dst = sext(*(ptr + offset))` + xload16be_s64_offset32 = XLoad16BeS64Offset32 { dst: XReg, ptr: XReg, offset: i32 }; + /// `dst = zext(*(ptr + offset))` + xload32be_u64_offset32 = XLoad32BeU64Offset32 { dst: XReg, ptr: XReg, offset: i32 }; + /// `dst = sext(*(ptr + offset))` + xload32be_s64_offset32 = XLoad32BeS64Offset32 { dst: XReg, ptr: XReg, offset: i32 }; + /// `dst = *(ptr + offset)` + xload64be_offset32 = XLoad64BeOffset32 { dst: XReg, ptr: XReg, offset: i32 }; + + /// `*(ptr + offset) = low16(src)` + xstore16be_offset32 = XStore16BeOffset32 { ptr: XReg, offset: i32, src: XReg }; + /// `*(ptr + offset) = low32(src)` + xstore32be_offset32 = XStore32BeOffset32 { ptr: XReg, offset: i32, src: XReg }; + /// `*(ptr + offset) = low64(src)` + xstore64be_offset32 = XStore64BeOffset32 { ptr: XReg, offset: i32, src: XReg }; + + /// `low32(dst) = zext(*(ptr + offset))` + fload32be_offset32 = Fload32BeOffset32 { dst: FReg, ptr: XReg, offset: i32 }; + /// `dst = *(ptr + offset)` + fload64be_offset32 = Fload64BeOffset32 { dst: FReg, ptr: XReg, offset: i32 }; + /// `*(ptr + offset) = low32(src)` + fstore32be_offset32 = Fstore32BeOffset32 { ptr: XReg, offset: i32, src: FReg }; + /// `*(ptr + offset) = src` + fstore64be_offset32 = Fstore64BeOffset32 { ptr: XReg, offset: i32, src: FReg }; } }; } diff --git a/pulley/tests/all/interp.rs b/pulley/tests/all/interp.rs index 2dc6deb50173..34dae9db7f3d 100644 --- a/pulley/tests/all/interp.rs +++ b/pulley/tests/all/interp.rs @@ -525,203 +525,7 
@@ fn xulteq32() { } #[test] -fn load32_u() { - let a = UnsafeCell::new(11u32.to_le()); - let b = UnsafeCell::new(22u32.to_le()); - let c = UnsafeCell::new(33u32.to_le()); - let d = UnsafeCell::new((i32::MIN as u32).to_le()); - - for (expected, addr) in [ - (11, a.get()), - (22, b.get()), - (33, c.get()), - (i32::MIN as u32 as u64, d.get()), - ] { - unsafe { - assert_one( - [ - (x(0), Val::from(0x1234567812345678u64)), - (x(1), Val::from(addr.cast::())), - ], - Load32U { - dst: x(0), - ptr: x(1), - }, - x(0), - expected, - ); - } - } -} - -#[test] -fn load32_s() { - let a = UnsafeCell::new(11u32.to_le()); - let b = UnsafeCell::new(22u32.to_le()); - let c = UnsafeCell::new(33u32.to_le()); - let d = UnsafeCell::new((-1i32 as u32).to_le()); - - for (expected, addr) in [ - (11, a.get()), - (22, b.get()), - (33, c.get()), - (-1i64 as u64, d.get()), - ] { - unsafe { - assert_one( - [ - (x(0), Val::from(0x1234567812345678u64)), - (x(1), Val::from(addr.cast::())), - ], - Load32S { - dst: x(0), - ptr: x(1), - }, - x(0), - expected, - ); - } - } -} - -#[test] -fn load64() { - let a = UnsafeCell::new(11u64.to_le()); - let b = UnsafeCell::new(22u64.to_le()); - let c = UnsafeCell::new(33u64.to_le()); - let d = UnsafeCell::new((-1i64 as u64).to_le()); - - for (expected, addr) in [ - (11, a.get()), - (22, b.get()), - (33, c.get()), - (-1i64 as u64, d.get()), - ] { - unsafe { - assert_one( - [ - (x(0), Val::from(0x1234567812345678u64)), - (x(1), Val::from(addr)), - ], - Load64 { - dst: x(0), - ptr: x(1), - }, - x(0), - expected, - ); - } - } -} - -#[test] -fn load32_u_offset8() { - let a = UnsafeCell::new([11u32.to_le(), 22u32.to_le()]); - let b = UnsafeCell::new([33u32.to_le(), 44u32.to_le()]); - let c = UnsafeCell::new([55u32.to_le(), 66u32.to_le()]); - let d = UnsafeCell::new([(i32::MIN as u32).to_le(), (i32::MAX as u32).to_le()]); - - for (expected, addr, offset) in [ - (11, a.get(), 0), - (22, a.get(), 4), - (33, b.get(), 0), - (44, b.get(), 4), - (55, c.get(), 0), - (66, 
c.get(), 4), - (i32::MIN as u32 as u64, d.get(), 0), - (i32::MAX as u32 as u64, d.get(), 4), - ] { - unsafe { - assert_one( - [ - (x(0), Val::from(0x1234567812345678u64)), - (x(1), Val::from(addr.cast::())), - ], - Load32UOffset8 { - dst: x(0), - ptr: x(1), - offset, - }, - x(0), - expected, - ); - } - } -} - -#[test] -fn load32_s_offset8() { - let a = UnsafeCell::new([11u32.to_le(), 22u32.to_le()]); - let b = UnsafeCell::new([33u32.to_le(), 44u32.to_le()]); - let c = UnsafeCell::new([55u32.to_le(), 66u32.to_le()]); - let d = UnsafeCell::new([(-1i32 as u32).to_le(), (i32::MAX as u32).to_le()]); - - for (expected, addr, offset) in [ - (11, a.get(), 0), - (22, a.get(), 4), - (33, b.get(), 0), - (44, b.get(), 4), - (55, c.get(), 0), - (55, unsafe { c.get().byte_add(4) }, -4), - (66, c.get(), 4), - (-1i64 as u64, d.get(), 0), - (i32::MAX as u32 as u64, d.get(), 4), - ] { - unsafe { - assert_one( - [ - (x(0), Val::from(0x1234567812345678u64)), - (x(1), Val::from(addr.cast::())), - ], - Load32SOffset8 { - dst: x(0), - ptr: x(1), - offset, - }, - x(0), - expected, - ); - } - } -} - -#[test] -fn load64_offset8() { - let a = UnsafeCell::new([11u64.to_le(), 22u64.to_le()]); - let b = UnsafeCell::new([33u64.to_le(), 44u64.to_le()]); - let c = UnsafeCell::new([55u64.to_le(), 66u64.to_le()]); - let d = UnsafeCell::new([(-1i64 as u64).to_le(), (i64::MAX as u64).to_le()]); - - for (expected, addr, offset) in [ - (11, a.get(), 0), - (22, a.get(), 8), - (33, b.get(), 0), - (44, b.get(), 8), - (55, c.get(), 0), - (66, c.get(), 8), - (-1i64 as u64, d.get(), 0), - (i64::MAX as u64, d.get(), 8), - ] { - unsafe { - assert_one( - [ - (x(0), Val::from(0x1234567812345678u64)), - (x(1), Val::from(addr)), - ], - Load64Offset8 { - dst: x(0), - ptr: x(1), - offset, - }, - x(0), - expected, - ); - } - } -} - -#[test] -fn load32_u_offset64() { +fn xload32le_u64_offset32() { let a = UnsafeCell::new([11u32.to_le(), 22u32.to_le()]); let b = UnsafeCell::new([33u32.to_le(), 44u32.to_le()]); let c = 
UnsafeCell::new([55u32.to_le(), 66u32.to_le()]); @@ -743,7 +547,7 @@ fn load32_u_offset64() { (x(0), Val::from(0x1234567812345678u64)), (x(1), Val::from(addr.cast::())), ], - Load32UOffset64 { + XLoad32LeU64Offset32 { dst: x(0), ptr: x(1), offset, @@ -756,7 +560,7 @@ fn load32_u_offset64() { } #[test] -fn load32_s_offset64() { +fn xload32le_s64_offset32() { let a = UnsafeCell::new([11u32.to_le(), 22u32.to_le()]); let b = UnsafeCell::new([33u32.to_le(), 44u32.to_le()]); let c = UnsafeCell::new([55u32.to_le(), 66u32.to_le()]); @@ -779,7 +583,7 @@ fn load32_s_offset64() { (x(0), Val::from(0x1234567812345678u64)), (x(1), Val::from(addr.cast::())), ], - Load32SOffset64 { + XLoad32LeS64Offset32 { dst: x(0), ptr: x(1), offset, @@ -792,7 +596,7 @@ fn load32_s_offset64() { } #[test] -fn load64_offset64() { +fn xload64le_offset32() { let a = UnsafeCell::new([11u64.to_le(), 22u64.to_le()]); let b = UnsafeCell::new([33u64.to_le(), 44u64.to_le()]); let c = UnsafeCell::new([55u64.to_le(), 66u64.to_le()]); @@ -814,7 +618,7 @@ fn load64_offset64() { (x(0), Val::from(0x1234567812345678u64)), (x(1), Val::from(addr)), ], - Load64Offset64 { + XLoad64LeOffset32 { dst: x(0), ptr: x(1), offset, @@ -827,180 +631,7 @@ fn load64_offset64() { } #[test] -fn store32() { - let a = UnsafeCell::new([0x12u8, 0x34, 0x56, 0x78, 0x12, 0x34, 0x56, 0x78]); - let b = UnsafeCell::new([0x12u8, 0x34, 0x56, 0x78, 0x12, 0x34, 0x56, 0x78]); - let c = UnsafeCell::new([0x12u8, 0x34, 0x56, 0x78, 0x12, 0x34, 0x56, 0x78]); - - unsafe { - for (val, addr) in [ - (0x11111111u32, a.get()), - (0x22222222, b.get().byte_add(4)), - (0x33333333, c.get().byte_add(2)), - ] { - let val = val as u64; - assert_one( - [(x(0), Val::from(addr)), (x(1), Val::from(val))], - Store32 { - ptr: x(0), - src: x(1), - }, - x(1), - val, - ); - } - } - - let a = u64::from_be_bytes(a.into_inner()); - let expected = 0x1111111112345678u64; - eprintln!("expected(a) = {expected:#018x}"); - eprintln!("actual(a) = {a:#018x}"); - assert_eq!(a, 
expected); - - let b = u64::from_be_bytes(b.into_inner()); - let expected = 0x1234567822222222u64; - eprintln!("expected(b) = {expected:#018x}"); - eprintln!("actual(b) = {b:#018x}"); - assert_eq!(b, expected); - - let c = u64::from_be_bytes(c.into_inner()); - let expected = 0x1234333333335678u64; - eprintln!("expected(c) = {expected:#018x}"); - eprintln!("actual(c) = {c:#018x}"); - assert_eq!(c, expected); -} - -#[test] -fn store64() { - let a = UnsafeCell::new(0x1234567812345678); - let b = UnsafeCell::new(0x1234567812345678); - let c = UnsafeCell::new(0x1234567812345678); - - unsafe { - for (val, addr) in [ - (0x1111111111111111u64, a.get()), - (0x2222222222222222, b.get()), - (0x3333333333333333, c.get()), - ] { - assert_one( - [(x(0), Val::from(addr)), (x(1), Val::from(val))], - Store64 { - ptr: x(0), - src: x(1), - }, - x(1), - val, - ); - } - } - - let a = a.into_inner(); - let expected = 0x1111111111111111u64; - eprintln!("expected(a) = {expected:#018x}"); - eprintln!("actual(a) = {a:#018x}"); - assert_eq!(a, expected); - - let b = b.into_inner(); - let expected = 0x2222222222222222u64; - eprintln!("expected(b) = {expected:#018x}"); - eprintln!("actual(b) = {b:#018x}"); - assert_eq!(b, expected); - - let c = c.into_inner(); - let expected = 0x3333333333333333u64; - eprintln!("expected(c) = {expected:#018x}"); - eprintln!("actual(c) = {c:#018x}"); - assert_eq!(c, expected); -} - -#[test] -fn store32_offset8() { - let a = UnsafeCell::new([0x12u8, 0x34, 0x56, 0x78, 0x12, 0x34, 0x56, 0x78]); - let b = UnsafeCell::new([0x12u8, 0x34, 0x56, 0x78, 0x12, 0x34, 0x56, 0x78]); - let c = UnsafeCell::new([0x12u8, 0x34, 0x56, 0x78, 0x12, 0x34, 0x56, 0x78]); - - unsafe { - for (val, addr, offset) in [ - (0x11111111u32, a.get(), 0), - (0x22222222, b.get(), 4), - (0x33333333, c.get(), 2), - ] { - let val = val as u64; - assert_one( - [(x(0), Val::from(addr)), (x(1), Val::from(val))], - Store32SOffset8 { - ptr: x(0), - src: x(1), - offset, - }, - x(1), - val, - ); - } - } - - 
let a = u64::from_be_bytes(a.into_inner()); - let expected = 0x1111111112345678u64; - eprintln!("expected(a) = {expected:#018x}"); - eprintln!("actual(a) = {a:#018x}"); - assert_eq!(a, expected); - - let b = u64::from_be_bytes(b.into_inner()); - let expected = 0x1234567822222222u64; - eprintln!("expected(b) = {expected:#018x}"); - eprintln!("actual(b) = {b:#018x}"); - assert_eq!(b, expected); - - let c = u64::from_be_bytes(c.into_inner()); - let expected = 0x1234333333335678u64; - eprintln!("expected(c) = {expected:#018x}"); - eprintln!("actual(c) = {c:#018x}"); - assert_eq!(c, expected); -} - -#[test] -fn store64_offset8() { - let a = UnsafeCell::new([0x1234567812345678, 0x1234567812345678, 0x1234567812345678]); - - unsafe { - for (val, addr, offset) in [ - (0x1111111111111111u64, a.get(), 0), - (0x2222222222222222, a.get(), 8), - (0x3333333333333333, a.get(), 16), - ] { - assert_one( - [(x(0), Val::from(addr)), (x(1), Val::from(val))], - Store64Offset8 { - ptr: x(0), - src: x(1), - offset, - }, - x(1), - val, - ); - } - } - - let [a, b, c] = a.into_inner(); - - let expected = 0x1111111111111111u64; - eprintln!("expected(a) = {expected:#018x}"); - eprintln!("actual(a) = {a:#018x}"); - assert_eq!(a, expected); - - let expected = 0x2222222222222222u64; - eprintln!("expected(b) = {expected:#018x}"); - eprintln!("actual(b) = {b:#018x}"); - assert_eq!(b, expected); - - let expected = 0x3333333333333333u64; - eprintln!("expected(c) = {expected:#018x}"); - eprintln!("actual(c) = {c:#018x}"); - assert_eq!(c, expected); -} - -#[test] -fn store32_offset64() { +fn xstore32_le_offset32() { let a = UnsafeCell::new([0x12u8, 0x34, 0x56, 0x78, 0x12, 0x34, 0x56, 0x78]); let b = UnsafeCell::new([0x12u8, 0x34, 0x56, 0x78, 0x12, 0x34, 0x56, 0x78]); let c = UnsafeCell::new([0x12u8, 0x34, 0x56, 0x78, 0x12, 0x34, 0x56, 0x78]); @@ -1014,7 +645,7 @@ fn store32_offset64() { let val = val as u64; assert_one( [(x(0), Val::from(addr)), (x(1), Val::from(val))], - Store32SOffset64 { + 
XStore32LeOffset32 { ptr: x(0), src: x(1), offset, @@ -1045,7 +676,7 @@ fn store32_offset64() { } #[test] -fn store64_offset64() { +fn xstore64_le_offset32() { let a = UnsafeCell::new([0x1234567812345678, 0x1234567812345678, 0x1234567812345678]); unsafe { @@ -1056,7 +687,7 @@ fn store64_offset64() { ] { assert_one( [(x(0), Val::from(addr)), (x(1), Val::from(val))], - Store64Offset64 { + XStore64LeOffset32 { ptr: x(0), src: x(1), offset, diff --git a/tests/disas/pulley/call.wat b/tests/disas/pulley/call.wat index 238e2b363d2a..57f6f28d4349 100644 --- a/tests/disas/pulley/call.wat +++ b/tests/disas/pulley/call.wat @@ -7,9 +7,9 @@ ) ;; wasm[0]::function[1]: ;; push_frame -;; load32_u_offset8 x3, x0, 44 +;; xload32le_offset32 x3, x0, 44 ;; xmov x6, x0 -;; load32_u_offset8 x0, x6, 52 +;; xload32le_offset32 x0, x6, 52 ;; xmov x1, x6 ;; call_indirect x3 ;; pop_frame diff --git a/tests/disas/pulley/epoch-simple.wat b/tests/disas/pulley/epoch-simple.wat index 1f503e82902e..94d40d567123 100644 --- a/tests/disas/pulley/epoch-simple.wat +++ b/tests/disas/pulley/epoch-simple.wat @@ -7,13 +7,13 @@ ) ;; wasm[0]::function[0]: ;; push_frame -;; load64_offset8 x7, x0, 8 -;; load64_offset8 x8, x0, 32 -;; load64 x8, x8 -;; load64_offset8 x7, x7, 8 +;; xload64le_offset32 x7, x0, 8 +;; xload64le_offset32 x8, x0, 32 +;; xload64le_offset32 x8, x8, 0 +;; xload64le_offset32 x7, x7, 8 ;; xulteq64 x7, x7, x8 -;; br_if x7, 0x8 // target = 0x1b -;; 19: pop_frame +;; br_if x7, 0x8 // target = 0x28 +;; 26: pop_frame ;; ret -;; 1b: call 0x83 // target = 0x9e -;; 20: jump 0xfffffffffffffff9 // target = 0x19 +;; 28: call 0x9c // target = 0xc4 +;; 2d: jump 0xfffffffffffffff9 // target = 0x26 From 5eee631311eee9ff774842cee35f5d6318965d40 Mon Sep 17 00:00:00 2001 From: Chris Fallin Date: Tue, 10 Dec 2024 15:56:25 -0800 Subject: [PATCH 15/30] Wasmtime: support a notion of "custom code publisher". (#9778) * Wasmtime: support a notion of "custom code publisher". 
In some `no_std` environments, virtual memory usage is *generally* prohibited for performance-predictability reasons, but the MMU hardware is still in use for permissions (e.g., `W^X` write-xor-execute). Occasional changes to page mapping permissions are thus necessary when new modules are loaded dynamically, and are acceptable in that context. Wasmtime needs a way to support "publishing" code (making it executable) in such environments. Rather than try to segment the `signals-based-traps` divide further, and piece out the code-publishing parts from the heap parts, and backdoor a path to `mprotect` in an otherwise `no_std` build, in this PR I have opted to add a trait, an implementation of which the embedder can provide to the `Config`, to implement custom actions for "code publish". This otherwise operates properly in a no-`signals-based-traps` environment, e.g., the module backing memory itself is regularly allocated rather than mmap'd (but is now aligned to the degree requested by the trait impl). * Review feedback. * Plumb through custom alignment for runtime code generation * Add a test for custom code memory.
--- .../environ/src/compile/module_artifacts.rs | 8 +- crates/wasmtime/src/compile.rs | 6 +- crates/wasmtime/src/compile/code_builder.rs | 4 +- crates/wasmtime/src/compile/runtime.rs | 69 +++++++--- crates/wasmtime/src/config.rs | 33 +++++ crates/wasmtime/src/engine.rs | 26 +++- crates/wasmtime/src/runtime/code_memory.rs | 119 ++++++++++++++---- crates/wasmtime/src/runtime/vm/mmap_vec.rs | 96 +++++++++++--- tests/all/custom_code_memory.rs | 53 ++++++++ tests/all/main.rs | 1 + 10 files changed, 353 insertions(+), 62 deletions(-) create mode 100644 tests/all/custom_code_memory.rs diff --git a/crates/environ/src/compile/module_artifacts.rs b/crates/environ/src/compile/module_artifacts.rs index 7d9d132eddd6..9d992e25405c 100644 --- a/crates/environ/src/compile/module_artifacts.rs +++ b/crates/environ/src/compile/module_artifacts.rs @@ -274,12 +274,16 @@ impl<'a> ObjectBuilder<'a> { /// A type which can be the result of serializing an object. pub trait FinishedObject: Sized { + /// State required for `finish_object`, if any. + type State; + /// Emit the object as `Self`. 
- fn finish_object(obj: ObjectBuilder<'_>) -> Result; + fn finish_object(obj: ObjectBuilder<'_>, state: &Self::State) -> Result; } impl FinishedObject for Vec { - fn finish_object(obj: ObjectBuilder<'_>) -> Result { + type State = (); + fn finish_object(obj: ObjectBuilder<'_>, _state: &Self::State) -> Result { let mut result = ObjectVec::default(); obj.finish(&mut result)?; return Ok(result.0); diff --git a/crates/wasmtime/src/compile.rs b/crates/wasmtime/src/compile.rs index 1222fb9607c4..1598bbf05e51 100644 --- a/crates/wasmtime/src/compile.rs +++ b/crates/wasmtime/src/compile.rs @@ -64,6 +64,7 @@ pub(crate) fn build_artifacts( engine: &Engine, wasm: &[u8], dwarf_package: Option<&[u8]>, + obj_state: &T::State, ) -> Result<(T, Option<(CompiledModuleInfo, ModuleTypes)>)> { let tunables = engine.tunables(); @@ -111,7 +112,7 @@ pub(crate) fn build_artifacts( let info = compilation_artifacts.unwrap_as_module_info(); let types = types.finish(); object.serialize_info(&(&info, &types)); - let result = T::finish_object(object)?; + let result = T::finish_object(object, obj_state)?; Ok((result, Some((info, types)))) } @@ -128,6 +129,7 @@ pub(crate) fn build_component_artifacts( engine: &Engine, binary: &[u8], _dwarf_package: Option<&[u8]>, + obj_state: &T::State, ) -> Result<(T, Option)> { use wasmtime_environ::component::{ CompiledComponentInfo, ComponentArtifacts, ComponentTypesBuilder, @@ -186,7 +188,7 @@ pub(crate) fn build_component_artifacts( }; object.serialize_info(&artifacts); - let result = T::finish_object(object)?; + let result = T::finish_object(object, obj_state)?; Ok((result, Some(artifacts))) } diff --git a/crates/wasmtime/src/compile/code_builder.rs b/crates/wasmtime/src/compile/code_builder.rs index ec8b543292c7..d6b33fb25f1f 100644 --- a/crates/wasmtime/src/compile/code_builder.rs +++ b/crates/wasmtime/src/compile/code_builder.rs @@ -274,7 +274,7 @@ impl<'a> CodeBuilder<'a> { pub fn compile_module_serialized(&self) -> Result> { let wasm = 
self.get_wasm()?; let dwarf_package = self.get_dwarf_package(); - let (v, _) = super::build_artifacts(self.engine, &wasm, dwarf_package.as_deref())?; + let (v, _) = super::build_artifacts(self.engine, &wasm, dwarf_package.as_deref(), &())?; Ok(v) } @@ -284,7 +284,7 @@ impl<'a> CodeBuilder<'a> { #[cfg(feature = "component-model")] pub fn compile_component_serialized(&self) -> Result> { let bytes = self.get_wasm()?; - let (v, _) = super::build_component_artifacts(self.engine, &bytes, None)?; + let (v, _) = super::build_component_artifacts(self.engine, &bytes, None, &())?; Ok(v) } } diff --git a/crates/wasmtime/src/compile/runtime.rs b/crates/wasmtime/src/compile/runtime.rs index 781be44829e8..23a40eaaf433 100644 --- a/crates/wasmtime/src/compile/runtime.rs +++ b/crates/wasmtime/src/compile/runtime.rs @@ -9,9 +9,15 @@ use std::sync::Arc; use wasmtime_environ::{FinishedObject, ObjectBuilder, ObjectKind}; impl<'a> CodeBuilder<'a> { - fn compile_cached( + fn compile_cached( &self, - build_artifacts: fn(&Engine, &[u8], Option<&[u8]>) -> Result<(MmapVecWrapper, Option)>, + build_artifacts: fn( + &Engine, + &[u8], + Option<&[u8]>, + &S, + ) -> Result<(MmapVecWrapper, Option)>, + state: &S, ) -> Result<(Arc, Option)> { let wasm = self.get_wasm()?; let dwarf_package = self.get_dwarf_package(); @@ -28,24 +34,32 @@ impl<'a> CodeBuilder<'a> { &dwarf_package, // Don't hash this as it's just its own "pure" function pointer. NotHashed(build_artifacts), + // Don't hash the FinishedObject state: this contains + // things like required runtime alignment, and does + // not impact the compilation result itself. 
+ NotHashed(state), ); let (code, info_and_types) = wasmtime_cache::ModuleCacheEntry::new("wasmtime", self.engine.cache_config()) .get_data_raw( &state, // Cache miss, compute the actual artifacts - |(engine, wasm, dwarf_package, build_artifacts)| -> Result<_> { - let (mmap, info) = - (build_artifacts.0)(engine.0, wasm, dwarf_package.as_deref())?; - let code = publish_mmap(mmap.0)?; + |(engine, wasm, dwarf_package, build_artifacts, state)| -> Result<_> { + let (mmap, info) = (build_artifacts.0)( + engine.0, + wasm, + dwarf_package.as_deref(), + state.0, + )?; + let code = publish_mmap(engine.0, mmap.0)?; Ok((code, info)) }, // Implementation of how to serialize artifacts - |(_engine, _wasm, _, _), (code, _info_and_types)| { + |(_engine, _wasm, _, _, _), (code, _info_and_types)| { Some(code.mmap().to_vec()) }, // Cache hit, deserialize the provided artifacts - |(engine, wasm, _, _), serialized_bytes| { + |(engine, wasm, _, _, _), serialized_bytes| { let kind = if wasmparser::Parser::is_component(&wasm) { ObjectKind::Component } else { @@ -61,8 +75,8 @@ impl<'a> CodeBuilder<'a> { #[cfg(not(feature = "cache"))] { let (mmap, info_and_types) = - build_artifacts(self.engine, &wasm, dwarf_package.as_deref())?; - let code = publish_mmap(mmap.0)?; + build_artifacts(self.engine, &wasm, dwarf_package.as_deref(), state)?; + let code = publish_mmap(self.engine, mmap.0)?; return Ok((code, info_and_types)); } @@ -79,7 +93,9 @@ impl<'a> CodeBuilder<'a> { /// Note that this method will cache compilations if the `cache` feature is /// enabled and turned on in [`Config`](crate::Config). 
pub fn compile_module(&self) -> Result { - let (code, info_and_types) = self.compile_cached(super::build_artifacts)?; + let custom_alignment = self.custom_alignment(); + let (code, info_and_types) = + self.compile_cached(super::build_artifacts, &custom_alignment)?; Module::from_parts(self.engine, code, info_and_types) } @@ -87,22 +103,42 @@ impl<'a> CodeBuilder<'a> { /// [`Component`] instead of a module. #[cfg(feature = "component-model")] pub fn compile_component(&self) -> Result { - let (code, artifacts) = self.compile_cached(super::build_component_artifacts)?; + let custom_alignment = self.custom_alignment(); + let (code, artifacts) = + self.compile_cached(super::build_component_artifacts, &custom_alignment)?; Component::from_parts(self.engine, code, artifacts) } + + fn custom_alignment(&self) -> CustomAlignment { + CustomAlignment { + alignment: self + .engine + .custom_code_memory() + .map(|c| c.required_alignment()) + .unwrap_or(1), + } + } } -fn publish_mmap(mmap: MmapVec) -> Result> { - let mut code = CodeMemory::new(mmap)?; +fn publish_mmap(engine: &Engine, mmap: MmapVec) -> Result> { + let mut code = CodeMemory::new(engine, mmap)?; code.publish()?; Ok(Arc::new(code)) } pub(crate) struct MmapVecWrapper(pub MmapVec); +/// Custom alignment requirements from the Engine for +/// produced-at-runtime-in-memory code artifacts. 
+pub(crate) struct CustomAlignment { + alignment: usize, +} + impl FinishedObject for MmapVecWrapper { - fn finish_object(obj: ObjectBuilder<'_>) -> Result { + type State = CustomAlignment; + fn finish_object(obj: ObjectBuilder<'_>, align: &CustomAlignment) -> Result { let mut result = ObjectMmap::default(); + result.alignment = align.alignment; return match obj.finish(&mut result) { Ok(()) => { assert!(result.mmap.is_some(), "no reserve"); @@ -127,6 +163,7 @@ impl FinishedObject for MmapVecWrapper { struct ObjectMmap { mmap: Option, len: usize, + alignment: usize, err: Option, } @@ -137,7 +174,7 @@ impl FinishedObject for MmapVecWrapper { fn reserve(&mut self, additional: usize) -> Result<(), ()> { assert!(self.mmap.is_none(), "cannot reserve twice"); - self.mmap = match MmapVec::with_capacity(additional) { + self.mmap = match MmapVec::with_capacity_and_alignment(additional, self.alignment) { Ok(mmap) => Some(mmap), Err(e) => { self.err = Some(e); diff --git a/crates/wasmtime/src/config.rs b/crates/wasmtime/src/config.rs index a99d422d1b4c..c6dbabdafbcd 100644 --- a/crates/wasmtime/src/config.rs +++ b/crates/wasmtime/src/config.rs @@ -29,6 +29,8 @@ use crate::stack::{StackCreator, StackCreatorProxy}; #[cfg(feature = "async")] use wasmtime_fiber::RuntimeFiberStackCreator; +#[cfg(feature = "runtime")] +pub use crate::runtime::code_memory::CustomCodeMemory; #[cfg(feature = "pooling-allocator")] pub use crate::runtime::vm::MpkEnabled; #[cfg(all(feature = "incremental-cache", feature = "cranelift"))] @@ -133,6 +135,8 @@ pub struct Config { pub(crate) cache_config: CacheConfig, #[cfg(feature = "runtime")] pub(crate) mem_creator: Option>, + #[cfg(feature = "runtime")] + pub(crate) custom_code_memory: Option>, pub(crate) allocation_strategy: InstanceAllocationStrategy, pub(crate) max_wasm_stack: usize, /// Explicitly enabled features via `Config::wasm_*` methods. 
This is a @@ -233,6 +237,8 @@ impl Config { profiling_strategy: ProfilingStrategy::None, #[cfg(feature = "runtime")] mem_creator: None, + #[cfg(feature = "runtime")] + custom_code_memory: None, allocation_strategy: InstanceAllocationStrategy::OnDemand, // 512k of stack -- note that this is chosen currently to not be too // big, not be too small, and be a good default for most platforms. @@ -1336,6 +1342,33 @@ impl Config { self } + /// Sets a custom executable-memory publisher. + /// + /// Custom executable-memory publishers are hooks that allow + /// Wasmtime to make certain regions of memory executable when + /// loading precompiled modules or compiling new modules + /// in-process. In most modern operating systems, memory allocated + /// for heap usage is readable and writable by default but not + /// executable. To jump to machine code stored in that memory, we + /// need to make it executable. For security reasons, we usually + /// also make it read-only at the same time, so the executing code + /// can't be modified later. + /// + /// By default, Wasmtime will use the appropriate system calls on + /// the host platform for this work. However, it also allows + /// plugging in a custom implementation via this configuration + /// option. This may be useful on custom or `no_std` platforms, + /// for example, especially where virtual memory is not otherwise + /// used by Wasmtime (no `signals-based-traps` feature). + #[cfg(feature = "runtime")] + pub fn with_custom_code_memory( + &mut self, + custom_code_memory: Option>, + ) -> &mut Self { + self.custom_code_memory = custom_code_memory; + self + } + /// Sets the instance allocation strategy to use.
/// /// This is notably used in conjunction with diff --git a/crates/wasmtime/src/engine.rs b/crates/wasmtime/src/engine.rs index e2ba2caa7b5c..7f8669c16bca 100644 --- a/crates/wasmtime/src/engine.rs +++ b/crates/wasmtime/src/engine.rs @@ -1,5 +1,7 @@ use crate::prelude::*; #[cfg(feature = "runtime")] +pub use crate::runtime::code_memory::CustomCodeMemory; +#[cfg(feature = "runtime")] use crate::runtime::type_registry::TypeRegistry; #[cfg(feature = "runtime")] use crate::runtime::vm::GcRuntime; @@ -655,6 +657,11 @@ impl Engine { &self.inner.signatures } + #[cfg(feature = "runtime")] + pub(crate) fn custom_code_memory(&self) -> Option<&Arc> { + self.config().custom_code_memory.as_ref() + } + pub(crate) fn epoch_counter(&self) -> &AtomicU64 { &self.inner.epoch } @@ -722,6 +729,15 @@ impl Engine { (f1(), f2()) } + /// Returns the required alignment for a code image, if we + /// allocate in a way that is not a system `mmap()` that naturally + /// aligns it. + fn required_code_alignment(&self) -> usize { + self.custom_code_memory() + .map(|c| c.required_alignment()) + .unwrap_or(1) + } + /// Loads a `CodeMemory` from the specified in-memory slice, copying it to a /// uniquely owned mmap. /// @@ -732,7 +748,13 @@ impl Engine { bytes: &[u8], expected: ObjectKind, ) -> Result> { - self.load_code(crate::runtime::vm::MmapVec::from_slice(bytes)?, expected) + self.load_code( + crate::runtime::vm::MmapVec::from_slice_with_alignment( + bytes, + self.required_code_alignment(), + )?, + expected, + ) } /// Like `load_code_bytes`, but creates a mmap from a file on disk. 
@@ -755,7 +777,7 @@ impl Engine { expected: ObjectKind, ) -> Result> { serialization::check_compatible(self, &mmap, expected)?; - let mut code = crate::CodeMemory::new(mmap)?; + let mut code = crate::CodeMemory::new(self, mmap)?; code.publish()?; Ok(Arc::new(code)) } diff --git a/crates/wasmtime/src/runtime/code_memory.rs b/crates/wasmtime/src/runtime/code_memory.rs index 8ad04f789b71..390e851cac26 100644 --- a/crates/wasmtime/src/runtime/code_memory.rs +++ b/crates/wasmtime/src/runtime/code_memory.rs @@ -2,6 +2,8 @@ use crate::prelude::*; use crate::runtime::vm::{libcalls, MmapVec, UnwindRegistration}; +use crate::Engine; +use alloc::sync::Arc; use core::ops::Range; use object::endian::Endianness; use object::read::{elf::ElfFile64, Object, ObjectSection}; @@ -22,6 +24,7 @@ pub struct CodeMemory { needs_executable: bool, #[cfg(feature = "debug-builtins")] has_native_debug_info: bool, + custom_code_memory: Option>, relocations: Vec<(usize, obj::LibCall)>, @@ -38,6 +41,14 @@ pub struct CodeMemory { impl Drop for CodeMemory { fn drop(&mut self) { + // If there is a custom code memory handler, restore the + // original (non-executable) state of the memory. + if let Some(mem) = self.custom_code_memory.as_ref() { + let text = self.text(); + mem.unpublish_executable(text.as_ptr(), text.len()) + .expect("Executable memory unpublish failed"); + } + // Drop the registrations before `self.mmap` since they (implicitly) refer to it. let _ = self.unwind_registration.take(); #[cfg(feature = "debug-builtins")] @@ -50,13 +61,50 @@ fn _assert() { _assert_send_sync::(); } +/// Interface implemented by an embedder to provide custom +/// implementations of code-memory protection and execute permissions. +pub trait CustomCodeMemory: Send + Sync { + /// The minimal alignment granularity for an address region that + /// can be made executable. 
+ /// + /// Wasmtime does not assume the system page size for this because + /// custom code-memory protection can be used when all other uses + /// of virtual memory are disabled. + fn required_alignment(&self) -> usize; + + /// Publish a region of memory as executable. + /// + /// This should update permissions from the default RW + /// (readable/writable but not executable) to RX + /// (readable/executable but not writable), enforcing W^X + /// discipline. + /// + /// If the platform requires any data/instruction coherence + /// action, that should be performed as part of this hook as well. + /// + /// `ptr` and `ptr.offset(len)` are guaranteed to be aligned as + /// per `required_alignment()`. + fn publish_executable(&self, ptr: *const u8, len: usize) -> anyhow::Result<()>; + + /// Unpublish a region of memory. + /// + /// This should perform the opposite effect of `publish_executable`, + /// switching a range of memory back from RX (readable/executable) + /// to RW (readable/writable). It is guaranteed that no code is + /// running anymore from this region. + /// + /// `ptr` and `ptr.offset(len)` are guaranteed to be aligned as + /// per `required_alignment()`. + fn unpublish_executable(&self, ptr: *const u8, len: usize) -> anyhow::Result<()>; +} + impl CodeMemory { /// Creates a new `CodeMemory` by taking ownership of the provided /// `MmapVec`. /// /// The returned `CodeMemory` manages the internal `MmapVec` and the /// `publish` method is used to actually make the memory executable.
- pub fn new(mmap: MmapVec) -> Result { + pub fn new(engine: &Engine, mmap: MmapVec) -> Result { let obj = ElfFile64::::parse(&mmap[..]) .map_err(obj::ObjectCrateErrorWrapper) .with_context(|| "failed to parse internal compilation artifact")?; @@ -140,6 +188,7 @@ impl CodeMemory { _ => log::debug!("ignoring section {name}"), } } + Ok(Self { mmap, unwind_registration: None, @@ -151,6 +200,7 @@ impl CodeMemory { needs_executable, #[cfg(feature = "debug-builtins")] has_native_debug_info, + custom_code_memory: engine.custom_code_memory().cloned(), text, unwind, trap_data, @@ -270,28 +320,30 @@ impl CodeMemory { // Switch the executable portion from readonly to read/execute. if self.needs_executable { - #[cfg(feature = "signals-based-traps")] - { - let text = self.text(); - - use wasmtime_jit_icache_coherence as icache_coherence; - - // Clear the newly allocated code from cache if the processor requires it - // - // Do this before marking the memory as R+X, technically we should be able to do it after - // but there are some CPU's that have had errata about doing this with read only memory. - icache_coherence::clear_cache(text.as_ptr().cast(), text.len()) - .expect("Failed cache clear"); - - self.mmap - .make_executable(self.text.clone(), self.enable_branch_protection) - .context("unable to make memory executable")?; - - // Flush any in-flight instructions from the pipeline - icache_coherence::pipeline_flush_mt().expect("Failed pipeline flush"); + if !self.custom_publish()? { + #[cfg(feature = "signals-based-traps")] + { + let text = self.text(); + + use wasmtime_jit_icache_coherence as icache_coherence; + + // Clear the newly allocated code from cache if the processor requires it + // + // Do this before marking the memory as R+X, technically we should be able to do it after + // but there are some CPU's that have had errata about doing this with read only memory. 
+ icache_coherence::clear_cache(text.as_ptr().cast(), text.len()) + .expect("Failed cache clear"); + + self.mmap + .make_executable(self.text.clone(), self.enable_branch_protection) + .context("unable to make memory executable")?; + + // Flush any in-flight instructions from the pipeline + icache_coherence::pipeline_flush_mt().expect("Failed pipeline flush"); + } + #[cfg(not(feature = "signals-based-traps"))] + bail!("this target requires virtual memory to be enabled"); } - #[cfg(not(feature = "signals-based-traps"))] - bail!("this target requires virtual memory to be enabled"); } // With all our memory set up use the platform-specific @@ -307,6 +359,29 @@ impl CodeMemory { Ok(()) } + fn custom_publish(&mut self) -> Result { + if let Some(mem) = self.custom_code_memory.as_ref() { + let text = self.text(); + // The text section should be aligned to + // `custom_code_memory.required_alignment()` due to a + // combination of two invariants: + // + // - MmapVec aligns its start address, even in owned-Vec mode; and + // - The text segment inside the ELF image will be aligned according + // to the platform's requirements. + let text_addr = text.as_ptr() as usize; + assert_eq!(text_addr & (mem.required_alignment() - 1), 0); + + // The custom code memory handler will ensure the + // memory is executable and also handle icache + // coherence. 
+ mem.publish_executable(text.as_ptr(), text.len())?; + Ok(true) + } else { + Ok(false) + } + } + unsafe fn apply_relocations(&mut self) -> Result<()> { if self.relocations.is_empty() { return Ok(()); diff --git a/crates/wasmtime/src/runtime/vm/mmap_vec.rs b/crates/wasmtime/src/runtime/vm/mmap_vec.rs index f283ef4265bd..33040642384e 100644 --- a/crates/wasmtime/src/runtime/vm/mmap_vec.rs +++ b/crates/wasmtime/src/runtime/vm/mmap_vec.rs @@ -1,13 +1,19 @@ use crate::prelude::*; +#[cfg(not(feature = "signals-based-traps"))] +use crate::runtime::vm::send_sync_ptr::SendSyncPtr; #[cfg(feature = "signals-based-traps")] use crate::runtime::vm::{mmap::UnalignedLength, Mmap}; +#[cfg(not(feature = "signals-based-traps"))] +use alloc::alloc::Layout; use alloc::sync::Arc; use core::ops::{Deref, Range}; +#[cfg(not(feature = "signals-based-traps"))] +use core::ptr::NonNull; #[cfg(feature = "std")] use std::fs::File; /// A type which prefers to store backing memory in an OS-backed memory mapping -/// but can fall back to `Vec` as well. +/// but can fall back to the regular memory allocator as well. /// /// This type is used to store code in Wasmtime and manage read-only and /// executable permissions of compiled images. This is created from either an @@ -20,13 +26,19 @@ use std::fs::File; /// are typically not, then the remaining bytes in the final page for /// mmap-backed instances are unused. /// -/// Note that when `signals-based-traps` is disabled then this type is backed -/// by a normal `Vec`. In such a scenario this type does not support -/// read-only or executable bits and the methods are not available. +/// Note that when `signals-based-traps` is disabled then this type is +/// backed by the regular memory allocator via `alloc` APIs. In such a +/// scenario this type does not support read-only or executable bits +/// and the methods are not available. 
However, the `CustomCodeMemory` +/// mechanism may be used by the embedder to set up and tear down +/// executable permissions on parts of this storage. pub enum MmapVec { #[doc(hidden)] #[cfg(not(feature = "signals-based-traps"))] - Vec(Vec), + Alloc { + base: SendSyncPtr, + layout: Layout, + }, #[doc(hidden)] #[cfg(feature = "signals-based-traps")] Mmap { @@ -52,20 +64,32 @@ impl MmapVec { } #[cfg(not(feature = "signals-based-traps"))] - fn new_vec(vec: Vec) -> MmapVec { - MmapVec::Vec(vec) + fn new_alloc(len: usize, alignment: usize) -> MmapVec { + let layout = Layout::from_size_align(len, alignment) + .expect("Invalid size or alignment for MmapVec allocation"); + let base = SendSyncPtr::new( + NonNull::new(unsafe { alloc::alloc::alloc_zeroed(layout.clone()) }) + .expect("Allocation of MmapVec storage failed"), + ); + MmapVec::Alloc { base, layout } } - /// Creates a new zero-initialized `MmapVec` with the given `size`. + /// Creates a new zero-initialized `MmapVec` with the given `size` + /// and `alignment`. /// /// This commit will return a new `MmapVec` suitably sized to hold `size` /// bytes. All bytes will be initialized to zero since this is a fresh OS /// page allocation. - pub fn with_capacity(size: usize) -> Result { + pub fn with_capacity_and_alignment(size: usize, alignment: usize) -> Result { #[cfg(feature = "signals-based-traps")] - return Ok(MmapVec::new_mmap(Mmap::with_at_least(size)?, size)); + { + assert!(alignment <= crate::runtime::vm::host_page_size()); + return Ok(MmapVec::new_mmap(Mmap::with_at_least(size)?, size)); + } #[cfg(not(feature = "signals-based-traps"))] - return Ok(MmapVec::new_vec(vec![0; size])); + { + return Ok(MmapVec::new_alloc(size, alignment)); + } } /// Creates a new `MmapVec` from the contents of an existing `slice`. @@ -74,7 +98,21 @@ impl MmapVec { /// `slice` is copied into the new mmap. It's recommended to avoid this /// method if possible to avoid the need to copy data around. 
pub fn from_slice(slice: &[u8]) -> Result { - let mut result = MmapVec::with_capacity(slice.len())?; + MmapVec::from_slice_with_alignment(slice, 1) + } + + /// Creates a new `MmapVec` from the contents of an existing + /// `slice`, with a minimum alignment. + /// + /// `align` must be a power of two. This is useful when page + /// alignment is required when the system otherwise does not use + /// virtual memory but has a custom code publish handler. + /// + /// A new `MmapVec` is allocated to hold the contents of `slice` and then + /// `slice` is copied into the new mmap. It's recommended to avoid this + /// method if possible to avoid the need to copy data around. + pub fn from_slice_with_alignment(slice: &[u8], align: usize) -> Result { + let mut result = MmapVec::with_capacity_and_alignment(slice.len(), align)?; // SAFETY: The mmap hasn't been made readonly yet so this should be // safe to call. unsafe { @@ -132,7 +170,7 @@ impl MmapVec { pub fn original_file(&self) -> Option<&Arc> { match self { #[cfg(not(feature = "signals-based-traps"))] - MmapVec::Vec(_) => None, + MmapVec::Alloc { .. } => None, #[cfg(feature = "signals-based-traps")] MmapVec::Mmap { mmap, .. 
} => mmap.original_file(), } @@ -155,7 +193,9 @@ impl MmapVec { pub unsafe fn as_mut_slice(&mut self) -> &mut [u8] { match self { #[cfg(not(feature = "signals-based-traps"))] - MmapVec::Vec(v) => v, + MmapVec::Alloc { base, layout } => { + core::slice::from_raw_parts_mut(base.as_mut(), layout.size()) + } #[cfg(feature = "signals-based-traps")] MmapVec::Mmap { mmap, len } => mmap.slice_mut(0..*len), } @@ -169,7 +209,9 @@ impl Deref for MmapVec { fn deref(&self) -> &[u8] { match self { #[cfg(not(feature = "signals-based-traps"))] - MmapVec::Vec(v) => v, + MmapVec::Alloc { base, layout } => unsafe { + core::slice::from_raw_parts(base.as_ptr(), layout.size()) + }, #[cfg(feature = "signals-based-traps")] MmapVec::Mmap { mmap, len } => { // SAFETY: all bytes for this mmap, which is owned by @@ -180,13 +222,28 @@ impl Deref for MmapVec { } } +impl Drop for MmapVec { + fn drop(&mut self) { + match self { + #[cfg(not(feature = "signals-based-traps"))] + MmapVec::Alloc { base, layout, .. } => unsafe { + alloc::alloc::dealloc(base.as_mut(), layout.clone()); + }, + #[cfg(feature = "signals-based-traps")] + MmapVec::Mmap { .. } => { + // Drop impl on the `mmap` takes care of this case. 
+ } + } + } +} + #[cfg(test)] mod tests { use super::MmapVec; #[test] fn smoke() { - let mut mmap = MmapVec::with_capacity(10).unwrap(); + let mut mmap = MmapVec::with_capacity_and_alignment(10, 1).unwrap(); assert_eq!(mmap.len(), 10); assert_eq!(&mmap[..], &[0; 10]); @@ -198,4 +255,11 @@ mod tests { assert_eq!(mmap[0], 1); assert_eq!(mmap[2], 3); } + + #[test] + fn alignment() { + let mmap = MmapVec::with_capacity_and_alignment(10, 4096).unwrap(); + let raw_ptr = &mmap[0] as *const _ as usize; + assert_eq!(raw_ptr & (4096 - 1), 0); + } } diff --git a/tests/all/custom_code_memory.rs b/tests/all/custom_code_memory.rs new file mode 100644 index 000000000000..61afacf61fa6 --- /dev/null +++ b/tests/all/custom_code_memory.rs @@ -0,0 +1,53 @@ +#[cfg(all(not(target_os = "windows"), not(miri)))] +mod not_for_windows { + use rustix::mm::{mprotect, MprotectFlags}; + use rustix::param::page_size; + use std::sync::Arc; + use wasmtime::*; + + struct CustomCodePublish; + impl CustomCodeMemory for CustomCodePublish { + fn required_alignment(&self) -> usize { + page_size() + } + + fn publish_executable(&self, ptr: *const u8, len: usize) -> anyhow::Result<()> { + unsafe { + mprotect( + ptr as *mut _, + len, + MprotectFlags::READ | MprotectFlags::EXEC, + )?; + } + Ok(()) + } + + fn unpublish_executable(&self, ptr: *const u8, len: usize) -> anyhow::Result<()> { + unsafe { + mprotect( + ptr as *mut _, + len, + MprotectFlags::READ | MprotectFlags::WRITE, + )?; + } + Ok(()) + } + } + + #[test] + fn custom_code_publish() { + let mut config = Config::default(); + config.with_custom_code_memory(Some(Arc::new(CustomCodePublish))); + let engine = Engine::new(&config).unwrap(); + let module = Module::new( + &engine, + "(module (func (export \"main\") (result i32) i32.const 42))", + ) + .unwrap(); + let mut store = Store::new(&engine, ()); + let instance = Instance::new(&mut store, &module, &[]).unwrap(); + let func: TypedFunc<(), i32> = instance.get_typed_func(&mut store, "main").unwrap(); + 
let result = func.call(&mut store, ()).unwrap(); + assert_eq!(result, 42); + } +} diff --git a/tests/all/main.rs b/tests/all/main.rs index 1dd7a2630e89..bee9eb17331f 100644 --- a/tests/all/main.rs +++ b/tests/all/main.rs @@ -8,6 +8,7 @@ mod cli_tests; mod code_too_large; mod component_model; mod coredump; +mod custom_code_memory; mod debug; mod defaults; mod epoch_interruption; From 59c043ec7a19b39d7b2096349ca78f0107d5c05f Mon Sep 17 00:00:00 2001 From: ad hoc Date: Wed, 11 Dec 2024 00:58:31 +0100 Subject: [PATCH 16/30] relax FnMut constraints (#9782) --- winch/codegen/src/codegen/context.rs | 20 ++++++++++---------- winch/codegen/src/masm.rs | 2 +- 2 files changed, 11 insertions(+), 11 deletions(-) diff --git a/winch/codegen/src/codegen/context.rs b/winch/codegen/src/codegen/context.rs index b23096bc44ed..9835ce83dee1 100644 --- a/winch/codegen/src/codegen/context.rs +++ b/winch/codegen/src/codegen/context.rs @@ -308,9 +308,9 @@ impl<'a> CodeGenContext<'a, Emission> { /// Prepares arguments for emitting a binary operation. /// /// The `emit` function returns the `TypedReg` to put on the value stack. - pub fn binop(&mut self, masm: &mut M, size: OperandSize, mut emit: F) + pub fn binop(&mut self, masm: &mut M, size: OperandSize, emit: F) where - F: FnMut(&mut M, Reg, Reg, OperandSize) -> TypedReg, + F: FnOnce(&mut M, Reg, Reg, OperandSize) -> TypedReg, M: MacroAssembler, { let src = self.pop_to_reg(masm, None); @@ -321,9 +321,9 @@ impl<'a> CodeGenContext<'a, Emission> { } /// Prepares arguments for emitting an f32 or f64 comparison operation. 
- pub fn float_cmp_op(&mut self, masm: &mut M, size: OperandSize, mut emit: F) + pub fn float_cmp_op(&mut self, masm: &mut M, size: OperandSize, emit: F) where - F: FnMut(&mut M, Reg, Reg, Reg, OperandSize), + F: FnOnce(&mut M, Reg, Reg, Reg, OperandSize), M: MacroAssembler, { let src2 = self.pop_to_reg(masm, None); @@ -371,9 +371,9 @@ impl<'a> CodeGenContext<'a, Emission> { /// Prepares arguments for emitting an i64 binary operation. /// /// The `emit` function returns the `TypedReg` to put on the value stack. - pub fn i64_binop(&mut self, masm: &mut M, mut emit: F) + pub fn i64_binop(&mut self, masm: &mut M, emit: F) where - F: FnMut(&mut M, Reg, RegImm, OperandSize) -> TypedReg, + F: FnOnce(&mut M, Reg, RegImm, OperandSize) -> TypedReg, M: MacroAssembler, { let top = self.stack.peek().expect("value at stack top"); @@ -393,9 +393,9 @@ impl<'a> CodeGenContext<'a, Emission> { } /// Prepares arguments for emitting a convert operation. - pub fn convert_op(&mut self, masm: &mut M, dst_ty: WasmValType, mut emit: F) + pub fn convert_op(&mut self, masm: &mut M, dst_ty: WasmValType, emit: F) where - F: FnMut(&mut M, Reg, Reg, OperandSize), + F: FnOnce(&mut M, Reg, Reg, OperandSize), M: MacroAssembler, { let src = self.pop_to_reg(masm, None); @@ -422,9 +422,9 @@ impl<'a> CodeGenContext<'a, Emission> { masm: &mut M, dst_ty: WasmValType, tmp_reg_class: RegClass, - mut emit: F, + emit: F, ) where - F: FnMut(&mut M, Reg, Reg, Reg, OperandSize), + F: FnOnce(&mut M, Reg, Reg, Reg, OperandSize), M: MacroAssembler, { let tmp_gpr = self.reg_for_class(tmp_reg_class, masm); diff --git a/winch/codegen/src/masm.rs b/winch/codegen/src/masm.rs index 51b6e963c81c..da25d88c486a 100644 --- a/winch/codegen/src/masm.rs +++ b/winch/codegen/src/masm.rs @@ -14,7 +14,7 @@ use wasmtime_environ::PtrSize; pub(crate) use cranelift_codegen::ir::TrapCode; -#[derive(Eq, PartialEq, Clone, Copy)] +#[derive(Eq, PartialEq)] pub(crate) enum DivKind { /// Signed division. 
Signed, From 363db4f378de19cc3618ebab401dba5755a16085 Mon Sep 17 00:00:00 2001 From: Alex Crichton Date: Wed, 11 Dec 2024 05:26:12 -0700 Subject: [PATCH 17/30] pulley: Ungate SIMD proposal to enable more tests (#9779) * pulley: Ungate SIMD proposal to enable more tests This commit removes the simd proposal from "known panicking compiler features" for Pulley by filling out some minor ABI bits around loads/stores of v128 in the Cranelift ABI code. This unlocks running `spec_testsuite/*.wast` in CI and expecting it to pass as otherwise everything was rejected since that test suite requires SIMD and it never even got to the Cranelift backend. This then fixes a few minor issues here and there to get existing tests passing now that they're actually being run. Some examples are: * Fix some copy/paste typos around callee-save clobbers and float registers. * Add vector load/store support to avoid panicking in ABI code (not well tested yet though). * Zero-extend the condition in `brif`'s lowering to ensure that the entire register is defined. In the future more variants of `br_if` in Pulley are probably desired. * Limit `store` lowerings to just integer types, not vector types. * Fix `Func::call` with Pulley when the callee is a host function. 
* Fix test expectations prtest:full --- .../codegen/src/isa/pulley_shared/abi.rs | 32 ++++----- .../codegen/src/isa/pulley_shared/inst.isle | 10 +++ .../src/isa/pulley_shared/inst/emit.rs | 28 ++++++++ .../codegen/src/isa/pulley_shared/inst/mod.rs | 66 ++++++++++++++++++- .../codegen/src/isa/pulley_shared/lower.isle | 14 ++-- .../filetests/isa/pulley32/brif.clif | 48 +++++++++----- .../filetests/isa/pulley32/call.clif | 46 +++++++++---- .../filetests/isa/pulley32/jump.clif | 8 ++- .../filetests/isa/pulley64/brif.clif | 48 +++++++++----- .../filetests/isa/pulley64/call.clif | 46 +++++++++---- .../filetests/isa/pulley64/jump.clif | 8 ++- crates/wasmtime/src/config.rs | 4 +- crates/wasmtime/src/runtime/vm/vmcontext.rs | 6 ++ crates/wast-util/src/lib.rs | 66 ++++++++++++++++++- pulley/src/interp.rs | 32 +++++++++ pulley/src/lib.rs | 5 ++ tests/disas/pulley/epoch-simple.wat | 19 +++--- 17 files changed, 376 insertions(+), 110 deletions(-) diff --git a/cranelift/codegen/src/isa/pulley_shared/abi.rs b/cranelift/codegen/src/isa/pulley_shared/abi.rs index 8ea8fb0c9116..c304eb3f5031 100644 --- a/cranelift/codegen/src/isa/pulley_shared/abi.rs +++ b/cranelift/codegen/src/isa/pulley_shared/abi.rs @@ -646,22 +646,22 @@ const DEFAULT_CALLEE_SAVES: PRegSet = PRegSet::empty() .with(px_reg(30)) .with(px_reg(31)) // Float registers. 
- .with(px_reg(16)) - .with(px_reg(17)) - .with(px_reg(18)) - .with(px_reg(19)) - .with(px_reg(20)) - .with(px_reg(21)) - .with(px_reg(22)) - .with(px_reg(23)) - .with(px_reg(24)) - .with(px_reg(25)) - .with(px_reg(26)) - .with(px_reg(27)) - .with(px_reg(28)) - .with(px_reg(29)) - .with(px_reg(30)) - .with(px_reg(31)) + .with(pf_reg(16)) + .with(pf_reg(17)) + .with(pf_reg(18)) + .with(pf_reg(19)) + .with(pf_reg(20)) + .with(pf_reg(21)) + .with(pf_reg(22)) + .with(pf_reg(23)) + .with(pf_reg(24)) + .with(pf_reg(25)) + .with(pf_reg(26)) + .with(pf_reg(27)) + .with(pf_reg(28)) + .with(pf_reg(29)) + .with(pf_reg(30)) + .with(pf_reg(31)) // Note: no vector registers are callee-saved. ; diff --git a/cranelift/codegen/src/isa/pulley_shared/inst.isle b/cranelift/codegen/src/isa/pulley_shared/inst.isle index 18661d64ddbd..931aec79b5a8 100644 --- a/cranelift/codegen/src/isa/pulley_shared/inst.isle +++ b/cranelift/codegen/src/isa/pulley_shared/inst.isle @@ -80,10 +80,12 @@ ;; control behavior such as endianness. (XLoad (dst WritableXReg) (mem Amode) (ty Type) (flags MemFlags) (ext ExtKind)) (FLoad (dst WritableFReg) (mem Amode) (ty Type) (flags MemFlags)) + (VLoad (dst WritableVReg) (mem Amode) (ty Type) (flags MemFlags)) ;; Stores. (XStore (mem Amode) (src XReg) (ty Type) (flags MemFlags)) (FStore (mem Amode) (src FReg) (ty Type) (flags MemFlags)) + (VStore (mem Amode) (src VReg) (ty Type) (flags MemFlags)) ;; A raw pulley instruction generated at compile-time via Pulley's ;; `for_each_op!` macro. 
This variant has `pulley_*` constructors to @@ -429,3 +431,11 @@ (decl gen_call_indirect (SigRef Value ValueSlice) InstOutput) (extern constructor gen_call_indirect gen_call_indirect) + +;;;; Helpers for Sign/Zero extension ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + +(decl zext (Value) XReg) +(rule (zext val @ (value_type $I64)) val) +(rule (zext val @ (value_type $I32)) (pulley_zext32 val)) +(rule (zext val @ (value_type $I16)) (pulley_zext16 val)) +(rule (zext val @ (value_type $I8)) (pulley_zext8 val)) diff --git a/cranelift/codegen/src/isa/pulley_shared/inst/emit.rs b/cranelift/codegen/src/isa/pulley_shared/inst/emit.rs index fb868c2c60f5..3fb99285eff1 100644 --- a/cranelift/codegen/src/isa/pulley_shared/inst/emit.rs +++ b/cranelift/codegen/src/isa/pulley_shared/inst/emit.rs @@ -502,6 +502,20 @@ fn pulley_emit

<P>( } } + Inst::VLoad { + dst, + mem, + ty, + flags, + } => { + let r = mem.get_base_register().unwrap(); + let x = mem.get_offset_with_state(state); + let endian = emit_info.endianness(*flags); + assert_eq!(endian, Endianness::Little); + assert_eq!(ty.bytes(), 16); + enc::vload128le_offset32(sink, dst, r, x); + } + Inst::XStore { mem, src, @@ -553,6 +567,20 @@ fn pulley_emit

<P>( } } + Inst::VStore { + mem, + src, + ty, + flags, + } => { + let r = mem.get_base_register().unwrap(); + let x = mem.get_offset_with_state(state); + let endian = emit_info.endianness(*flags); + assert_eq!(endian, Endianness::Little); + assert_eq!(ty.bytes(), 16); + enc::vstore128le_offset32(sink, r, x, src); + } + Inst::BrTable { idx, default, diff --git a/cranelift/codegen/src/isa/pulley_shared/inst/mod.rs b/cranelift/codegen/src/isa/pulley_shared/inst/mod.rs index 0678a02c48d3..d85e683011b8 100644 --- a/cranelift/codegen/src/isa/pulley_shared/inst/mod.rs +++ b/cranelift/codegen/src/isa/pulley_shared/inst/mod.rs @@ -44,7 +44,15 @@ mod generated { impl Inst { /// Generic constructor for a load (zero-extending where appropriate). pub fn gen_load(dst: Writable<Reg>, mem: Amode, ty: Type, flags: MemFlags) -> Inst { - if ty.is_int() { + if ty.is_vector() { + assert_eq!(ty.bytes(), 16); + Inst::VLoad { + dst: dst.map(|r| VReg::new(r).unwrap()), + mem, + ty, + flags, + } + } else if ty.is_int() { Inst::XLoad { dst: dst.map(|r| XReg::new(r).unwrap()), mem, @@ -64,7 +72,15 @@ impl Inst { /// Generic constructor for a store. pub fn gen_store(mem: Amode, from_reg: Reg, ty: Type, flags: MemFlags) -> Inst { - if ty.is_int() { + if ty.is_vector() { + assert_eq!(ty.bytes(), 16); + Inst::VStore { + mem, + src: VReg::new(from_reg).unwrap(), + ty, + flags, + } + } else if ty.is_int() { Inst::XStore { mem, src: XReg::new(from_reg).unwrap(), @@ -242,6 +258,26 @@ fn pulley_get_operands(inst: &mut Inst, collector: &mut impl OperandVisitor) { collector.reg_use(src); } + Inst::VLoad { + dst, + mem, + ty: _, + flags: _, + } => { + collector.reg_def(dst); + mem.get_operands(collector); + } + + Inst::VStore { + mem, + src, + ty: _, + flags: _, + } => { + mem.get_operands(collector); + collector.reg_use(src); + } + Inst::BrTable { idx, ..
} => { collector.reg_use(idx); } @@ -361,7 +397,7 @@ where } fn is_included_in_clobbers(&self) -> bool { - self.is_args() + !self.is_args() } fn is_trap(&self) -> bool { @@ -752,6 +788,30 @@ impl Inst { format!("fstore{ty} {mem}, {src} // flags = {flags}") } + Inst::VLoad { + dst, + mem, + ty, + flags, + } => { + let dst = format_reg(*dst.to_reg()); + let ty = ty.bits(); + let mem = mem.to_string(); + format!("{dst} = vload{ty} {mem} // flags ={flags}") + } + + Inst::VStore { + mem, + src, + ty, + flags, + } => { + let ty = ty.bits(); + let mem = mem.to_string(); + let src = format_reg(**src); + format!("vstore{ty} {mem}, {src} // flags = {flags}") + } + Inst::BrTable { idx, default, diff --git a/cranelift/codegen/src/isa/pulley_shared/lower.isle b/cranelift/codegen/src/isa/pulley_shared/lower.isle index 8e84c17058a6..693bc7c35719 100644 --- a/cranelift/codegen/src/isa/pulley_shared/lower.isle +++ b/cranelift/codegen/src/isa/pulley_shared/lower.isle @@ -16,7 +16,7 @@ ;; Generic case for conditional branches. (rule -1 (lower_branch (brif (maybe_uextend c) _ _) (two_targets then else)) - (emit_side_effect (pulley_br_if c then else))) + (emit_side_effect (pulley_br_if (zext c) then else))) ;; Conditional branches on `icmp`s. 
(rule (lower_branch (brif (maybe_uextend (icmp cc a b @ (value_type $I32))) _ _) @@ -296,7 +296,7 @@ ;;;; Rules for `store` and friends ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; -(rule (lower (store flags src @ (value_type ty) addr offset)) +(rule (lower (store flags src @ (value_type (ty_int ty)) addr offset)) (side_effect (pulley_xstore (amode addr offset) src ty flags))) (rule 1 (lower (store flags src @ (value_type (ty_scalar_float ty)) addr offset)) @@ -324,14 +324,8 @@ ;;;; Rules for `uextend` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; -(rule (lower (has_type (fits_in_64 _) (uextend val @ (value_type $I8)))) - (pulley_zext8 val)) - -(rule (lower (has_type (fits_in_64 _) (uextend val @ (value_type $I16)))) - (pulley_zext16 val)) - -(rule (lower (has_type (fits_in_64 _) (uextend val @ (value_type $I32)))) - (pulley_zext32 val)) +(rule (lower (has_type (fits_in_64 _) (uextend val))) + (zext val)) ;;;; Rules for `sextend` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; diff --git a/cranelift/filetests/filetests/isa/pulley32/brif.clif b/cranelift/filetests/filetests/isa/pulley32/brif.clif index cc6bb012eb28..07826617c5ff 100644 --- a/cranelift/filetests/filetests/isa/pulley32/brif.clif +++ b/cranelift/filetests/filetests/isa/pulley32/brif.clif @@ -16,7 +16,8 @@ block2: ; VCode: ; block0: -; br_if x0, label2; jump label1 +; zext8 x4, x0 +; br_if x4, label2; jump label1 ; block1: ; xconst8 x0, 0 ; ret @@ -25,7 +26,8 @@ block2: ; ret ; ; Disassembled: -; br_if x0, 0xa // target = 0xa +; zext8 x4, x0 +; br_if x4, 0xa // target = 0xd ; xconst8 x0, 0 ; ret ; xconst8 x0, 1 @@ -46,7 +48,8 @@ block2: ; VCode: ; block0: -; br_if x0, label2; jump label1 +; zext16 x4, x0 +; br_if x4, label2; jump label1 ; block1: ; xconst8 x0, 0 ; ret @@ -55,7 +58,8 @@ block2: ; ret ; ; Disassembled: -; br_if x0, 0xa // target = 0xa +; zext16 x4, x0 +; br_if x4, 0xa // target = 0xd ; xconst8 x0, 0 ; ret ; xconst8 x0, 1 @@ -76,7 +80,8 @@ block2: ; VCode: ; block0: -; 
br_if x0, label2; jump label1 +; zext32 x4, x0 +; br_if x4, label2; jump label1 ; block1: ; xconst8 x0, 0 ; ret @@ -85,7 +90,8 @@ block2: ; ret ; ; Disassembled: -; br_if x0, 0xa // target = 0xa +; zext32 x4, x0 +; br_if x4, 0xa // target = 0xd ; xconst8 x0, 0 ; ret ; xconst8 x0, 1 @@ -137,8 +143,9 @@ block2: ; VCode: ; block0: -; xeq32 x5, x0, x1 -; br_if x5, label2; jump label1 +; xeq32 x6, x0, x1 +; zext8 x6, x6 +; br_if x6, label2; jump label1 ; block1: ; xconst8 x0, 0 ; ret @@ -147,8 +154,9 @@ block2: ; ret ; ; Disassembled: -; xeq32 x5, x0, x1 -; br_if x5, 0xa // target = 0xd +; xeq32 x6, x0, x1 +; zext8 x6, x6 +; br_if x6, 0xa // target = 0x10 ; xconst8 x0, 0 ; ret ; xconst8 x0, 1 @@ -170,8 +178,9 @@ block2: ; VCode: ; block0: -; xneq32 x5, x0, x1 -; br_if x5, label2; jump label1 +; xneq32 x6, x0, x1 +; zext8 x6, x6 +; br_if x6, label2; jump label1 ; block1: ; xconst8 x0, 0 ; ret @@ -180,8 +189,9 @@ block2: ; ret ; ; Disassembled: -; xneq32 x5, x0, x1 -; br_if x5, 0xa // target = 0xd +; xneq32 x6, x0, x1 +; zext8 x6, x6 +; br_if x6, 0xa // target = 0x10 ; xconst8 x0, 0 ; ret ; xconst8 x0, 1 @@ -234,8 +244,9 @@ block2: ; VCode: ; block0: -; xulteq64 x5, x1, x0 -; br_if x5, label2; jump label1 +; xulteq64 x6, x1, x0 +; zext8 x6, x6 +; br_if x6, label2; jump label1 ; block1: ; xconst8 x0, 0 ; ret @@ -244,8 +255,9 @@ block2: ; ret ; ; Disassembled: -; xulteq64 x5, x1, x0 -; br_if x5, 0xa // target = 0xd +; xulteq64 x6, x1, x0 +; zext8 x6, x6 +; br_if x6, 0xa // target = 0x10 ; xconst8 x0, 0 ; ret ; xconst8 x0, 1 diff --git a/cranelift/filetests/filetests/isa/pulley32/call.clif b/cranelift/filetests/filetests/isa/pulley32/call.clif index 6386273c1078..449043f0bdab 100644 --- a/cranelift/filetests/filetests/isa/pulley32/call.clif +++ b/cranelift/filetests/filetests/isa/pulley32/call.clif @@ -227,9 +227,14 @@ block0: ; VCode: ; push_frame -; stack_alloc32 64 -; xstore64 sp+56, x18 // flags = notrap aligned -; xstore64 sp+48, x20 // flags = notrap aligned +; 
stack_alloc32 112 +; xstore64 sp+104, x18 // flags = notrap aligned +; xstore64 sp+96, x19 // flags = notrap aligned +; xstore64 sp+88, x20 // flags = notrap aligned +; xstore64 sp+80, x21 // flags = notrap aligned +; xstore64 sp+72, x23 // flags = notrap aligned +; xstore64 sp+64, x24 // flags = notrap aligned +; xstore64 sp+56, x25 // flags = notrap aligned ; block0: ; x0 = load_addr OutgoingArg(0) ; call CallInfo { dest: TestCase(%g), uses: [CallArgPair { vreg: p0i, preg: p0i }], defs: [CallRetPair { vreg: Writable { reg: p0i }, preg: p0i }, CallRetPair { vreg: Writable { reg: p1i }, preg: p1i }, CallRetPair { vreg: Writable { reg: p2i }, preg: p2i }, CallRetPair { vreg: Writable { reg: p3i }, preg: p3i }, CallRetPair { vreg: Writable { reg: p4i }, preg: p4i }, CallRetPair { vreg: Writable { reg: p5i }, preg: p5i }, CallRetPair { vreg: Writable { reg: p6i }, preg: p6i }, CallRetPair { vreg: Writable { reg: p7i }, preg: p7i }, CallRetPair { vreg: Writable { reg: p8i }, preg: p8i }, CallRetPair { vreg: Writable { reg: p9i }, preg: p9i }, CallRetPair { vreg: Writable { reg: p10i }, preg: p10i }, CallRetPair { vreg: Writable { reg: p11i }, preg: p11i }, CallRetPair { vreg: Writable { reg: p12i }, preg: p12i }, CallRetPair { vreg: Writable { reg: p13i }, preg: p13i }, CallRetPair { vreg: Writable { reg: p14i }, preg: p14i }, CallRetPair { vreg: Writable { reg: p15i }, preg: p15i }], clobbers: PRegSet { bits: [0, 65279, 4294967295, 0] }, callee_conv: Fast, caller_conv: Fast, callee_pop_size: 0 } @@ -265,19 +270,29 @@ block0: ; xadd64 x14, x0, x14 ; xadd64 x13, x13, x13 ; xadd64 x0, x14, x13 -; x18 = xload64 sp+56 // flags = notrap aligned -; x20 = xload64 sp+48 // flags = notrap aligned -; stack_free32 64 +; x18 = xload64 sp+104 // flags = notrap aligned +; x19 = xload64 sp+96 // flags = notrap aligned +; x20 = xload64 sp+88 // flags = notrap aligned +; x21 = xload64 sp+80 // flags = notrap aligned +; x23 = xload64 sp+72 // flags = notrap aligned +; x24 = xload64 
sp+64 // flags = notrap aligned +; x25 = xload64 sp+56 // flags = notrap aligned +; stack_free32 112 ; pop_frame ; ret ; ; Disassembled: ; push_frame -; stack_alloc32 64 -; xstore64le_offset32 sp, 56, x18 -; xstore64le_offset32 sp, 48, x20 +; stack_alloc32 112 +; xstore64le_offset32 sp, 104, x18 +; xstore64le_offset32 sp, 96, x19 +; xstore64le_offset32 sp, 88, x20 +; xstore64le_offset32 sp, 80, x21 +; xstore64le_offset32 sp, 72, x23 +; xstore64le_offset32 sp, 64, x24 +; xstore64le_offset32 sp, 56, x25 ; xmov x0, sp -; call 0x0 // target = 0x17 +; call 0x0 // target = 0x3a ; xmov x18, x13 ; xmov x20, x11 ; xload64le_offset32 x24, sp, 0 @@ -310,9 +325,14 @@ block0: ; xadd64 x14, x0, x14 ; xadd64 x13, x13, x13 ; xadd64 x0, x14, x13 -; xload64le_offset32 x18, sp, 56 -; xload64le_offset32 x20, sp, 48 -; stack_free32 64 +; xload64le_offset32 x18, sp, 104 +; xload64le_offset32 x19, sp, 96 +; xload64le_offset32 x20, sp, 88 +; xload64le_offset32 x21, sp, 80 +; xload64le_offset32 x23, sp, 72 +; xload64le_offset32 x24, sp, 64 +; xload64le_offset32 x25, sp, 56 +; stack_free32 112 ; pop_frame ; ret diff --git a/cranelift/filetests/filetests/isa/pulley32/jump.clif b/cranelift/filetests/filetests/isa/pulley32/jump.clif index 5523088ad538..a5a32180e7a8 100644 --- a/cranelift/filetests/filetests/isa/pulley32/jump.clif +++ b/cranelift/filetests/filetests/isa/pulley32/jump.clif @@ -19,7 +19,8 @@ block3(v3: i8): ; VCode: ; block0: -; br_if x0, label2; jump label1 +; zext8 x5, x0 +; br_if x5, label2; jump label1 ; block1: ; xconst8 x0, 0 ; jump label3 @@ -30,9 +31,10 @@ block3(v3: i8): ; ret ; ; Disassembled: -; br_if x0, 0xe // target = 0xe +; zext8 x5, x0 +; br_if x5, 0xe // target = 0x11 ; xconst8 x0, 0 -; jump 0x8 // target = 0x11 +; jump 0x8 // target = 0x14 ; xconst8 x0, 1 ; ret diff --git a/cranelift/filetests/filetests/isa/pulley64/brif.clif b/cranelift/filetests/filetests/isa/pulley64/brif.clif index 0585a726bfe7..f8c854349eb5 100644 --- 
a/cranelift/filetests/filetests/isa/pulley64/brif.clif +++ b/cranelift/filetests/filetests/isa/pulley64/brif.clif @@ -16,7 +16,8 @@ block2: ; VCode: ; block0: -; br_if x0, label2; jump label1 +; zext8 x4, x0 +; br_if x4, label2; jump label1 ; block1: ; xconst8 x0, 0 ; ret @@ -25,7 +26,8 @@ block2: ; ret ; ; Disassembled: -; br_if x0, 0xa // target = 0xa +; zext8 x4, x0 +; br_if x4, 0xa // target = 0xd ; xconst8 x0, 0 ; ret ; xconst8 x0, 1 @@ -46,7 +48,8 @@ block2: ; VCode: ; block0: -; br_if x0, label2; jump label1 +; zext16 x4, x0 +; br_if x4, label2; jump label1 ; block1: ; xconst8 x0, 0 ; ret @@ -55,7 +58,8 @@ block2: ; ret ; ; Disassembled: -; br_if x0, 0xa // target = 0xa +; zext16 x4, x0 +; br_if x4, 0xa // target = 0xd ; xconst8 x0, 0 ; ret ; xconst8 x0, 1 @@ -76,7 +80,8 @@ block2: ; VCode: ; block0: -; br_if x0, label2; jump label1 +; zext32 x4, x0 +; br_if x4, label2; jump label1 ; block1: ; xconst8 x0, 0 ; ret @@ -85,7 +90,8 @@ block2: ; ret ; ; Disassembled: -; br_if x0, 0xa // target = 0xa +; zext32 x4, x0 +; br_if x4, 0xa // target = 0xd ; xconst8 x0, 0 ; ret ; xconst8 x0, 1 @@ -137,8 +143,9 @@ block2: ; VCode: ; block0: -; xeq32 x5, x0, x1 -; br_if x5, label2; jump label1 +; xeq32 x6, x0, x1 +; zext8 x6, x6 +; br_if x6, label2; jump label1 ; block1: ; xconst8 x0, 0 ; ret @@ -147,8 +154,9 @@ block2: ; ret ; ; Disassembled: -; xeq32 x5, x0, x1 -; br_if x5, 0xa // target = 0xd +; xeq32 x6, x0, x1 +; zext8 x6, x6 +; br_if x6, 0xa // target = 0x10 ; xconst8 x0, 0 ; ret ; xconst8 x0, 1 @@ -170,8 +178,9 @@ block2: ; VCode: ; block0: -; xneq32 x5, x0, x1 -; br_if x5, label2; jump label1 +; xneq32 x6, x0, x1 +; zext8 x6, x6 +; br_if x6, label2; jump label1 ; block1: ; xconst8 x0, 0 ; ret @@ -180,8 +189,9 @@ block2: ; ret ; ; Disassembled: -; xneq32 x5, x0, x1 -; br_if x5, 0xa // target = 0xd +; xneq32 x6, x0, x1 +; zext8 x6, x6 +; br_if x6, 0xa // target = 0x10 ; xconst8 x0, 0 ; ret ; xconst8 x0, 1 @@ -234,8 +244,9 @@ block2: ; VCode: ; block0: -; xulteq64 x5, 
x1, x0 -; br_if x5, label2; jump label1 +; xulteq64 x6, x1, x0 +; zext8 x6, x6 +; br_if x6, label2; jump label1 ; block1: ; xconst8 x0, 0 ; ret @@ -244,8 +255,9 @@ block2: ; ret ; ; Disassembled: -; xulteq64 x5, x1, x0 -; br_if x5, 0xa // target = 0xd +; xulteq64 x6, x1, x0 +; zext8 x6, x6 +; br_if x6, 0xa // target = 0x10 ; xconst8 x0, 0 ; ret ; xconst8 x0, 1 diff --git a/cranelift/filetests/filetests/isa/pulley64/call.clif b/cranelift/filetests/filetests/isa/pulley64/call.clif index 0f47c4e765b8..67b401aa3b5f 100644 --- a/cranelift/filetests/filetests/isa/pulley64/call.clif +++ b/cranelift/filetests/filetests/isa/pulley64/call.clif @@ -227,9 +227,14 @@ block0: ; VCode: ; push_frame -; stack_alloc32 64 -; xstore64 sp+56, x18 // flags = notrap aligned -; xstore64 sp+48, x20 // flags = notrap aligned +; stack_alloc32 112 +; xstore64 sp+104, x18 // flags = notrap aligned +; xstore64 sp+96, x19 // flags = notrap aligned +; xstore64 sp+88, x20 // flags = notrap aligned +; xstore64 sp+80, x21 // flags = notrap aligned +; xstore64 sp+72, x23 // flags = notrap aligned +; xstore64 sp+64, x24 // flags = notrap aligned +; xstore64 sp+56, x25 // flags = notrap aligned ; block0: ; x0 = load_addr OutgoingArg(0) ; call CallInfo { dest: TestCase(%g), uses: [CallArgPair { vreg: p0i, preg: p0i }], defs: [CallRetPair { vreg: Writable { reg: p0i }, preg: p0i }, CallRetPair { vreg: Writable { reg: p1i }, preg: p1i }, CallRetPair { vreg: Writable { reg: p2i }, preg: p2i }, CallRetPair { vreg: Writable { reg: p3i }, preg: p3i }, CallRetPair { vreg: Writable { reg: p4i }, preg: p4i }, CallRetPair { vreg: Writable { reg: p5i }, preg: p5i }, CallRetPair { vreg: Writable { reg: p6i }, preg: p6i }, CallRetPair { vreg: Writable { reg: p7i }, preg: p7i }, CallRetPair { vreg: Writable { reg: p8i }, preg: p8i }, CallRetPair { vreg: Writable { reg: p9i }, preg: p9i }, CallRetPair { vreg: Writable { reg: p10i }, preg: p10i }, CallRetPair { vreg: Writable { reg: p11i }, preg: p11i }, CallRetPair { 
vreg: Writable { reg: p12i }, preg: p12i }, CallRetPair { vreg: Writable { reg: p13i }, preg: p13i }, CallRetPair { vreg: Writable { reg: p14i }, preg: p14i }, CallRetPair { vreg: Writable { reg: p15i }, preg: p15i }], clobbers: PRegSet { bits: [0, 65279, 4294967295, 0] }, callee_conv: Fast, caller_conv: Fast, callee_pop_size: 0 } @@ -265,19 +270,29 @@ block0: ; xadd64 x14, x0, x14 ; xadd64 x13, x13, x13 ; xadd64 x0, x14, x13 -; x18 = xload64 sp+56 // flags = notrap aligned -; x20 = xload64 sp+48 // flags = notrap aligned -; stack_free32 64 +; x18 = xload64 sp+104 // flags = notrap aligned +; x19 = xload64 sp+96 // flags = notrap aligned +; x20 = xload64 sp+88 // flags = notrap aligned +; x21 = xload64 sp+80 // flags = notrap aligned +; x23 = xload64 sp+72 // flags = notrap aligned +; x24 = xload64 sp+64 // flags = notrap aligned +; x25 = xload64 sp+56 // flags = notrap aligned +; stack_free32 112 ; pop_frame ; ret ; ; Disassembled: ; push_frame -; stack_alloc32 64 -; xstore64le_offset32 sp, 56, x18 -; xstore64le_offset32 sp, 48, x20 +; stack_alloc32 112 +; xstore64le_offset32 sp, 104, x18 +; xstore64le_offset32 sp, 96, x19 +; xstore64le_offset32 sp, 88, x20 +; xstore64le_offset32 sp, 80, x21 +; xstore64le_offset32 sp, 72, x23 +; xstore64le_offset32 sp, 64, x24 +; xstore64le_offset32 sp, 56, x25 ; xmov x0, sp -; call 0x0 // target = 0x17 +; call 0x0 // target = 0x3a ; xmov x18, x13 ; xmov x20, x11 ; xload64le_offset32 x24, sp, 0 @@ -310,9 +325,14 @@ block0: ; xadd64 x14, x0, x14 ; xadd64 x13, x13, x13 ; xadd64 x0, x14, x13 -; xload64le_offset32 x18, sp, 56 -; xload64le_offset32 x20, sp, 48 -; stack_free32 64 +; xload64le_offset32 x18, sp, 104 +; xload64le_offset32 x19, sp, 96 +; xload64le_offset32 x20, sp, 88 +; xload64le_offset32 x21, sp, 80 +; xload64le_offset32 x23, sp, 72 +; xload64le_offset32 x24, sp, 64 +; xload64le_offset32 x25, sp, 56 +; stack_free32 112 ; pop_frame ; ret diff --git a/cranelift/filetests/filetests/isa/pulley64/jump.clif 
b/cranelift/filetests/filetests/isa/pulley64/jump.clif index fbdc7c988280..7e1b2cc60dbb 100644 --- a/cranelift/filetests/filetests/isa/pulley64/jump.clif +++ b/cranelift/filetests/filetests/isa/pulley64/jump.clif @@ -19,7 +19,8 @@ block3(v3: i8): ; VCode: ; block0: -; br_if x0, label2; jump label1 +; zext8 x5, x0 +; br_if x5, label2; jump label1 ; block1: ; xconst8 x0, 0 ; jump label3 @@ -30,9 +31,10 @@ block3(v3: i8): ; ret ; ; Disassembled: -; br_if x0, 0xe // target = 0xe +; zext8 x5, x0 +; br_if x5, 0xe // target = 0x11 ; xconst8 x0, 0 -; jump 0x8 // target = 0x11 +; jump 0x8 // target = 0x14 ; xconst8 x0, 1 ; ret diff --git a/crates/wasmtime/src/config.rs b/crates/wasmtime/src/config.rs index c6dbabdafbcd..e89270f33895 100644 --- a/crates/wasmtime/src/config.rs +++ b/crates/wasmtime/src/config.rs @@ -1965,9 +1965,7 @@ impl Config { // errors are panics though due to unimplemented bits in ABI // code and those causes are listed here. if self.compiler_target().is_pulley() { - return WasmFeatures::SIMD - | WasmFeatures::RELAXED_SIMD - | WasmFeatures::TAIL_CALL + return WasmFeatures::TAIL_CALL | WasmFeatures::MEMORY64 | WasmFeatures::GC_TYPES; } diff --git a/crates/wasmtime/src/runtime/vm/vmcontext.rs b/crates/wasmtime/src/runtime/vm/vmcontext.rs index 61c78620b1b7..e59b80ee5b8a 100644 --- a/crates/wasmtime/src/runtime/vm/vmcontext.rs +++ b/crates/wasmtime/src/runtime/vm/vmcontext.rs @@ -745,6 +745,12 @@ impl VMFuncRef { caller: *mut VMOpaqueContext, args_and_results: *mut [ValRaw], ) -> bool { + // If `caller` is actually a `VMArrayCallHostFuncContext` then skip the + // interpreter, even though it's available, as `array_call` will be + // native code. 
+ if (*self.vmctx).magic == wasmtime_environ::VM_ARRAY_CALL_HOST_FUNC_MAGIC { + return self.array_call_native(caller, args_and_results); + } vm.call(self.array_call.cast(), self.vmctx, caller, args_and_results) } diff --git a/crates/wast-util/src/lib.rs b/crates/wast-util/src/lib.rs index a39e50b1669f..b5d9d7011f09 100644 --- a/crates/wast-util/src/lib.rs +++ b/crates/wast-util/src/lib.rs @@ -429,9 +429,70 @@ impl WastTest { "misc_testsuite/partial-init-memory-segment.wast", "misc_testsuite/rs2wasm-add-func.wast", "misc_testsuite/stack_overflow.wast", + "misc_testsuite/winch/_simd_linking.wast", "misc_testsuite/winch/misc.wast", "misc_testsuite/winch/oob.wast", + "spec_testsuite/address.wast", + "spec_testsuite/binary-leb128.wast", + "spec_testsuite/binary.wast", + "spec_testsuite/comments.wast", + "spec_testsuite/custom.wast", + "spec_testsuite/data.wast", + "spec_testsuite/exports.wast", + "spec_testsuite/forward.wast", + "spec_testsuite/inline-module.wast", + "spec_testsuite/int_literals.wast", + "spec_testsuite/memory_copy.wast", + "spec_testsuite/memory_fill.wast", + "spec_testsuite/memory_init.wast", + "spec_testsuite/names.wast", + "spec_testsuite/obsolete-keywords.wast", + "spec_testsuite/proposals/annotations/annotations.wast", + "spec_testsuite/proposals/annotations/id.wast", + "spec_testsuite/proposals/annotations/token.wast", "spec_testsuite/proposals/exception-handling/binary.wast", + "spec_testsuite/proposals/multi-memory/address0.wast", + "spec_testsuite/proposals/multi-memory/address1.wast", + "spec_testsuite/proposals/multi-memory/binary.wast", + "spec_testsuite/proposals/multi-memory/binary0.wast", + "spec_testsuite/proposals/multi-memory/data.wast", + "spec_testsuite/proposals/multi-memory/data0.wast", + "spec_testsuite/proposals/multi-memory/data1.wast", + "spec_testsuite/proposals/multi-memory/data_drop0.wast", + "spec_testsuite/proposals/multi-memory/exports0.wast", + "spec_testsuite/proposals/multi-memory/imports1.wast", + 
"spec_testsuite/proposals/multi-memory/imports2.wast", + "spec_testsuite/proposals/multi-memory/linking1.wast", + "spec_testsuite/proposals/multi-memory/load0.wast", + "spec_testsuite/proposals/multi-memory/load1.wast", + "spec_testsuite/proposals/multi-memory/memory-multi.wast", + "spec_testsuite/proposals/multi-memory/memory_copy0.wast", + "spec_testsuite/proposals/multi-memory/memory_copy1.wast", + "spec_testsuite/proposals/multi-memory/memory_fill0.wast", + "spec_testsuite/proposals/multi-memory/memory_init0.wast", + "spec_testsuite/proposals/multi-memory/memory_size3.wast", + "spec_testsuite/proposals/multi-memory/memory_trap1.wast", + "spec_testsuite/proposals/multi-memory/start0.wast", + "spec_testsuite/proposals/multi-memory/store.wast", + "spec_testsuite/proposals/multi-memory/store0.wast", + "spec_testsuite/proposals/multi-memory/store1.wast", + "spec_testsuite/proposals/multi-memory/traps0.wast", + "spec_testsuite/simd_linking.wast", + "spec_testsuite/skip-stack-guard-page.wast", + "spec_testsuite/start.wast", + "spec_testsuite/store.wast", + "spec_testsuite/table-sub.wast", + "spec_testsuite/table.wast", + "spec_testsuite/token.wast", + "spec_testsuite/type.wast", + "spec_testsuite/unreachable.wast", + "spec_testsuite/unreached-invalid.wast", + "spec_testsuite/unreached-valid.wast", + "spec_testsuite/unwind.wast", + "spec_testsuite/utf8-custom-section-id.wast", + "spec_testsuite/utf8-import-field.wast", + "spec_testsuite/utf8-import-module.wast", + "spec_testsuite/utf8-invalid-encoding.wast", "threads/exports.wast", ]; @@ -445,7 +506,10 @@ impl WastTest { let supported32bit = [ "misc_testsuite/winch/table_grow.wast", "misc_testsuite/table_grow_with_funcref.wast", - // ... 
+ "spec_testsuite/proposals/multi-memory/trap0.wast", + "spec_testsuite/proposals/multi-memory/memory_trap0.wast", + "spec_testsuite/proposals/multi-memory/linking2.wast", + "spec_testsuite/memory_trap.wast", ]; if cfg!(target_pointer_width = "32") { if supported32bit.iter().any(|part| self.path.ends_with(part)) { diff --git a/pulley/src/interp.rs b/pulley/src/interp.rs index d5e872e2b24e..c0baf40dd145 100644 --- a/pulley/src/interp.rs +++ b/pulley/src/interp.rs @@ -534,6 +534,24 @@ impl Default for VRegVal { } } +#[allow(missing_docs)] +impl VRegVal { + pub fn new_u128(i: u128) -> Self { + let mut val = Self::default(); + val.set_u128(i); + val + } + + pub fn get_u128(&self) -> u128 { + let val = unsafe { self.0.u128 }; + u128::from_le(val) + } + + pub fn set_u128(&mut self, val: u128) { + self.0.u128 = val.to_le(); + } +} + /// The machine state for a Pulley virtual machine: the various registers and /// stack. pub struct MachineState { @@ -1329,6 +1347,20 @@ impl OpVisitor for Interpreter<'_> { ControlFlow::Continue(()) } + fn vload128le_offset32(&mut self, dst: VReg, ptr: XReg, offset: i32) -> ControlFlow { + let val = unsafe { self.load::(ptr, offset) }; + self.state[dst].set_u128(u128::from_le(val)); + ControlFlow::Continue(()) + } + + fn vstore128le_offset32(&mut self, ptr: XReg, offset: i32, src: VReg) -> ControlFlow { + let val = self.state[src].get_u128(); + unsafe { + self.store(ptr, offset, val.to_le()); + } + ControlFlow::Continue(()) + } + fn xpush32(&mut self, src: XReg) -> ControlFlow { self.push::(self.state[src].get_u32())?; ControlFlow::Continue(()) diff --git a/pulley/src/lib.rs b/pulley/src/lib.rs index 112a93b4d04e..43773240621a 100644 --- a/pulley/src/lib.rs +++ b/pulley/src/lib.rs @@ -253,6 +253,11 @@ macro_rules! 
for_each_op { /// `*(ptr + offset) = src` fstore64le_offset32 = Fstore64LeOffset32 { ptr: XReg, offset: i32, src: FReg }; + /// `dst = *(ptr + offset)` + vload128le_offset32 = VLoad128Offset32 { dst: VReg, ptr: XReg, offset: i32 }; + /// `*(ptr + offset) = src` + vstore128le_offset32 = Vstore128LeOffset32 { ptr: XReg, offset: i32, src: VReg }; + /// `push lr; push fp; fp = sp` push_frame = PushFrame ; /// `sp = fp; pop fp; pop lr` diff --git a/tests/disas/pulley/epoch-simple.wat b/tests/disas/pulley/epoch-simple.wat index 94d40d567123..f39f6875a934 100644 --- a/tests/disas/pulley/epoch-simple.wat +++ b/tests/disas/pulley/epoch-simple.wat @@ -7,13 +7,14 @@ ) ;; wasm[0]::function[0]: ;; push_frame -;; xload64le_offset32 x7, x0, 8 -;; xload64le_offset32 x8, x0, 32 -;; xload64le_offset32 x8, x8, 0 -;; xload64le_offset32 x7, x7, 8 -;; xulteq64 x7, x7, x8 -;; br_if x7, 0x8 // target = 0x28 -;; 26: pop_frame +;; xload64le_offset32 x8, x0, 8 +;; xload64le_offset32 x9, x0, 32 +;; xload64le_offset32 x9, x9, 0 +;; xload64le_offset32 x8, x8, 8 +;; xulteq64 x8, x8, x9 +;; zext8 x8, x8 +;; br_if x8, 0x8 // target = 0x2b +;; 29: pop_frame ;; ret -;; 28: call 0x9c // target = 0xc4 -;; 2d: jump 0xfffffffffffffff9 // target = 0x26 +;; 2b: call 0xa2 // target = 0xcd +;; 30: jump 0xfffffffffffffff9 // target = 0x29 From bb68f41615b29d778608477f0575b90692d91315 Mon Sep 17 00:00:00 2001 From: Alex Crichton Date: Wed, 11 Dec 2024 08:48:07 -0700 Subject: [PATCH 18/30] pulley: Ungate memory64 feature (#9780) * pulley: Ungate memory64 feature This commit is similar to #9779 in that it's removing a proposal from the "known list of panicking features" for Pulley to allow more tests to run on Pulley. This then fills out a few miscellaneous instructions to get a full suite of tests passing in Pulley related to memory64 and other instructions. 
prtest:full * Add notes about spectre speculation --- .../codegen/src/isa/pulley_shared/lower.isle | 29 ++++++++++++ .../code_translator/bounds_checks.rs | 28 +++++++++-- crates/wasmtime/src/config.rs | 4 +- crates/wasmtime/src/runtime/vm/memory.rs | 16 ------- crates/wast-util/src/lib.rs | 46 +++++++++++-------- pulley/src/interp.rs | 42 +++++++++++++++++ pulley/src/lib.rs | 13 ++++++ 7 files changed, 136 insertions(+), 42 deletions(-) diff --git a/cranelift/codegen/src/isa/pulley_shared/lower.isle b/cranelift/codegen/src/isa/pulley_shared/lower.isle index 693bc7c35719..7b7e98eb4fae 100644 --- a/cranelift/codegen/src/isa/pulley_shared/lower.isle +++ b/cranelift/codegen/src/isa/pulley_shared/lower.isle @@ -186,6 +186,30 @@ (rule (lower (has_type $I32 (sdiv a b))) (pulley_xdiv32_s a b)) +;;;; Rules for `ishl` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + +(rule (lower (has_type $I32 (ishl a b))) + (pulley_xshl32 a b)) + +(rule (lower (has_type $I64 (ishl a b))) + (pulley_xshl64 a b)) + +;;;; Rules for `ushr` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + +(rule (lower (has_type $I32 (ushr a b))) + (pulley_xshr32_u a b)) + +(rule (lower (has_type $I64 (ushr a b))) + (pulley_xshr64_u a b)) + +;;;; Rules for `sshr` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + +(rule (lower (has_type $I32 (sshr a b))) + (pulley_xshr32_s a b)) + +(rule (lower (has_type $I64 (sshr a b))) + (pulley_xshr64_s a b)) + ;;;; Rules for `band` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; (rule (lower (has_type $I32 (band a b))) @@ -338,6 +362,11 @@ (rule (lower (has_type (fits_in_64 _) (sextend val @ (value_type $I32)))) (pulley_sext32 val)) +;;;; Rules for `ireduce` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + +(rule (lower (has_type (fits_in_64 _ty) (ireduce src))) + src) + ;;;; Rules for `uadd_overflow_trap` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; (rule (lower (has_type $I32 (uadd_overflow_trap a b tc))) diff --git 
a/crates/cranelift/src/translate/code_translator/bounds_checks.rs b/crates/cranelift/src/translate/code_translator/bounds_checks.rs index 9993410fbe6b..49d8ee40545d 100644 --- a/crates/cranelift/src/translate/code_translator/bounds_checks.rs +++ b/crates/cranelift/src/translate/code_translator/bounds_checks.rs @@ -491,11 +491,29 @@ fn cast_index_to_pointer_ty( if index_ty == pointer_ty { return index; } - // Note that using 64-bit heaps on a 32-bit host is not currently supported, - // would require at least a bounds check here to ensure that the truncation - // from 64-to-32 bits doesn't lose any upper bits. For now though we're - // mostly interested in the 32-bit-heaps-on-64-bit-hosts cast. - assert!(index_ty.bits() < pointer_ty.bits()); + + // If the index size is larger than the pointer, that means that this is a + // 32-bit host platform with a 64-bit wasm linear memory. If the index is + // larger than 2**32 then that's guaranteed to be out-of-bounds, otherwise we + // `ireduce` the index. + // + // Also note that at this time this branch doesn't support pcc nor the + // value-label-ranges of the below path. + // + // Finally, note that the returned `low_bits` here are still subject to an + // explicit bounds check in wasm so in terms of Spectre speculation on + // either side of the `trapnz` should be ok. + if index_ty.bits() > pointer_ty.bits() { + assert_eq!(index_ty, ir::types::I64); + assert_eq!(pointer_ty, ir::types::I32); + let low_bits = pos.ins().ireduce(pointer_ty, index); + let c32 = pos.ins().iconst(pointer_ty, 32); + let high_bits = pos.ins().ushr(index, c32); + let high_bits = pos.ins().ireduce(pointer_ty, high_bits); + pos.ins() + .trapnz(high_bits, ir::TrapCode::HEAP_OUT_OF_BOUNDS); + return low_bits; + } // Convert `index` to `addr_ty`.
let extended_index = pos.ins().uextend(pointer_ty, index); diff --git a/crates/wasmtime/src/config.rs b/crates/wasmtime/src/config.rs index e89270f33895..f283d1b459fc 100644 --- a/crates/wasmtime/src/config.rs +++ b/crates/wasmtime/src/config.rs @@ -1965,9 +1965,7 @@ impl Config { // errors are panics though due to unimplemented bits in ABI // code and those causes are listed here. if self.compiler_target().is_pulley() { - return WasmFeatures::TAIL_CALL - | WasmFeatures::MEMORY64 - | WasmFeatures::GC_TYPES; + return WasmFeatures::TAIL_CALL | WasmFeatures::GC_TYPES; } // Other Cranelift backends are either 100% missing or complete diff --git a/crates/wasmtime/src/runtime/vm/memory.rs b/crates/wasmtime/src/runtime/vm/memory.rs index 116830f41ed0..7d5e23760abb 100644 --- a/crates/wasmtime/src/runtime/vm/memory.rs +++ b/crates/wasmtime/src/runtime/vm/memory.rs @@ -290,22 +290,6 @@ impl Memory { // overkill for this purpose. let absolute_max = 0usize.wrapping_sub(page_size); - // Sanity-check what should already be true from wasm module validation. - // Note that for 32-bit targets the absolute maximum is `1<<32` during - // compilation, not one-page-less-than-u32::MAX, so need to handle that - // specially here. 
- let absolute_max64 = if cfg!(target_pointer_width = "32") { - 1 << 32 - } else { - u64::try_from(absolute_max).unwrap() - }; - if let Ok(size) = ty.minimum_byte_size() { - assert!(size <= absolute_max64); - } - if let Ok(max) = ty.maximum_byte_size() { - assert!(max <= absolute_max64); - } - // If the minimum memory size overflows the size of our own address // space, then we can't satisfy this request, but defer the error to // later so the `store` can be informed that an effective oom is diff --git a/crates/wast-util/src/lib.rs b/crates/wast-util/src/lib.rs index b5d9d7011f09..daa695ea27d1 100644 --- a/crates/wast-util/src/lib.rs +++ b/crates/wast-util/src/lib.rs @@ -415,6 +415,7 @@ impl WastTest { "misc_testsuite/component-model/nested.wast", "misc_testsuite/component-model/types.wast", "misc_testsuite/control-flow.wast", + "misc_testsuite/custom-page-sizes/custom-page-sizes.wast", "misc_testsuite/elem-ref-null.wast", "misc_testsuite/elem_drop.wast", "misc_testsuite/empty.wast", @@ -426,12 +427,23 @@ impl WastTest { "misc_testsuite/imported-memory-copy.wast", "misc_testsuite/issue4857.wast", "misc_testsuite/memory-copy.wast", + "misc_testsuite/memory64/bounds.wast", + "misc_testsuite/memory64/linking-errors.wast", + "misc_testsuite/memory64/linking.wast", + "misc_testsuite/memory64/multi-memory.wast", + "misc_testsuite/memory64/offsets.wast", + "misc_testsuite/multi-memory/simple.wast", "misc_testsuite/partial-init-memory-segment.wast", "misc_testsuite/rs2wasm-add-func.wast", "misc_testsuite/stack_overflow.wast", + "misc_testsuite/table_grow_with_funcref.wast", + "misc_testsuite/threads/atomics_notify.wast", + "misc_testsuite/threads/atomics_wait_address.wast", + "misc_testsuite/threads/wait_notify.wast", "misc_testsuite/winch/_simd_linking.wast", "misc_testsuite/winch/misc.wast", "misc_testsuite/winch/oob.wast", + "misc_testsuite/winch/table_grow.wast", "spec_testsuite/address.wast", "spec_testsuite/binary-leb128.wast", "spec_testsuite/binary.wast", @@ 
-445,11 +457,14 @@ impl WastTest { "spec_testsuite/memory_copy.wast", "spec_testsuite/memory_fill.wast", "spec_testsuite/memory_init.wast", + "spec_testsuite/memory_size.wast", + "spec_testsuite/memory_trap.wast", "spec_testsuite/names.wast", "spec_testsuite/obsolete-keywords.wast", "spec_testsuite/proposals/annotations/annotations.wast", "spec_testsuite/proposals/annotations/id.wast", "spec_testsuite/proposals/annotations/token.wast", + "spec_testsuite/proposals/custom-page-sizes/custom-page-sizes.wast", "spec_testsuite/proposals/exception-handling/binary.wast", "spec_testsuite/proposals/multi-memory/address0.wast", "spec_testsuite/proposals/multi-memory/address1.wast", @@ -462,7 +477,9 @@ impl WastTest { "spec_testsuite/proposals/multi-memory/exports0.wast", "spec_testsuite/proposals/multi-memory/imports1.wast", "spec_testsuite/proposals/multi-memory/imports2.wast", + "spec_testsuite/proposals/multi-memory/imports4.wast", "spec_testsuite/proposals/multi-memory/linking1.wast", + "spec_testsuite/proposals/multi-memory/linking2.wast", "spec_testsuite/proposals/multi-memory/load0.wast", "spec_testsuite/proposals/multi-memory/load1.wast", "spec_testsuite/proposals/multi-memory/memory-multi.wast", @@ -470,13 +487,23 @@ impl WastTest { "spec_testsuite/proposals/multi-memory/memory_copy1.wast", "spec_testsuite/proposals/multi-memory/memory_fill0.wast", "spec_testsuite/proposals/multi-memory/memory_init0.wast", + "spec_testsuite/proposals/multi-memory/memory_size.wast", + "spec_testsuite/proposals/multi-memory/memory_size0.wast", + "spec_testsuite/proposals/multi-memory/memory_size1.wast", + "spec_testsuite/proposals/multi-memory/memory_size2.wast", "spec_testsuite/proposals/multi-memory/memory_size3.wast", + "spec_testsuite/proposals/multi-memory/memory_trap0.wast", "spec_testsuite/proposals/multi-memory/memory_trap1.wast", "spec_testsuite/proposals/multi-memory/start0.wast", "spec_testsuite/proposals/multi-memory/store.wast", 
"spec_testsuite/proposals/multi-memory/store0.wast", "spec_testsuite/proposals/multi-memory/store1.wast", + "spec_testsuite/proposals/multi-memory/trap0.wast", "spec_testsuite/proposals/multi-memory/traps0.wast", + "spec_testsuite/proposals/threads/atomics_notify.wast", + "spec_testsuite/proposals/threads/atomics_wait_address.wast", + "spec_testsuite/proposals/threads/exports.wast", + "spec_testsuite/proposals/threads/wait_notify.wast", "spec_testsuite/simd_linking.wast", "spec_testsuite/skip-stack-guard-page.wast", "spec_testsuite/start.wast", @@ -493,30 +520,13 @@ impl WastTest { "spec_testsuite/utf8-import-field.wast", "spec_testsuite/utf8-import-module.wast", "spec_testsuite/utf8-invalid-encoding.wast", - "threads/exports.wast", + "misc_testsuite/memory64/more-than-4gb.wast", ]; if supported.iter().any(|part| self.path.ends_with(part)) { return false; } - // FIXME: once the backend has enough instruction support move these - // into the above tests since they should pass on 64-bit platforms - // as well. 
- let supported32bit = [ - "misc_testsuite/winch/table_grow.wast", - "misc_testsuite/table_grow_with_funcref.wast", - "spec_testsuite/proposals/multi-memory/trap0.wast", - "spec_testsuite/proposals/multi-memory/memory_trap0.wast", - "spec_testsuite/proposals/multi-memory/linking2.wast", - "spec_testsuite/memory_trap.wast", - ]; - if cfg!(target_pointer_width = "32") { - if supported32bit.iter().any(|part| self.path.ends_with(part)) { - return false; - } - } - return true; } diff --git a/pulley/src/interp.rs b/pulley/src/interp.rs index c0baf40dd145..3e468c5b85eb 100644 --- a/pulley/src/interp.rs +++ b/pulley/src/interp.rs @@ -1131,6 +1131,48 @@ impl OpVisitor for Interpreter<'_> { ControlFlow::Continue(()) } + fn xshl32(&mut self, operands: BinaryOperands) -> ControlFlow { + let a = self.state[operands.src1].get_u32(); + let b = self.state[operands.src2].get_u32(); + self.state[operands.dst].set_u32(a.wrapping_shl(b)); + ControlFlow::Continue(()) + } + + fn xshr32_u(&mut self, operands: BinaryOperands) -> ControlFlow { + let a = self.state[operands.src1].get_u32(); + let b = self.state[operands.src2].get_u32(); + self.state[operands.dst].set_u32(a.wrapping_shr(b)); + ControlFlow::Continue(()) + } + + fn xshr32_s(&mut self, operands: BinaryOperands) -> ControlFlow { + let a = self.state[operands.src1].get_i32(); + let b = self.state[operands.src2].get_u32(); + self.state[operands.dst].set_i32(a.wrapping_shr(b)); + ControlFlow::Continue(()) + } + + fn xshl64(&mut self, operands: BinaryOperands) -> ControlFlow { + let a = self.state[operands.src1].get_u64(); + let b = self.state[operands.src2].get_u32(); + self.state[operands.dst].set_u64(a.wrapping_shl(b)); + ControlFlow::Continue(()) + } + + fn xshr64_u(&mut self, operands: BinaryOperands) -> ControlFlow { + let a = self.state[operands.src1].get_u64(); + let b = self.state[operands.src2].get_u32(); + self.state[operands.dst].set_u64(a.wrapping_shr(b)); + ControlFlow::Continue(()) + } + + fn xshr64_s(&mut self, 
operands: BinaryOperands) -> ControlFlow { + let a = self.state[operands.src1].get_i64(); + let b = self.state[operands.src2].get_u32(); + self.state[operands.dst].set_i64(a.wrapping_shr(b)); + ControlFlow::Continue(()) + } + fn xeq64(&mut self, operands: BinaryOperands) -> ControlFlow { let a = self.state[operands.src1].get_u64(); let b = self.state[operands.src2].get_u64(); diff --git a/pulley/src/lib.rs b/pulley/src/lib.rs index 43773240621a..87989ea654e3 100644 --- a/pulley/src/lib.rs +++ b/pulley/src/lib.rs @@ -184,6 +184,19 @@ macro_rules! for_each_op { /// 64-bit wrapping subtraction: `dst = src1 - src2`. xsub64 = Xsub64 { operands: BinaryOperands }; + /// `low32(dst) = low32(src1) << low5(src2)` + xshl32 = Xshl32 { operands: BinaryOperands }; + /// `low32(dst) = low32(src1) >> low5(src2)` + xshr32_s = Xshr32S { operands: BinaryOperands }; + /// `low32(dst) = low32(src1) >> low5(src2)` + xshr32_u = Xshr32U { operands: BinaryOperands }; + /// `dst = src1 << low6(src2)` + xshl64 = Xshl64 { operands: BinaryOperands }; + /// `dst = src1 >> low6(src2)` + xshr64_s = Xshr64S { operands: BinaryOperands }; + /// `dst = src1 >> low6(src2)` + xshr64_u = Xshr64U { operands: BinaryOperands }; + /// 64-bit equality. xeq64 = Xeq64 { operands: BinaryOperands }; /// 64-bit inequality. From d2a22d4735e322980af089802a30788fc117f5e8 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sa=C3=BAl=20Cabrera?= Date: Wed, 11 Dec 2024 11:02:25 -0500 Subject: [PATCH 19/30] winch(aarch64): Handle division signedness (#9784) This commit fixes how division signedness is handled in aarch64. Prior to this commit, sign-extension was emitted unconditionally. This commit ensures that the correct extension is emitted depending on the division kind. Additionally this commit prefers making use of existing assembler helpers.
--- tests/disas/winch/aarch64/i32_divu/const.wat | 4 +- .../disas/winch/aarch64/i32_divu/one_zero.wat | 4 +- tests/disas/winch/aarch64/i32_divu/params.wat | 4 +- tests/disas/winch/aarch64/i32_divu/signed.wat | 4 +- .../winch/aarch64/i32_divu/zero_zero.wat | 4 +- winch/codegen/src/isa/aarch64/asm.rs | 55 ++++++++----------- winch/codegen/src/masm.rs | 1 + 7 files changed, 33 insertions(+), 43 deletions(-) diff --git a/tests/disas/winch/aarch64/i32_divu/const.wat b/tests/disas/winch/aarch64/i32_divu/const.wat index 3744727fab30..777d40c8ee78 100644 --- a/tests/disas/winch/aarch64/i32_divu/const.wat +++ b/tests/disas/winch/aarch64/i32_divu/const.wat @@ -23,8 +23,8 @@ ;; mov x16, #0x14 ;; mov w1, w16 ;; cbz x0, #0x54 -;; 34: sxtw x0, w0 -;; sxtw x1, w1 +;; 34: mov w0, w0 +;; mov w1, w1 ;; udiv x1, x1, x0 ;; mov w0, w1 ;; add sp, sp, #0x10 diff --git a/tests/disas/winch/aarch64/i32_divu/one_zero.wat b/tests/disas/winch/aarch64/i32_divu/one_zero.wat index a6ed69986a08..4b2887f1c7c6 100644 --- a/tests/disas/winch/aarch64/i32_divu/one_zero.wat +++ b/tests/disas/winch/aarch64/i32_divu/one_zero.wat @@ -23,8 +23,8 @@ ;; mov x16, #1 ;; mov w1, w16 ;; cbz x0, #0x54 -;; 34: sxtw x0, w0 -;; sxtw x1, w1 +;; 34: mov w0, w0 +;; mov w1, w1 ;; udiv x1, x1, x0 ;; mov w0, w1 ;; add sp, sp, #0x10 diff --git a/tests/disas/winch/aarch64/i32_divu/params.wat b/tests/disas/winch/aarch64/i32_divu/params.wat index 33014c3f0911..f47118cc1bd8 100644 --- a/tests/disas/winch/aarch64/i32_divu/params.wat +++ b/tests/disas/winch/aarch64/i32_divu/params.wat @@ -23,8 +23,8 @@ ;; ldur w0, [x28] ;; ldur w1, [x28, #4] ;; cbz x0, #0x54 -;; 34: sxtw x0, w0 -;; sxtw x1, w1 +;; 34: mov w0, w0 +;; mov w1, w1 ;; udiv x1, x1, x0 ;; mov w0, w1 ;; add sp, sp, #0x18 diff --git a/tests/disas/winch/aarch64/i32_divu/signed.wat b/tests/disas/winch/aarch64/i32_divu/signed.wat index f94f5aaef9b0..aa796a5cd616 100644 --- a/tests/disas/winch/aarch64/i32_divu/signed.wat +++ b/tests/disas/winch/aarch64/i32_divu/signed.wat @@ 
-23,8 +23,8 @@ ;; orr x16, xzr, #0xffffffff ;; mov w1, w16 ;; cbz x0, #0x54 -;; 34: sxtw x0, w0 -;; sxtw x1, w1 +;; 34: mov w0, w0 +;; mov w1, w1 ;; udiv x1, x1, x0 ;; mov w0, w1 ;; add sp, sp, #0x10 diff --git a/tests/disas/winch/aarch64/i32_divu/zero_zero.wat b/tests/disas/winch/aarch64/i32_divu/zero_zero.wat index e1733ff11ae9..e98e8115bc80 100644 --- a/tests/disas/winch/aarch64/i32_divu/zero_zero.wat +++ b/tests/disas/winch/aarch64/i32_divu/zero_zero.wat @@ -23,8 +23,8 @@ ;; mov x16, #0 ;; mov w1, w16 ;; cbz x0, #0x54 -;; 34: sxtw x0, w0 -;; sxtw x1, w1 +;; 34: mov w0, w0 +;; mov w1, w1 ;; udiv x1, x1, x0 ;; mov w0, w1 ;; add sp, sp, #0x10 diff --git a/winch/codegen/src/isa/aarch64/asm.rs b/winch/codegen/src/isa/aarch64/asm.rs index f94d6fc81758..47905aba280d 100644 --- a/winch/codegen/src/isa/aarch64/asm.rs +++ b/winch/codegen/src/isa/aarch64/asm.rs @@ -407,23 +407,21 @@ impl Assembler { kind: DivKind, size: OperandSize, ) { - // Check for division by 0 - self.emit(Inst::TrapIf { - kind: CondBrKind::Zero(divisor.into()), - trap_code: TrapCode::INTEGER_DIVISION_BY_ZERO, - }); + // Check for division by 0. + self.trapz(divisor, TrapCode::INTEGER_DIVISION_BY_ZERO); // check for overflow if kind == DivKind::Signed { - // we first check whether the divisor is -1 - self.emit(Inst::AluRRImm12 { - alu_op: ALUOp::AddS, - size: size.into(), - rd: writable!(zero().into()), - rn: divisor.into(), - imm12: Imm12::maybe_from_u64(1).expect("1 fits in 12 bits"), - }); - // if it is -1, then we check if the dividend is MIN + // Check for divisor overflow. + self.emit_alu_rri( + ALUOp::AddS, + Imm12::maybe_from_u64(1).expect("1 to fit in 12 bits"), + divisor, + writable!(zero()), + size, + ); + + // Check if the dividend is 1. 
self.emit(Inst::CCmpImm { size: size.into(), rn: dividend.into(), @@ -432,31 +430,22 @@ impl Assembler { cond: Cond::Eq, }); - // Finally, trap if the previous operation overflowed - self.emit(Inst::TrapIf { - kind: CondBrKind::Cond(Cond::Vs), - trap_code: TrapCode::INTEGER_OVERFLOW, - }) + // Finally, trap if the previous operation overflowed. + self.trapif(Cond::Vs, TrapCode::INTEGER_OVERFLOW); } // `cranelift-codegen` doesn't support emitting u/sdiv for anything but I64, // we therefore sign-extend the operand. // see: https://github.com/bytecodealliance/wasmtime/issues/9766 if size == OperandSize::S32 { - self.emit(Inst::Extend { - rd: writable!(divisor.into()), - rn: divisor.into(), - signed: true, - from_bits: 32, - to_bits: 64, - }); - self.emit(Inst::Extend { - rd: writable!(dividend.into()), - rn: dividend.into(), - signed: true, - from_bits: 32, - to_bits: 64, - }); + let extend_kind = if kind == DivKind::Signed { + ExtendKind::I64Extend32S + } else { + ExtendKind::I64ExtendI32U + }; + + self.extend(divisor, writable!(divisor), extend_kind); + self.extend(dividend, writable!(dividend), extend_kind); } let op = match kind { diff --git a/winch/codegen/src/masm.rs b/winch/codegen/src/masm.rs index da25d88c486a..53e97bca3cef 100644 --- a/winch/codegen/src/masm.rs +++ b/winch/codegen/src/masm.rs @@ -163,6 +163,7 @@ pub(crate) enum ShiftKind { /// Kinds of extends in WebAssembly. Each MacroAssembler implementation /// is responsible for emitting the correct sequence of instructions when /// lowering to machine code. +#[derive(Copy, Clone)] pub(crate) enum ExtendKind { /// Sign extends i32 to i64. 
I64ExtendI32S, From b5101ececc3416e9b64b13a3be1d145d5185cea4 Mon Sep 17 00:00:00 2001 From: Alex Crichton Date: Wed, 11 Dec 2024 09:30:27 -0700 Subject: [PATCH 20/30] pulley: Ungate the GC_TYPES feature from Pulley (#9785) * pulley: Ungate the `GC_TYPES` feature from Pulley Similar to #9779 this removes the `GC_TYPES` feature from the list of panicking features in Pulley. In doing so this then additionally fixes a number of panics and then adds more tests that are working. Some other minor instructions are filled out to ensure that tests are working on both 32 and 64-bit platforms. * Flag test as now passing --- .../codegen/src/isa/pulley_shared/inst/mod.rs | 5 +++- .../codegen/src/isa/pulley_shared/lower.isle | 15 +++++++++-- crates/cranelift/src/gc/enabled/drc.rs | 5 ++-- crates/cranelift/src/gc/enabled/null.rs | 5 ++-- crates/wasmtime/src/config.rs | 2 +- .../wasmtime/src/runtime/externals/table.rs | 16 +----------- crates/wast-util/src/lib.rs | 25 ++++++++++++++++++- pulley/src/interp.rs | 21 ++++++++++++++++ pulley/src/lib.rs | 8 +++++- 9 files changed, 75 insertions(+), 27 deletions(-) diff --git a/cranelift/codegen/src/isa/pulley_shared/inst/mod.rs b/cranelift/codegen/src/isa/pulley_shared/inst/mod.rs index d85e683011b8..3ab1c72033b4 100644 --- a/cranelift/codegen/src/isa/pulley_shared/inst/mod.rs +++ b/cranelift/codegen/src/isa/pulley_shared/inst/mod.rs @@ -378,7 +378,10 @@ where match self.inst { Inst::Raw { raw: RawInst::Trap { .. }, - } => true, + } + | Inst::Call { .. } + | Inst::IndirectCall { .. } + | Inst::IndirectCallHost { ..
} => true, _ => false, } } diff --git a/cranelift/codegen/src/isa/pulley_shared/lower.isle b/cranelift/codegen/src/isa/pulley_shared/lower.isle index 7b7e98eb4fae..83f90d1f595d 100644 --- a/cranelift/codegen/src/isa/pulley_shared/lower.isle +++ b/cranelift/codegen/src/isa/pulley_shared/lower.isle @@ -212,8 +212,19 @@ ;;;; Rules for `band` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; -(rule (lower (has_type $I32 (band a b))) - (pulley_xand32 a b)) +(rule 0 (lower (has_type (fits_in_32 _) (band a b))) + (pulley_xand32 a b)) + +(rule 1 (lower (has_type $I64 (band a b))) + (pulley_xand64 a b)) + +;;;; Rules for `bor` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + +(rule 0 (lower (has_type (fits_in_32 _) (bor a b))) + (pulley_xor32 a b)) + +(rule 1 (lower (has_type $I64 (bor a b))) + (pulley_xor64 a b)) ;;;; Rules for `icmp` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; diff --git a/crates/cranelift/src/gc/enabled/drc.rs b/crates/cranelift/src/gc/enabled/drc.rs index 47b066607238..412e644cd292 100644 --- a/crates/cranelift/src/gc/enabled/drc.rs +++ b/crates/cranelift/src/gc/enabled/drc.rs @@ -301,6 +301,7 @@ impl GcCompiler for DrcCompiler { init: super::ArrayInit<'_>, ) -> WasmResult { let interned_type_index = func_env.module.types[array_type_index]; + let ptr_ty = func_env.pointer_type(); let len_offset = gc_compiler(func_env)?.layouts().array_length_field_offset(); let array_layout = func_env.array_layout(interned_type_index).clone(); @@ -338,9 +339,7 @@ impl GcCompiler for DrcCompiler { .store(ir::MemFlags::trusted(), len, len_addr, 0); // Finally, initialize the elements. 
- let len_to_elems_delta = builder - .ins() - .iconst(ir::types::I64, i64::from(len_to_elems_delta)); + let len_to_elems_delta = builder.ins().iconst(ptr_ty, i64::from(len_to_elems_delta)); let elems_addr = builder.ins().iadd(len_addr, len_to_elems_delta); init.initialize( func_env, diff --git a/crates/cranelift/src/gc/enabled/null.rs b/crates/cranelift/src/gc/enabled/null.rs index a5367893a4ee..bf478f82f511 100644 --- a/crates/cranelift/src/gc/enabled/null.rs +++ b/crates/cranelift/src/gc/enabled/null.rs @@ -155,6 +155,7 @@ impl GcCompiler for NullCompiler { init: super::ArrayInit<'_>, ) -> WasmResult { let interned_type_index = func_env.module.types[array_type_index]; + let ptr_ty = func_env.pointer_type(); let len_offset = gc_compiler(func_env)?.layouts().array_length_field_offset(); let array_layout = func_env.array_layout(interned_type_index).clone(); @@ -190,9 +191,7 @@ impl GcCompiler for NullCompiler { .store(ir::MemFlags::trusted(), len, len_addr, 0); // Finally, initialize the elements. - let len_to_elems_delta = builder - .ins() - .iconst(ir::types::I64, i64::from(len_to_elems_delta)); + let len_to_elems_delta = builder.ins().iconst(ptr_ty, i64::from(len_to_elems_delta)); let elems_addr = builder.ins().iadd(len_addr, len_to_elems_delta); init.initialize( func_env, diff --git a/crates/wasmtime/src/config.rs b/crates/wasmtime/src/config.rs index f283d1b459fc..951a49058b2f 100644 --- a/crates/wasmtime/src/config.rs +++ b/crates/wasmtime/src/config.rs @@ -1965,7 +1965,7 @@ impl Config { // errors are panics though due to unimplemented bits in ABI // code and those causes are listed here. 
if self.compiler_target().is_pulley() { - return WasmFeatures::TAIL_CALL | WasmFeatures::GC_TYPES; + return WasmFeatures::TAIL_CALL; } // Other Cranelift backends are either 100% missing or complete diff --git a/crates/wasmtime/src/runtime/externals/table.rs b/crates/wasmtime/src/runtime/externals/table.rs index 7aff2a678cdf..24f611a4c005 100644 --- a/crates/wasmtime/src/runtime/externals/table.rs +++ b/crates/wasmtime/src/runtime/externals/table.rs @@ -441,7 +441,6 @@ impl Table { mod tests { use super::*; use crate::{Instance, Module, Store}; - use wasmtime_environ::TripleExt; #[test] fn hash_key_is_stable_across_duplicate_store_data_entries() -> Result<()> { @@ -453,20 +452,7 @@ mod tests { (table (export "t") 1 1 externref) ) "#, - ); - // Expect this test to fail on pulley at this time. When pulley supports - // externref this should switch back to using `?` on the constructor - // above for all platforms. - let module = match module { - Ok(module) => { - assert!(!store.engine().target().is_pulley()); - module - } - Err(e) => { - assert!(store.engine().target().is_pulley(), "bad error {e:?}"); - return Ok(()); - } - }; + )?; let instance = Instance::new(&mut store, &module, &[])?; // Each time we `get_table`, we call `Table::from_wasmtime` which adds diff --git a/crates/wast-util/src/lib.rs b/crates/wast-util/src/lib.rs index daa695ea27d1..ab073a4b0e11 100644 --- a/crates/wast-util/src/lib.rs +++ b/crates/wast-util/src/lib.rs @@ -412,7 +412,9 @@ impl WastTest { "misc_testsuite/component-model/import.wast", "misc_testsuite/component-model/instance.wast", "misc_testsuite/component-model/linking.wast", + "misc_testsuite/component-model/modules.wast", "misc_testsuite/component-model/nested.wast", + "misc_testsuite/component-model/resources.wast", "misc_testsuite/component-model/types.wast", "misc_testsuite/control-flow.wast", "misc_testsuite/custom-page-sizes/custom-page-sizes.wast", @@ -420,23 +422,43 @@ impl WastTest { "misc_testsuite/elem_drop.wast", 
"misc_testsuite/empty.wast", "misc_testsuite/export-large-signature.wast", + "misc_testsuite/externref-id-function.wast", + "misc_testsuite/externref-segment.wast", + "misc_testsuite/externref-table-dropped-segment-issue-8281.wast", "misc_testsuite/fib.wast", "misc_testsuite/func-400-params.wast", + "misc_testsuite/gc/array-alloc-too-large.wast", + "misc_testsuite/gc/array-init-data.wast", + "misc_testsuite/gc/array-new-data.wast", + "misc_testsuite/gc/array-new-elem.wast", + "misc_testsuite/gc/array-types.wast", + "misc_testsuite/gc/externref-segment.wast", + "misc_testsuite/gc/func-refs-in-gc-heap.wast", "misc_testsuite/gc/more-rec-groups-than-types.wast", + "misc_testsuite/gc/null-i31ref.wast", "misc_testsuite/gc/rec-group-funcs.wast", + "misc_testsuite/gc/struct-types.wast", "misc_testsuite/imported-memory-copy.wast", "misc_testsuite/issue4857.wast", + "misc_testsuite/linking-errors.wast", "misc_testsuite/memory-copy.wast", "misc_testsuite/memory64/bounds.wast", "misc_testsuite/memory64/linking-errors.wast", "misc_testsuite/memory64/linking.wast", + "misc_testsuite/memory64/more-than-4gb.wast", "misc_testsuite/memory64/multi-memory.wast", "misc_testsuite/memory64/offsets.wast", "misc_testsuite/multi-memory/simple.wast", + "misc_testsuite/mutable_externref_globals.wast", + "misc_testsuite/no-mixup-stack-maps.wast", "misc_testsuite/partial-init-memory-segment.wast", "misc_testsuite/rs2wasm-add-func.wast", + "misc_testsuite/simple_ref_is_null.wast", "misc_testsuite/stack_overflow.wast", "misc_testsuite/table_grow_with_funcref.wast", + "misc_testsuite/threads/LB.wast", + "misc_testsuite/threads/MP.wast", + "misc_testsuite/threads/SB.wast", "misc_testsuite/threads/atomics_notify.wast", "misc_testsuite/threads/atomics_wait_address.wast", "misc_testsuite/threads/wait_notify.wast", @@ -504,12 +526,14 @@ impl WastTest { "spec_testsuite/proposals/threads/atomics_wait_address.wast", "spec_testsuite/proposals/threads/exports.wast", 
"spec_testsuite/proposals/threads/wait_notify.wast", + "spec_testsuite/ref_null.wast", "spec_testsuite/simd_linking.wast", "spec_testsuite/skip-stack-guard-page.wast", "spec_testsuite/start.wast", "spec_testsuite/store.wast", "spec_testsuite/table-sub.wast", "spec_testsuite/table.wast", + "spec_testsuite/table_size.wast", "spec_testsuite/token.wast", "spec_testsuite/type.wast", "spec_testsuite/unreachable.wast", @@ -520,7 +544,6 @@ impl WastTest { "spec_testsuite/utf8-import-field.wast", "spec_testsuite/utf8-import-module.wast", "spec_testsuite/utf8-invalid-encoding.wast", - "misc_testsuite/memory64/more-than-4gb.wast", ]; if supported.iter().any(|part| self.path.ends_with(part)) { diff --git a/pulley/src/interp.rs b/pulley/src/interp.rs index 3e468c5b85eb..5a25f3f7fdd1 100644 --- a/pulley/src/interp.rs +++ b/pulley/src/interp.rs @@ -1578,6 +1578,27 @@ impl OpVisitor for Interpreter<'_> { self.state[operands.dst].set_u32(a & b); ControlFlow::Continue(()) } + + fn xand64(&mut self, operands: BinaryOperands) -> ControlFlow { + let a = self.state[operands.src1].get_u64(); + let b = self.state[operands.src2].get_u64(); + self.state[operands.dst].set_u64(a & b); + ControlFlow::Continue(()) + } + + fn xor32(&mut self, operands: BinaryOperands) -> ControlFlow { + let a = self.state[operands.src1].get_u32(); + let b = self.state[operands.src2].get_u32(); + self.state[operands.dst].set_u32(a | b); + ControlFlow::Continue(()) + } + + fn xor64(&mut self, operands: BinaryOperands) -> ControlFlow { + let a = self.state[operands.src1].get_u64(); + let b = self.state[operands.src2].get_u64(); + self.state[operands.dst].set_u64(a | b); + ControlFlow::Continue(()) + } } impl ExtendedOpVisitor for Interpreter<'_> { diff --git a/pulley/src/lib.rs b/pulley/src/lib.rs index 87989ea654e3..784e3b7aaeac 100644 --- a/pulley/src/lib.rs +++ b/pulley/src/lib.rs @@ -325,8 +325,14 @@ macro_rules! 
for_each_op { /// `dst = src1 / src2` (signed) xdiv32_s = XDiv32S { operands: BinaryOperands }; - /// `dst = src1 & src2` + /// `low32(dst) = low32(src1) & low32(src2)` xand32 = XAnd32 { operands: BinaryOperands }; + /// `dst = src1 & src2` + xand64 = XAnd64 { operands: BinaryOperands }; + /// `low32(dst) = low32(src1) | low32(src2)` + xor32 = XOr32 { operands: BinaryOperands }; + /// `dst = src1 | src2` + xor64 = XOr64 { operands: BinaryOperands }; } }; } From 461a72eb259f2da149f18554cd52e701b3ceeffe Mon Sep 17 00:00:00 2001 From: Alex Crichton Date: Wed, 11 Dec 2024 10:08:21 -0700 Subject: [PATCH 21/30] pulley: Update how tests are expected to fail or pass (#9786) With enough basic support here and there switch the listing of tests for Pulley from known-passing to known-failing instead. This trims the list of unsupported features to just those that are truly unsupported and then additionally rewrites the list of tests to be an exhaustive list of unsupported tests instead of a list of supported tests. This should make it a bit easier to track the progress as now pulley is "complete" once this list is empty. --- crates/wast-util/src/lib.rs | 403 ++++++++++++++++++++---------------- pulley/CONTRIBUTING.md | 12 +- 2 files changed, 235 insertions(+), 180 deletions(-) diff --git a/crates/wast-util/src/lib.rs b/crates/wast-util/src/lib.rs index ab073a4b0e11..85722a75e747 100644 --- a/crates/wast-util/src/lib.rs +++ b/crates/wast-util/src/lib.rs @@ -297,24 +297,12 @@ impl Compiler { } } - // Pulley is just getting started, it implements almost no proposals - // yet.
Compiler::CraneliftPulley => { - // Unsupported proposals - if config.memory64() - || config.custom_page_sizes() - || config.multi_memory() - || config.threads() - || config.gc() - || config.function_references() - || config.relaxed_simd() - || config.reference_types() - || config.tail_call() - || config.extended_const() - || config.wide_arithmetic() - || config.simd() - || config.gc_types() - { + // Unsupported proposals. Note that other proposals have partial + // support at this time (pulley is a work-in-progress) and so + // individual tests are listed below as "should fail" even if + // they're not covered in this list. + if config.tail_call() || config.wide_arithmetic() { return true; } } @@ -394,167 +382,234 @@ impl WastTest { } } - // Pulley is in a bit of a special state at this time where it supports - // only a subset of the initial MVP of WebAssembly. That means that no - // test technically passes by default but a few do happen to use just - // the right subset of wasm that we can pass it. For now maintain an - // allow-list of tests that are known to pass in Pulley. As tests are - // fixed they should get added to this list. Over time this list will - // instead get inverted to "these tests are known to fail" once Pulley - // implements more proposals. + if config.compiler.should_fail(&self.config) { + return true; + } + + // Pulley supports a mishmash of proposals at this time as it's in an + // interim state. It doesn't support all of the MVP but it supports + // enough to pass some GC tests for example. This means that + // `Compiler::should_fail` is pretty liberal (the check above). To + // handle this there's an extra check here for an exhaustive list of + // unsupported tests on Pulley. This list will get burned down as + // features in Pulley are implemented. 
if config.compiler == Compiler::CraneliftPulley { - let supported = [ - "custom-page-sizes/custom-page-sizes-invalid.wast", - "exception-handling/exports.wast", - "extended-const/data.wast", - "misc_testsuite/component-model/adapter.wast", - "misc_testsuite/component-model/aliasing.wast", - "misc_testsuite/component-model/import.wast", - "misc_testsuite/component-model/instance.wast", - "misc_testsuite/component-model/linking.wast", - "misc_testsuite/component-model/modules.wast", - "misc_testsuite/component-model/nested.wast", - "misc_testsuite/component-model/resources.wast", - "misc_testsuite/component-model/types.wast", - "misc_testsuite/control-flow.wast", - "misc_testsuite/custom-page-sizes/custom-page-sizes.wast", - "misc_testsuite/elem-ref-null.wast", - "misc_testsuite/elem_drop.wast", - "misc_testsuite/empty.wast", - "misc_testsuite/export-large-signature.wast", - "misc_testsuite/externref-id-function.wast", - "misc_testsuite/externref-segment.wast", - "misc_testsuite/externref-table-dropped-segment-issue-8281.wast", - "misc_testsuite/fib.wast", - "misc_testsuite/func-400-params.wast", - "misc_testsuite/gc/array-alloc-too-large.wast", - "misc_testsuite/gc/array-init-data.wast", - "misc_testsuite/gc/array-new-data.wast", - "misc_testsuite/gc/array-new-elem.wast", - "misc_testsuite/gc/array-types.wast", - "misc_testsuite/gc/externref-segment.wast", - "misc_testsuite/gc/func-refs-in-gc-heap.wast", - "misc_testsuite/gc/more-rec-groups-than-types.wast", - "misc_testsuite/gc/null-i31ref.wast", - "misc_testsuite/gc/rec-group-funcs.wast", - "misc_testsuite/gc/struct-types.wast", - "misc_testsuite/imported-memory-copy.wast", - "misc_testsuite/issue4857.wast", - "misc_testsuite/linking-errors.wast", - "misc_testsuite/memory-copy.wast", - "misc_testsuite/memory64/bounds.wast", - "misc_testsuite/memory64/linking-errors.wast", - "misc_testsuite/memory64/linking.wast", - "misc_testsuite/memory64/more-than-4gb.wast", - "misc_testsuite/memory64/multi-memory.wast", - 
"misc_testsuite/memory64/offsets.wast", - "misc_testsuite/multi-memory/simple.wast", - "misc_testsuite/mutable_externref_globals.wast", - "misc_testsuite/no-mixup-stack-maps.wast", - "misc_testsuite/partial-init-memory-segment.wast", - "misc_testsuite/rs2wasm-add-func.wast", - "misc_testsuite/simple_ref_is_null.wast", - "misc_testsuite/stack_overflow.wast", - "misc_testsuite/table_grow_with_funcref.wast", - "misc_testsuite/threads/LB.wast", - "misc_testsuite/threads/MP.wast", - "misc_testsuite/threads/SB.wast", - "misc_testsuite/threads/atomics_notify.wast", - "misc_testsuite/threads/atomics_wait_address.wast", - "misc_testsuite/threads/wait_notify.wast", - "misc_testsuite/winch/_simd_linking.wast", - "misc_testsuite/winch/misc.wast", - "misc_testsuite/winch/oob.wast", - "misc_testsuite/winch/table_grow.wast", - "spec_testsuite/address.wast", - "spec_testsuite/binary-leb128.wast", - "spec_testsuite/binary.wast", - "spec_testsuite/comments.wast", - "spec_testsuite/custom.wast", - "spec_testsuite/data.wast", - "spec_testsuite/exports.wast", - "spec_testsuite/forward.wast", - "spec_testsuite/inline-module.wast", - "spec_testsuite/int_literals.wast", - "spec_testsuite/memory_copy.wast", - "spec_testsuite/memory_fill.wast", - "spec_testsuite/memory_init.wast", - "spec_testsuite/memory_size.wast", - "spec_testsuite/memory_trap.wast", - "spec_testsuite/names.wast", - "spec_testsuite/obsolete-keywords.wast", - "spec_testsuite/proposals/annotations/annotations.wast", - "spec_testsuite/proposals/annotations/id.wast", - "spec_testsuite/proposals/annotations/token.wast", - "spec_testsuite/proposals/custom-page-sizes/custom-page-sizes.wast", - "spec_testsuite/proposals/exception-handling/binary.wast", - "spec_testsuite/proposals/multi-memory/address0.wast", - "spec_testsuite/proposals/multi-memory/address1.wast", - "spec_testsuite/proposals/multi-memory/binary.wast", - "spec_testsuite/proposals/multi-memory/binary0.wast", - "spec_testsuite/proposals/multi-memory/data.wast", - 
"spec_testsuite/proposals/multi-memory/data0.wast", - "spec_testsuite/proposals/multi-memory/data1.wast", - "spec_testsuite/proposals/multi-memory/data_drop0.wast", - "spec_testsuite/proposals/multi-memory/exports0.wast", - "spec_testsuite/proposals/multi-memory/imports1.wast", - "spec_testsuite/proposals/multi-memory/imports2.wast", - "spec_testsuite/proposals/multi-memory/imports4.wast", - "spec_testsuite/proposals/multi-memory/linking1.wast", - "spec_testsuite/proposals/multi-memory/linking2.wast", - "spec_testsuite/proposals/multi-memory/load0.wast", - "spec_testsuite/proposals/multi-memory/load1.wast", - "spec_testsuite/proposals/multi-memory/memory-multi.wast", - "spec_testsuite/proposals/multi-memory/memory_copy0.wast", - "spec_testsuite/proposals/multi-memory/memory_copy1.wast", - "spec_testsuite/proposals/multi-memory/memory_fill0.wast", - "spec_testsuite/proposals/multi-memory/memory_init0.wast", - "spec_testsuite/proposals/multi-memory/memory_size.wast", - "spec_testsuite/proposals/multi-memory/memory_size0.wast", - "spec_testsuite/proposals/multi-memory/memory_size1.wast", - "spec_testsuite/proposals/multi-memory/memory_size2.wast", - "spec_testsuite/proposals/multi-memory/memory_size3.wast", - "spec_testsuite/proposals/multi-memory/memory_trap0.wast", - "spec_testsuite/proposals/multi-memory/memory_trap1.wast", - "spec_testsuite/proposals/multi-memory/start0.wast", - "spec_testsuite/proposals/multi-memory/store.wast", - "spec_testsuite/proposals/multi-memory/store0.wast", - "spec_testsuite/proposals/multi-memory/store1.wast", - "spec_testsuite/proposals/multi-memory/trap0.wast", - "spec_testsuite/proposals/multi-memory/traps0.wast", - "spec_testsuite/proposals/threads/atomics_notify.wast", - "spec_testsuite/proposals/threads/atomics_wait_address.wast", - "spec_testsuite/proposals/threads/exports.wast", - "spec_testsuite/proposals/threads/wait_notify.wast", - "spec_testsuite/ref_null.wast", - "spec_testsuite/simd_linking.wast", - 
"spec_testsuite/skip-stack-guard-page.wast", - "spec_testsuite/start.wast", - "spec_testsuite/store.wast", - "spec_testsuite/table-sub.wast", - "spec_testsuite/table.wast", - "spec_testsuite/table_size.wast", - "spec_testsuite/token.wast", - "spec_testsuite/type.wast", - "spec_testsuite/unreachable.wast", - "spec_testsuite/unreached-invalid.wast", - "spec_testsuite/unreached-valid.wast", - "spec_testsuite/unwind.wast", - "spec_testsuite/utf8-custom-section-id.wast", - "spec_testsuite/utf8-import-field.wast", - "spec_testsuite/utf8-import-module.wast", - "spec_testsuite/utf8-invalid-encoding.wast", + let unsupported = [ + "misc_testsuite/br-table-fuzzbug.wast", + "misc_testsuite/call_indirect.wast", + "misc_testsuite/component-model/fused.wast", + "misc_testsuite/component-model/simple.wast", + "misc_testsuite/component-model/strings.wast", + "misc_testsuite/div-rem.wast", + "misc_testsuite/embenchen_fannkuch.wast", + "misc_testsuite/embenchen_fasta.wast", + "misc_testsuite/embenchen_ifs.wast", + "misc_testsuite/embenchen_primes.wast", + "misc_testsuite/float-round-doesnt-load-too-much.wast", + "misc_testsuite/function-references/call_indirect.wast", + "misc_testsuite/function-references/instance.wast", + "misc_testsuite/function-references/table_fill.wast", + "misc_testsuite/function-references/table_get.wast", + "misc_testsuite/function-references/table_grow.wast", + "misc_testsuite/function-references/table_set.wast", + "misc_testsuite/gc/anyref_that_is_i31_barriers.wast", + "misc_testsuite/gc/i31ref-of-global-initializers.wast", + "misc_testsuite/gc/i31ref-tables.wast", + "misc_testsuite/gc/ref-test.wast", + "misc_testsuite/gc/struct-instructions.wast", + "misc_testsuite/int-to-float-splat.wast", + "misc_testsuite/issue1809.wast", + "misc_testsuite/issue4840.wast", + "misc_testsuite/issue4890.wast", + "misc_testsuite/issue6562.wast", + "misc_testsuite/issue694.wast", + "misc_testsuite/many_table_gets_lead_to_gc.wast", + "misc_testsuite/memory-combos.wast", + 
"misc_testsuite/memory64/codegen.wast", + "misc_testsuite/memory64/simd.wast", + "misc_testsuite/memory64/threads.wast", + "misc_testsuite/misc_traps.wast", + "misc_testsuite/no-panic.wast", + "misc_testsuite/partial-init-table-segment.wast", + "misc_testsuite/rust_fannkuch.wast", + "misc_testsuite/simd/almost-extmul.wast", + "misc_testsuite/simd/canonicalize-nan.wast", + "misc_testsuite/simd/cvt-from-uint.wast", + "misc_testsuite/simd/interesting-float-splat.wast", + "misc_testsuite/simd/issue4807.wast", + "misc_testsuite/simd/issue6725-no-egraph-panic.wast", + "misc_testsuite/simd/issue_3173_select_v128.wast", + "misc_testsuite/simd/issue_3327_bnot_lowering.wast", + "misc_testsuite/simd/load_splat_out_of_bounds.wast", + "misc_testsuite/simd/replace-lane-preserve.wast", + "misc_testsuite/simd/spillslot-size-fuzzbug.wast", + "misc_testsuite/simd/unaligned-load.wast", + "misc_testsuite/simd/v128-select.wast", + "misc_testsuite/sink-float-but-dont-trap.wast", + "misc_testsuite/table_copy.wast", + "misc_testsuite/table_copy_on_imported_tables.wast", + "misc_testsuite/threads/LB_atomic.wast", + "misc_testsuite/threads/MP_atomic.wast", + "misc_testsuite/threads/MP_wait.wast", + "misc_testsuite/threads/SB_atomic.wast", + "misc_testsuite/threads/load-store-alignment.wast", + "misc_testsuite/winch/_simd_address.wast", + "misc_testsuite/winch/_simd_const.wast", + "misc_testsuite/winch/_simd_load.wast", + "misc_testsuite/winch/_simd_multivalue.wast", + "misc_testsuite/winch/_simd_store.wast", + "misc_testsuite/winch/br_table.wast", + "misc_testsuite/winch/float-comparison.wast", + "misc_testsuite/winch/global.wast", + "misc_testsuite/winch/select.wast", + "misc_testsuite/winch/table_fill.wast", + "misc_testsuite/winch/table_get.wast", + "misc_testsuite/winch/table_set.wast", + "spec_testsuite/align.wast", + "spec_testsuite/block.wast", + "spec_testsuite/br.wast", + "spec_testsuite/br_if.wast", + "spec_testsuite/br_table.wast", + "spec_testsuite/bulk.wast", + 
"spec_testsuite/call.wast", + "spec_testsuite/call_indirect.wast", + "spec_testsuite/const.wast", + "spec_testsuite/conversions.wast", + "spec_testsuite/elem.wast", + "spec_testsuite/endianness.wast", + "spec_testsuite/f32.wast", + "spec_testsuite/f32_bitwise.wast", + "spec_testsuite/f32_cmp.wast", + "spec_testsuite/f64.wast", + "spec_testsuite/f64_bitwise.wast", + "spec_testsuite/f64_cmp.wast", + "spec_testsuite/fac.wast", + "spec_testsuite/float_exprs.wast", + "spec_testsuite/float_literals.wast", + "spec_testsuite/float_memory.wast", + "spec_testsuite/float_misc.wast", + "spec_testsuite/func.wast", + "spec_testsuite/func_ptrs.wast", + "spec_testsuite/global.wast", + "spec_testsuite/i32.wast", + "spec_testsuite/i64.wast", + "spec_testsuite/if.wast", + "spec_testsuite/imports.wast", + "spec_testsuite/int_exprs.wast", + "spec_testsuite/labels.wast", + "spec_testsuite/left-to-right.wast", + "spec_testsuite/linking.wast", + "spec_testsuite/load.wast", + "spec_testsuite/local_get.wast", + "spec_testsuite/local_set.wast", + "spec_testsuite/local_tee.wast", + "spec_testsuite/loop.wast", + "spec_testsuite/memory.wast", + "spec_testsuite/memory_grow.wast", + "spec_testsuite/memory_redundancy.wast", + "spec_testsuite/nop.wast", + "spec_testsuite/proposals/annotations/simd_lane.wast", + "spec_testsuite/proposals/extended-const/elem.wast", + "spec_testsuite/proposals/extended-const/global.wast", + "spec_testsuite/proposals/multi-memory/align.wast", + "spec_testsuite/proposals/multi-memory/align0.wast", + "spec_testsuite/proposals/multi-memory/float_exprs0.wast", + "spec_testsuite/proposals/multi-memory/float_exprs1.wast", + "spec_testsuite/proposals/multi-memory/float_memory0.wast", + "spec_testsuite/proposals/multi-memory/imports.wast", + "spec_testsuite/proposals/multi-memory/imports0.wast", + "spec_testsuite/proposals/multi-memory/imports3.wast", + "spec_testsuite/proposals/multi-memory/linking0.wast", + "spec_testsuite/proposals/multi-memory/linking3.wast", + 
"spec_testsuite/proposals/multi-memory/load.wast", + "spec_testsuite/proposals/multi-memory/load2.wast", + "spec_testsuite/proposals/multi-memory/memory.wast", + "spec_testsuite/proposals/multi-memory/memory_grow.wast", + "spec_testsuite/proposals/multi-memory/simd_memory-multi.wast", + "spec_testsuite/proposals/relaxed-simd/i16x8_relaxed_q15mulr_s.wast", + "spec_testsuite/proposals/relaxed-simd/i32x4_relaxed_trunc.wast", + "spec_testsuite/proposals/relaxed-simd/i8x16_relaxed_swizzle.wast", + "spec_testsuite/proposals/relaxed-simd/relaxed_dot_product.wast", + "spec_testsuite/proposals/relaxed-simd/relaxed_laneselect.wast", + "spec_testsuite/proposals/relaxed-simd/relaxed_madd_nmadd.wast", + "spec_testsuite/proposals/relaxed-simd/relaxed_min_max.wast", + "spec_testsuite/proposals/threads/atomic.wast", + "spec_testsuite/proposals/threads/imports.wast", + "spec_testsuite/proposals/threads/memory.wast", + "spec_testsuite/ref_func.wast", + "spec_testsuite/ref_is_null.wast", + "spec_testsuite/return.wast", + "spec_testsuite/select.wast", + "spec_testsuite/simd_address.wast", + "spec_testsuite/simd_align.wast", + "spec_testsuite/simd_bit_shift.wast", + "spec_testsuite/simd_bitwise.wast", + "spec_testsuite/simd_boolean.wast", + "spec_testsuite/simd_const.wast", + "spec_testsuite/simd_conversions.wast", + "spec_testsuite/simd_f32x4.wast", + "spec_testsuite/simd_f32x4_arith.wast", + "spec_testsuite/simd_f32x4_cmp.wast", + "spec_testsuite/simd_f32x4_pmin_pmax.wast", + "spec_testsuite/simd_f32x4_rounding.wast", + "spec_testsuite/simd_f64x2.wast", + "spec_testsuite/simd_f64x2_arith.wast", + "spec_testsuite/simd_f64x2_cmp.wast", + "spec_testsuite/simd_f64x2_pmin_pmax.wast", + "spec_testsuite/simd_f64x2_rounding.wast", + "spec_testsuite/simd_i16x8_arith.wast", + "spec_testsuite/simd_i16x8_arith2.wast", + "spec_testsuite/simd_i16x8_cmp.wast", + "spec_testsuite/simd_i16x8_extadd_pairwise_i8x16.wast", + "spec_testsuite/simd_i16x8_extmul_i8x16.wast", + 
"spec_testsuite/simd_i16x8_q15mulr_sat_s.wast", + "spec_testsuite/simd_i16x8_sat_arith.wast", + "spec_testsuite/simd_i32x4_arith.wast", + "spec_testsuite/simd_i32x4_arith2.wast", + "spec_testsuite/simd_i32x4_cmp.wast", + "spec_testsuite/simd_i32x4_dot_i16x8.wast", + "spec_testsuite/simd_i32x4_extadd_pairwise_i16x8.wast", + "spec_testsuite/simd_i32x4_extmul_i16x8.wast", + "spec_testsuite/simd_i32x4_trunc_sat_f32x4.wast", + "spec_testsuite/simd_i32x4_trunc_sat_f64x2.wast", + "spec_testsuite/simd_i64x2_arith.wast", + "spec_testsuite/simd_i64x2_arith2.wast", + "spec_testsuite/simd_i64x2_cmp.wast", + "spec_testsuite/simd_i64x2_extmul_i32x4.wast", + "spec_testsuite/simd_i8x16_arith.wast", + "spec_testsuite/simd_i8x16_arith2.wast", + "spec_testsuite/simd_i8x16_cmp.wast", + "spec_testsuite/simd_i8x16_sat_arith.wast", + "spec_testsuite/simd_int_to_int_extend.wast", + "spec_testsuite/simd_lane.wast", + "spec_testsuite/simd_load.wast", + "spec_testsuite/simd_load16_lane.wast", + "spec_testsuite/simd_load32_lane.wast", + "spec_testsuite/simd_load64_lane.wast", + "spec_testsuite/simd_load8_lane.wast", + "spec_testsuite/simd_load_extend.wast", + "spec_testsuite/simd_load_splat.wast", + "spec_testsuite/simd_load_zero.wast", + "spec_testsuite/simd_splat.wast", + "spec_testsuite/simd_store.wast", + "spec_testsuite/simd_store16_lane.wast", + "spec_testsuite/simd_store32_lane.wast", + "spec_testsuite/simd_store64_lane.wast", + "spec_testsuite/simd_store8_lane.wast", + "spec_testsuite/stack.wast", + "spec_testsuite/switch.wast", + "spec_testsuite/table_copy.wast", + "spec_testsuite/table_fill.wast", + "spec_testsuite/table_get.wast", + "spec_testsuite/table_grow.wast", + "spec_testsuite/table_init.wast", + "spec_testsuite/table_set.wast", + "spec_testsuite/traps.wast", ]; - if supported.iter().any(|part| self.path.ends_with(part)) { - return false; + if unsupported.iter().any(|part| self.path.ends_with(part)) { + return true; } - - return true; - } - - if 
config.compiler.should_fail(&self.config) { - return true; } // Disable spec tests for proposals that Winch does not implement yet. diff --git a/pulley/CONTRIBUTING.md b/pulley/CONTRIBUTING.md index f103343443da..69a1402125e8 100644 --- a/pulley/CONTRIBUTING.md +++ b/pulley/CONTRIBUTING.md @@ -15,10 +15,10 @@ of doing so. #### Choose a test to get passing First off find a test in this repository, probably a `*.wast` test, which isn't -currently passing. At the time of this writing almost no tests are passing, but -for an up-to-date list check out the `WastTest::should_fail` method in -`crates/wast-util/src/lib.rs`. Here we're going to select -`./tests/misc_testsuite/control-flow.wast` as it's a reasonably small test. +currently passing. Check out the `WastTest::should_fail` method in +`crates/wast-util/src/lib.rs` which has a list of `unsupported` tests for +Pulley. Here we're going to select `./tests/misc_testsuite/control-flow.wast` +as it's a reasonably small test. #### See the test failure @@ -232,8 +232,8 @@ error: test failed, to rerun pass `--test wast` This indicates that the test was previously flagged as "should fail", but that assertion is no longer true! Update the `WastTest::should_fail` method in -`crates/wast-util/src/lib.rs` so that it expects the test to pass and we'll -see: +`crates/wast-util/src/lib.rs` so that it expects the test to pass by deleting +the tests from the `unsupported` list. Then we'll see: ``` $ cargo test --test wast control-flow.wast From 0fabffe91cc64a8705c9d106aeba5991d036c263 Mon Sep 17 00:00:00 2001 From: Alex Crichton Date: Wed, 11 Dec 2024 11:24:20 -0700 Subject: [PATCH 22/30] pulley: Get `align.wast` passing (#9789) * pulley: Get `align.wast` passing Required implementing lowerings for `f32const` and `f64const` CLIF instructions which then also got a number of other tests passing. 
* Undo winch change --- .../codegen/src/isa/pulley_shared/lower.isle | 10 ++++++++++ crates/wast-util/src/lib.rs | 18 ------------------ pulley/src/interp.rs | 10 ++++++++++ pulley/src/lib.rs | 5 +++++ 4 files changed, 25 insertions(+), 18 deletions(-) diff --git a/cranelift/codegen/src/isa/pulley_shared/lower.isle b/cranelift/codegen/src/isa/pulley_shared/lower.isle index 83f90d1f595d..621a6cc066f2 100644 --- a/cranelift/codegen/src/isa/pulley_shared/lower.isle +++ b/cranelift/codegen/src/isa/pulley_shared/lower.isle @@ -153,6 +153,16 @@ (rule (lower (has_type ty (iconst (u64_from_imm64 n)))) (imm ty n)) +;;;; Rules for `f32const`;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + +(rule (lower (f32const (u32_from_ieee32 x))) + (pulley_fconst32 x)) + +;;;; Rules for `f64const`;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + +(rule (lower (f64const (u64_from_ieee64 x))) + (pulley_fconst64 x)) + ;;;; Rules for `iadd` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; (rule (lower (has_type $I8 (iadd a b))) diff --git a/crates/wast-util/src/lib.rs b/crates/wast-util/src/lib.rs index 85722a75e747..16c4c3863256 100644 --- a/crates/wast-util/src/lib.rs +++ b/crates/wast-util/src/lib.rs @@ -398,7 +398,6 @@ impl WastTest { "misc_testsuite/br-table-fuzzbug.wast", "misc_testsuite/call_indirect.wast", "misc_testsuite/component-model/fused.wast", - "misc_testsuite/component-model/simple.wast", "misc_testsuite/component-model/strings.wast", "misc_testsuite/div-rem.wast", "misc_testsuite/embenchen_fannkuch.wast", @@ -416,16 +415,13 @@ impl WastTest { "misc_testsuite/gc/i31ref-of-global-initializers.wast", "misc_testsuite/gc/i31ref-tables.wast", "misc_testsuite/gc/ref-test.wast", - "misc_testsuite/gc/struct-instructions.wast", "misc_testsuite/int-to-float-splat.wast", "misc_testsuite/issue1809.wast", "misc_testsuite/issue4840.wast", "misc_testsuite/issue4890.wast", "misc_testsuite/issue6562.wast", - "misc_testsuite/issue694.wast", 
"misc_testsuite/many_table_gets_lead_to_gc.wast", "misc_testsuite/memory-combos.wast", - "misc_testsuite/memory64/codegen.wast", "misc_testsuite/memory64/simd.wast", "misc_testsuite/memory64/threads.wast", "misc_testsuite/misc_traps.wast", @@ -458,22 +454,17 @@ impl WastTest { "misc_testsuite/winch/_simd_load.wast", "misc_testsuite/winch/_simd_multivalue.wast", "misc_testsuite/winch/_simd_store.wast", - "misc_testsuite/winch/br_table.wast", "misc_testsuite/winch/float-comparison.wast", "misc_testsuite/winch/global.wast", "misc_testsuite/winch/select.wast", "misc_testsuite/winch/table_fill.wast", "misc_testsuite/winch/table_get.wast", "misc_testsuite/winch/table_set.wast", - "spec_testsuite/align.wast", "spec_testsuite/block.wast", - "spec_testsuite/br.wast", "spec_testsuite/br_if.wast", - "spec_testsuite/br_table.wast", "spec_testsuite/bulk.wast", "spec_testsuite/call.wast", "spec_testsuite/call_indirect.wast", - "spec_testsuite/const.wast", "spec_testsuite/conversions.wast", "spec_testsuite/elem.wast", "spec_testsuite/endianness.wast", @@ -486,9 +477,7 @@ impl WastTest { "spec_testsuite/fac.wast", "spec_testsuite/float_exprs.wast", "spec_testsuite/float_literals.wast", - "spec_testsuite/float_memory.wast", "spec_testsuite/float_misc.wast", - "spec_testsuite/func.wast", "spec_testsuite/func_ptrs.wast", "spec_testsuite/global.wast", "spec_testsuite/i32.wast", @@ -506,19 +495,13 @@ impl WastTest { "spec_testsuite/loop.wast", "spec_testsuite/memory.wast", "spec_testsuite/memory_grow.wast", - "spec_testsuite/memory_redundancy.wast", "spec_testsuite/nop.wast", "spec_testsuite/proposals/annotations/simd_lane.wast", "spec_testsuite/proposals/extended-const/elem.wast", "spec_testsuite/proposals/extended-const/global.wast", - "spec_testsuite/proposals/multi-memory/align.wast", - "spec_testsuite/proposals/multi-memory/align0.wast", "spec_testsuite/proposals/multi-memory/float_exprs0.wast", "spec_testsuite/proposals/multi-memory/float_exprs1.wast", - 
"spec_testsuite/proposals/multi-memory/float_memory0.wast", "spec_testsuite/proposals/multi-memory/imports.wast", - "spec_testsuite/proposals/multi-memory/imports0.wast", - "spec_testsuite/proposals/multi-memory/imports3.wast", "spec_testsuite/proposals/multi-memory/linking0.wast", "spec_testsuite/proposals/multi-memory/linking3.wast", "spec_testsuite/proposals/multi-memory/load.wast", @@ -538,7 +521,6 @@ impl WastTest { "spec_testsuite/proposals/threads/memory.wast", "spec_testsuite/ref_func.wast", "spec_testsuite/ref_is_null.wast", - "spec_testsuite/return.wast", "spec_testsuite/select.wast", "spec_testsuite/simd_address.wast", "spec_testsuite/simd_align.wast", diff --git a/pulley/src/interp.rs b/pulley/src/interp.rs index 5a25f3f7fdd1..061800853ac1 100644 --- a/pulley/src/interp.rs +++ b/pulley/src/interp.rs @@ -1599,6 +1599,16 @@ impl OpVisitor for Interpreter<'_> { self.state[operands.dst].set_u64(a | b); ControlFlow::Continue(()) } + + fn fconst32(&mut self, dst: FReg, bits: u32) -> ControlFlow { + self.state[dst].set_f32(f32::from_bits(bits)); + ControlFlow::Continue(()) + } + + fn fconst64(&mut self, dst: FReg, bits: u64) -> ControlFlow { + self.state[dst].set_f64(f64::from_bits(bits)); + ControlFlow::Continue(()) + } } impl ExtendedOpVisitor for Interpreter<'_> { diff --git a/pulley/src/lib.rs b/pulley/src/lib.rs index 784e3b7aaeac..a345f480806f 100644 --- a/pulley/src/lib.rs +++ b/pulley/src/lib.rs @@ -333,6 +333,11 @@ macro_rules! 
for_each_op { xor32 = XOr32 { operands: BinaryOperands }; /// `dst = src1 | src2` xor64 = XOr64 { operands: BinaryOperands }; + + /// `low32(dst) = bits` + fconst32 = FConst32 { dst: FReg, bits: u32 }; + /// `dst = bits` + fconst64 = FConst64 { dst: FReg, bits: u64 }; } }; } From da41a5582a2ee6fc4e807c7643a9fccf2fee2c48 Mon Sep 17 00:00:00 2001 From: ad hoc Date: Wed, 11 Dec 2024 19:48:13 +0100 Subject: [PATCH 23/30] Winch: Implement rem for aarch64 (#9781) * implement copy/clone for RemKind * implement remainder operation for aarch64 * add tests * fmt pass * review edits --- tests/disas/winch/aarch64/i32_rems/const.wat | 35 ++++++++++++ .../disas/winch/aarch64/i32_rems/one_zero.wat | 35 ++++++++++++ .../disas/winch/aarch64/i32_rems/overflow.wat | 35 ++++++++++++ tests/disas/winch/aarch64/i32_rems/params.wat | 35 ++++++++++++ .../winch/aarch64/i32_rems/zero_zero.wat | 35 ++++++++++++ tests/disas/winch/aarch64/i32_remu/const.wat | 35 ++++++++++++ .../disas/winch/aarch64/i32_remu/one_zero.wat | 35 ++++++++++++ tests/disas/winch/aarch64/i32_remu/params.wat | 35 ++++++++++++ tests/disas/winch/aarch64/i32_remu/signed.wat | 35 ++++++++++++ .../winch/aarch64/i32_remu/zero_zero.wat | 35 ++++++++++++ tests/disas/winch/aarch64/i64_rems/const.wat | 33 ++++++++++++ .../disas/winch/aarch64/i64_rems/one_zero.wat | 33 ++++++++++++ .../disas/winch/aarch64/i64_rems/overflow.wat | 33 ++++++++++++ tests/disas/winch/aarch64/i64_rems/params.wat | 33 ++++++++++++ .../winch/aarch64/i64_rems/zero_zero.wat | 33 ++++++++++++ tests/disas/winch/aarch64/i64_remu/const.wat | 33 ++++++++++++ .../disas/winch/aarch64/i64_remu/one_zero.wat | 33 ++++++++++++ tests/disas/winch/aarch64/i64_remu/params.wat | 33 ++++++++++++ tests/disas/winch/aarch64/i64_remu/signed.wat | 33 ++++++++++++ .../winch/aarch64/i64_remu/zero_zero.wat | 33 ++++++++++++ winch/codegen/src/isa/aarch64/asm.rs | 54 ++++++++++++++++++- winch/codegen/src/isa/aarch64/masm.rs | 12 ++++- winch/codegen/src/masm.rs | 7 +++ 23 files 
changed, 750 insertions(+), 3 deletions(-) create mode 100644 tests/disas/winch/aarch64/i32_rems/const.wat create mode 100644 tests/disas/winch/aarch64/i32_rems/one_zero.wat create mode 100644 tests/disas/winch/aarch64/i32_rems/overflow.wat create mode 100644 tests/disas/winch/aarch64/i32_rems/params.wat create mode 100644 tests/disas/winch/aarch64/i32_rems/zero_zero.wat create mode 100644 tests/disas/winch/aarch64/i32_remu/const.wat create mode 100644 tests/disas/winch/aarch64/i32_remu/one_zero.wat create mode 100644 tests/disas/winch/aarch64/i32_remu/params.wat create mode 100644 tests/disas/winch/aarch64/i32_remu/signed.wat create mode 100644 tests/disas/winch/aarch64/i32_remu/zero_zero.wat create mode 100644 tests/disas/winch/aarch64/i64_rems/const.wat create mode 100644 tests/disas/winch/aarch64/i64_rems/one_zero.wat create mode 100644 tests/disas/winch/aarch64/i64_rems/overflow.wat create mode 100644 tests/disas/winch/aarch64/i64_rems/params.wat create mode 100644 tests/disas/winch/aarch64/i64_rems/zero_zero.wat create mode 100644 tests/disas/winch/aarch64/i64_remu/const.wat create mode 100644 tests/disas/winch/aarch64/i64_remu/one_zero.wat create mode 100644 tests/disas/winch/aarch64/i64_remu/params.wat create mode 100644 tests/disas/winch/aarch64/i64_remu/signed.wat create mode 100644 tests/disas/winch/aarch64/i64_remu/zero_zero.wat diff --git a/tests/disas/winch/aarch64/i32_rems/const.wat b/tests/disas/winch/aarch64/i32_rems/const.wat new file mode 100644 index 000000000000..b01e691bae13 --- /dev/null +++ b/tests/disas/winch/aarch64/i32_rems/const.wat @@ -0,0 +1,35 @@ +;;! target = "aarch64" +;;! test = "winch" + +(module + (func (result i32) + (i32.const 7) + (i32.const 5) + (i32.rem_s) + ) +) + +;; wasm[0]::function[0]: +;; stp x29, x30, [sp, #-0x10]! 
+;; mov x29, sp +;; mov x28, sp +;; mov x9, x0 +;; sub sp, sp, #0x10 +;; mov x28, sp +;; stur x0, [x28, #8] +;; stur x1, [x28] +;; mov x16, #5 +;; mov w0, w16 +;; mov x16, #7 +;; mov w1, w16 +;; cbz x0, #0x58 +;; 34: sxtw x0, w0 +;; sxtw x1, w1 +;; sdiv x16, x1, x0 +;; msub x1, x0, x16, x1 +;; mov w0, w1 +;; add sp, sp, #0x10 +;; mov x28, sp +;; ldp x29, x30, [sp], #0x10 +;; ret +;; 58: .byte 0x1f, 0xc1, 0x00, 0x00 diff --git a/tests/disas/winch/aarch64/i32_rems/one_zero.wat b/tests/disas/winch/aarch64/i32_rems/one_zero.wat new file mode 100644 index 000000000000..d38495fedd4c --- /dev/null +++ b/tests/disas/winch/aarch64/i32_rems/one_zero.wat @@ -0,0 +1,35 @@ +;;! target = "aarch64" +;;! test = "winch" + +(module + (func (result i32) + (i32.const 1) + (i32.const 0) + (i32.rem_s) + ) +) + +;; wasm[0]::function[0]: +;; stp x29, x30, [sp, #-0x10]! +;; mov x29, sp +;; mov x28, sp +;; mov x9, x0 +;; sub sp, sp, #0x10 +;; mov x28, sp +;; stur x0, [x28, #8] +;; stur x1, [x28] +;; mov x16, #0 +;; mov w0, w16 +;; mov x16, #1 +;; mov w1, w16 +;; cbz x0, #0x58 +;; 34: sxtw x0, w0 +;; sxtw x1, w1 +;; sdiv x16, x1, x0 +;; msub x1, x0, x16, x1 +;; mov w0, w1 +;; add sp, sp, #0x10 +;; mov x28, sp +;; ldp x29, x30, [sp], #0x10 +;; ret +;; 58: .byte 0x1f, 0xc1, 0x00, 0x00 diff --git a/tests/disas/winch/aarch64/i32_rems/overflow.wat b/tests/disas/winch/aarch64/i32_rems/overflow.wat new file mode 100644 index 000000000000..f16ef7bacc85 --- /dev/null +++ b/tests/disas/winch/aarch64/i32_rems/overflow.wat @@ -0,0 +1,35 @@ +;;! target = "aarch64" +;;! test = "winch" + +(module + (func (result i32) + (i32.const 0x80000000) + (i32.const -1) + (i32.rem_s) + ) +) + +;; wasm[0]::function[0]: +;; stp x29, x30, [sp, #-0x10]! 
+;; mov x29, sp +;; mov x28, sp +;; mov x9, x0 +;; sub sp, sp, #0x10 +;; mov x28, sp +;; stur x0, [x28, #8] +;; stur x1, [x28] +;; orr x16, xzr, #0xffffffff +;; mov w0, w16 +;; mov x16, #0x80000000 +;; mov w1, w16 +;; cbz x0, #0x58 +;; 34: sxtw x0, w0 +;; sxtw x1, w1 +;; sdiv x16, x1, x0 +;; msub x1, x0, x16, x1 +;; mov w0, w1 +;; add sp, sp, #0x10 +;; mov x28, sp +;; ldp x29, x30, [sp], #0x10 +;; ret +;; 58: .byte 0x1f, 0xc1, 0x00, 0x00 diff --git a/tests/disas/winch/aarch64/i32_rems/params.wat b/tests/disas/winch/aarch64/i32_rems/params.wat new file mode 100644 index 000000000000..84fc8b067816 --- /dev/null +++ b/tests/disas/winch/aarch64/i32_rems/params.wat @@ -0,0 +1,35 @@ +;;! target = "aarch64" +;;! test = "winch" + +(module + (func (param i32) (param i32) (result i32) + (local.get 0) + (local.get 1) + (i32.rem_s) + ) +) + +;; wasm[0]::function[0]: +;; stp x29, x30, [sp, #-0x10]! +;; mov x29, sp +;; mov x28, sp +;; mov x9, x0 +;; sub sp, sp, #0x18 +;; mov x28, sp +;; stur x0, [x28, #0x10] +;; stur x1, [x28, #8] +;; stur w2, [x28, #4] +;; stur w3, [x28] +;; ldur w0, [x28] +;; ldur w1, [x28, #4] +;; cbz x0, #0x58 +;; 34: sxtw x0, w0 +;; sxtw x1, w1 +;; sdiv x16, x1, x0 +;; msub x1, x0, x16, x1 +;; mov w0, w1 +;; add sp, sp, #0x18 +;; mov x28, sp +;; ldp x29, x30, [sp], #0x10 +;; ret +;; 58: .byte 0x1f, 0xc1, 0x00, 0x00 diff --git a/tests/disas/winch/aarch64/i32_rems/zero_zero.wat b/tests/disas/winch/aarch64/i32_rems/zero_zero.wat new file mode 100644 index 000000000000..d032f4340b24 --- /dev/null +++ b/tests/disas/winch/aarch64/i32_rems/zero_zero.wat @@ -0,0 +1,35 @@ +;;! target = "aarch64" +;;! test = "winch" + +(module + (func (result i32) + (i32.const 0) + (i32.const 0) + (i32.rem_s) + ) +) + +;; wasm[0]::function[0]: +;; stp x29, x30, [sp, #-0x10]! 
+;; mov x29, sp +;; mov x28, sp +;; mov x9, x0 +;; sub sp, sp, #0x10 +;; mov x28, sp +;; stur x0, [x28, #8] +;; stur x1, [x28] +;; mov x16, #0 +;; mov w0, w16 +;; mov x16, #0 +;; mov w1, w16 +;; cbz x0, #0x58 +;; 34: sxtw x0, w0 +;; sxtw x1, w1 +;; sdiv x16, x1, x0 +;; msub x1, x0, x16, x1 +;; mov w0, w1 +;; add sp, sp, #0x10 +;; mov x28, sp +;; ldp x29, x30, [sp], #0x10 +;; ret +;; 58: .byte 0x1f, 0xc1, 0x00, 0x00 diff --git a/tests/disas/winch/aarch64/i32_remu/const.wat b/tests/disas/winch/aarch64/i32_remu/const.wat new file mode 100644 index 000000000000..7b073a44a039 --- /dev/null +++ b/tests/disas/winch/aarch64/i32_remu/const.wat @@ -0,0 +1,35 @@ +;;! target = "aarch64" +;;! test = "winch" + +(module + (func (result i32) + (i32.const 7) + (i32.const 5) + (i32.rem_u) + ) +) + +;; wasm[0]::function[0]: +;; stp x29, x30, [sp, #-0x10]! +;; mov x29, sp +;; mov x28, sp +;; mov x9, x0 +;; sub sp, sp, #0x10 +;; mov x28, sp +;; stur x0, [x28, #8] +;; stur x1, [x28] +;; mov x16, #5 +;; mov w0, w16 +;; mov x16, #7 +;; mov w1, w16 +;; cbz x0, #0x58 +;; 34: mov w0, w0 +;; mov w1, w1 +;; udiv x16, x1, x0 +;; msub x1, x0, x16, x1 +;; mov w0, w1 +;; add sp, sp, #0x10 +;; mov x28, sp +;; ldp x29, x30, [sp], #0x10 +;; ret +;; 58: .byte 0x1f, 0xc1, 0x00, 0x00 diff --git a/tests/disas/winch/aarch64/i32_remu/one_zero.wat b/tests/disas/winch/aarch64/i32_remu/one_zero.wat new file mode 100644 index 000000000000..484229500d98 --- /dev/null +++ b/tests/disas/winch/aarch64/i32_remu/one_zero.wat @@ -0,0 +1,35 @@ +;;! target = "aarch64" +;;! test = "winch" + +(module + (func (result i32) + (i32.const 1) + (i32.const 0) + (i32.rem_u) + ) +) + +;; wasm[0]::function[0]: +;; stp x29, x30, [sp, #-0x10]! 
+;; mov x29, sp +;; mov x28, sp +;; mov x9, x0 +;; sub sp, sp, #0x10 +;; mov x28, sp +;; stur x0, [x28, #8] +;; stur x1, [x28] +;; mov x16, #0 +;; mov w0, w16 +;; mov x16, #1 +;; mov w1, w16 +;; cbz x0, #0x58 +;; 34: mov w0, w0 +;; mov w1, w1 +;; udiv x16, x1, x0 +;; msub x1, x0, x16, x1 +;; mov w0, w1 +;; add sp, sp, #0x10 +;; mov x28, sp +;; ldp x29, x30, [sp], #0x10 +;; ret +;; 58: .byte 0x1f, 0xc1, 0x00, 0x00 diff --git a/tests/disas/winch/aarch64/i32_remu/params.wat b/tests/disas/winch/aarch64/i32_remu/params.wat new file mode 100644 index 000000000000..d107b220b14f --- /dev/null +++ b/tests/disas/winch/aarch64/i32_remu/params.wat @@ -0,0 +1,35 @@ +;;! target = "aarch64" +;;! test = "winch" + +(module + (func (param i32) (param i32) (result i32) + (local.get 0) + (local.get 1) + (i32.rem_u) + ) +) + +;; wasm[0]::function[0]: +;; stp x29, x30, [sp, #-0x10]! +;; mov x29, sp +;; mov x28, sp +;; mov x9, x0 +;; sub sp, sp, #0x18 +;; mov x28, sp +;; stur x0, [x28, #0x10] +;; stur x1, [x28, #8] +;; stur w2, [x28, #4] +;; stur w3, [x28] +;; ldur w0, [x28] +;; ldur w1, [x28, #4] +;; cbz x0, #0x58 +;; 34: mov w0, w0 +;; mov w1, w1 +;; udiv x16, x1, x0 +;; msub x1, x0, x16, x1 +;; mov w0, w1 +;; add sp, sp, #0x18 +;; mov x28, sp +;; ldp x29, x30, [sp], #0x10 +;; ret +;; 58: .byte 0x1f, 0xc1, 0x00, 0x00 diff --git a/tests/disas/winch/aarch64/i32_remu/signed.wat b/tests/disas/winch/aarch64/i32_remu/signed.wat new file mode 100644 index 000000000000..9f205360ab15 --- /dev/null +++ b/tests/disas/winch/aarch64/i32_remu/signed.wat @@ -0,0 +1,35 @@ +;;! target = "aarch64" +;;! test = "winch" + +(module + (func (result i32) + (i32.const -1) + (i32.const -1) + (i32.rem_u) + ) +) + +;; wasm[0]::function[0]: +;; stp x29, x30, [sp, #-0x10]! 
+;; mov x29, sp +;; mov x28, sp +;; mov x9, x0 +;; sub sp, sp, #0x10 +;; mov x28, sp +;; stur x0, [x28, #8] +;; stur x1, [x28] +;; orr x16, xzr, #0xffffffff +;; mov w0, w16 +;; orr x16, xzr, #0xffffffff +;; mov w1, w16 +;; cbz x0, #0x58 +;; 34: mov w0, w0 +;; mov w1, w1 +;; udiv x16, x1, x0 +;; msub x1, x0, x16, x1 +;; mov w0, w1 +;; add sp, sp, #0x10 +;; mov x28, sp +;; ldp x29, x30, [sp], #0x10 +;; ret +;; 58: .byte 0x1f, 0xc1, 0x00, 0x00 diff --git a/tests/disas/winch/aarch64/i32_remu/zero_zero.wat b/tests/disas/winch/aarch64/i32_remu/zero_zero.wat new file mode 100644 index 000000000000..4b5b48b8d14a --- /dev/null +++ b/tests/disas/winch/aarch64/i32_remu/zero_zero.wat @@ -0,0 +1,35 @@ +;;! target = "aarch64" +;;! test = "winch" + +(module + (func (result i32) + (i32.const 0) + (i32.const 0) + (i32.rem_u) + ) +) + +;; wasm[0]::function[0]: +;; stp x29, x30, [sp, #-0x10]! +;; mov x29, sp +;; mov x28, sp +;; mov x9, x0 +;; sub sp, sp, #0x10 +;; mov x28, sp +;; stur x0, [x28, #8] +;; stur x1, [x28] +;; mov x16, #0 +;; mov w0, w16 +;; mov x16, #0 +;; mov w1, w16 +;; cbz x0, #0x58 +;; 34: mov w0, w0 +;; mov w1, w1 +;; udiv x16, x1, x0 +;; msub x1, x0, x16, x1 +;; mov w0, w1 +;; add sp, sp, #0x10 +;; mov x28, sp +;; ldp x29, x30, [sp], #0x10 +;; ret +;; 58: .byte 0x1f, 0xc1, 0x00, 0x00 diff --git a/tests/disas/winch/aarch64/i64_rems/const.wat b/tests/disas/winch/aarch64/i64_rems/const.wat new file mode 100644 index 000000000000..b5c29486f77e --- /dev/null +++ b/tests/disas/winch/aarch64/i64_rems/const.wat @@ -0,0 +1,33 @@ +;;! target = "aarch64" +;;! test = "winch" + +(module + (func (result i64) + (i64.const 7) + (i64.const 5) + (i64.rem_s) + ) +) + +;; wasm[0]::function[0]: +;; stp x29, x30, [sp, #-0x10]! 
+;; mov x29, sp +;; mov x28, sp +;; mov x9, x0 +;; sub sp, sp, #0x10 +;; mov x28, sp +;; stur x0, [x28, #8] +;; stur x1, [x28] +;; mov x16, #5 +;; mov x0, x16 +;; mov x16, #7 +;; mov x1, x16 +;; cbz x0, #0x50 +;; 34: sdiv x16, x1, x0 +;; msub x1, x0, x16, x1 +;; mov x0, x1 +;; add sp, sp, #0x10 +;; mov x28, sp +;; ldp x29, x30, [sp], #0x10 +;; ret +;; 50: .byte 0x1f, 0xc1, 0x00, 0x00 diff --git a/tests/disas/winch/aarch64/i64_rems/one_zero.wat b/tests/disas/winch/aarch64/i64_rems/one_zero.wat new file mode 100644 index 000000000000..144abaaa0f50 --- /dev/null +++ b/tests/disas/winch/aarch64/i64_rems/one_zero.wat @@ -0,0 +1,33 @@ +;;! target = "aarch64" +;;! test = "winch" + +(module + (func (result i64) + (i64.const 1) + (i64.const 0) + (i64.rem_s) + ) +) + +;; wasm[0]::function[0]: +;; stp x29, x30, [sp, #-0x10]! +;; mov x29, sp +;; mov x28, sp +;; mov x9, x0 +;; sub sp, sp, #0x10 +;; mov x28, sp +;; stur x0, [x28, #8] +;; stur x1, [x28] +;; mov x16, #0 +;; mov x0, x16 +;; mov x16, #1 +;; mov x1, x16 +;; cbz x0, #0x50 +;; 34: sdiv x16, x1, x0 +;; msub x1, x0, x16, x1 +;; mov x0, x1 +;; add sp, sp, #0x10 +;; mov x28, sp +;; ldp x29, x30, [sp], #0x10 +;; ret +;; 50: .byte 0x1f, 0xc1, 0x00, 0x00 diff --git a/tests/disas/winch/aarch64/i64_rems/overflow.wat b/tests/disas/winch/aarch64/i64_rems/overflow.wat new file mode 100644 index 000000000000..78b82a695075 --- /dev/null +++ b/tests/disas/winch/aarch64/i64_rems/overflow.wat @@ -0,0 +1,33 @@ +;;! target = "aarch64" +;;! test = "winch" + +(module + (func (result i64) + (i64.const 0x8000000000000000) + (i64.const -1) + (i64.rem_s) + ) +) + +;; wasm[0]::function[0]: +;; stp x29, x30, [sp, #-0x10]! 
+;; mov x29, sp +;; mov x28, sp +;; mov x9, x0 +;; sub sp, sp, #0x10 +;; mov x28, sp +;; stur x0, [x28, #8] +;; stur x1, [x28] +;; mov x16, #-1 +;; mov x0, x16 +;; mov x16, #-0x8000000000000000 +;; mov x1, x16 +;; cbz x0, #0x50 +;; 34: sdiv x16, x1, x0 +;; msub x1, x0, x16, x1 +;; mov x0, x1 +;; add sp, sp, #0x10 +;; mov x28, sp +;; ldp x29, x30, [sp], #0x10 +;; ret +;; 50: .byte 0x1f, 0xc1, 0x00, 0x00 diff --git a/tests/disas/winch/aarch64/i64_rems/params.wat b/tests/disas/winch/aarch64/i64_rems/params.wat new file mode 100644 index 000000000000..3022b8adc970 --- /dev/null +++ b/tests/disas/winch/aarch64/i64_rems/params.wat @@ -0,0 +1,33 @@ +;;! target = "aarch64" +;;! test = "winch" + +(module + (func (param i64) (param i64) (result i64) + (local.get 0) + (local.get 1) + (i64.rem_s) + ) +) + +;; wasm[0]::function[0]: +;; stp x29, x30, [sp, #-0x10]! +;; mov x29, sp +;; mov x28, sp +;; mov x9, x0 +;; sub sp, sp, #0x20 +;; mov x28, sp +;; stur x0, [x28, #0x18] +;; stur x1, [x28, #0x10] +;; stur x2, [x28, #8] +;; stur x3, [x28] +;; ldur x0, [x28] +;; ldur x1, [x28, #8] +;; cbz x0, #0x50 +;; 34: sdiv x16, x1, x0 +;; msub x1, x0, x16, x1 +;; mov x0, x1 +;; add sp, sp, #0x20 +;; mov x28, sp +;; ldp x29, x30, [sp], #0x10 +;; ret +;; 50: .byte 0x1f, 0xc1, 0x00, 0x00 diff --git a/tests/disas/winch/aarch64/i64_rems/zero_zero.wat b/tests/disas/winch/aarch64/i64_rems/zero_zero.wat new file mode 100644 index 000000000000..59819f6ed1ba --- /dev/null +++ b/tests/disas/winch/aarch64/i64_rems/zero_zero.wat @@ -0,0 +1,33 @@ +;;! target = "aarch64" +;;! test = "winch" + +(module + (func (result i64) + (i64.const 0) + (i64.const 0) + (i64.rem_s) + ) +) + +;; wasm[0]::function[0]: +;; stp x29, x30, [sp, #-0x10]! 
+;; mov x29, sp +;; mov x28, sp +;; mov x9, x0 +;; sub sp, sp, #0x10 +;; mov x28, sp +;; stur x0, [x28, #8] +;; stur x1, [x28] +;; mov x16, #0 +;; mov x0, x16 +;; mov x16, #0 +;; mov x1, x16 +;; cbz x0, #0x50 +;; 34: sdiv x16, x1, x0 +;; msub x1, x0, x16, x1 +;; mov x0, x1 +;; add sp, sp, #0x10 +;; mov x28, sp +;; ldp x29, x30, [sp], #0x10 +;; ret +;; 50: .byte 0x1f, 0xc1, 0x00, 0x00 diff --git a/tests/disas/winch/aarch64/i64_remu/const.wat b/tests/disas/winch/aarch64/i64_remu/const.wat new file mode 100644 index 000000000000..07b65bc1a32e --- /dev/null +++ b/tests/disas/winch/aarch64/i64_remu/const.wat @@ -0,0 +1,33 @@ +;;! target = "aarch64" +;;! test = "winch" + +(module + (func (result i64) + (i64.const 7) + (i64.const 5) + (i64.rem_u) + ) +) + +;; wasm[0]::function[0]: +;; stp x29, x30, [sp, #-0x10]! +;; mov x29, sp +;; mov x28, sp +;; mov x9, x0 +;; sub sp, sp, #0x10 +;; mov x28, sp +;; stur x0, [x28, #8] +;; stur x1, [x28] +;; mov x16, #5 +;; mov x0, x16 +;; mov x16, #7 +;; mov x1, x16 +;; cbz x0, #0x50 +;; 34: udiv x16, x1, x0 +;; msub x1, x0, x16, x1 +;; mov x0, x1 +;; add sp, sp, #0x10 +;; mov x28, sp +;; ldp x29, x30, [sp], #0x10 +;; ret +;; 50: .byte 0x1f, 0xc1, 0x00, 0x00 diff --git a/tests/disas/winch/aarch64/i64_remu/one_zero.wat b/tests/disas/winch/aarch64/i64_remu/one_zero.wat new file mode 100644 index 000000000000..a9a756cb5684 --- /dev/null +++ b/tests/disas/winch/aarch64/i64_remu/one_zero.wat @@ -0,0 +1,33 @@ +;;! target = "aarch64" +;;! test = "winch" + +(module + (func (result i64) + (i64.const 1) + (i64.const 0) + (i64.rem_u) + ) +) + +;; wasm[0]::function[0]: +;; stp x29, x30, [sp, #-0x10]! 
+;; mov x29, sp +;; mov x28, sp +;; mov x9, x0 +;; sub sp, sp, #0x10 +;; mov x28, sp +;; stur x0, [x28, #8] +;; stur x1, [x28] +;; mov x16, #0 +;; mov x0, x16 +;; mov x16, #1 +;; mov x1, x16 +;; cbz x0, #0x50 +;; 34: udiv x16, x1, x0 +;; msub x1, x0, x16, x1 +;; mov x0, x1 +;; add sp, sp, #0x10 +;; mov x28, sp +;; ldp x29, x30, [sp], #0x10 +;; ret +;; 50: .byte 0x1f, 0xc1, 0x00, 0x00 diff --git a/tests/disas/winch/aarch64/i64_remu/params.wat b/tests/disas/winch/aarch64/i64_remu/params.wat new file mode 100644 index 000000000000..b1244e4315d0 --- /dev/null +++ b/tests/disas/winch/aarch64/i64_remu/params.wat @@ -0,0 +1,33 @@ +;;! target = "aarch64" +;;! test = "winch" + +(module + (func (param i64) (param i64) (result i64) + (local.get 0) + (local.get 1) + (i64.rem_u) + ) +) + +;; wasm[0]::function[0]: +;; stp x29, x30, [sp, #-0x10]! +;; mov x29, sp +;; mov x28, sp +;; mov x9, x0 +;; sub sp, sp, #0x20 +;; mov x28, sp +;; stur x0, [x28, #0x18] +;; stur x1, [x28, #0x10] +;; stur x2, [x28, #8] +;; stur x3, [x28] +;; ldur x0, [x28] +;; ldur x1, [x28, #8] +;; cbz x0, #0x50 +;; 34: udiv x16, x1, x0 +;; msub x1, x0, x16, x1 +;; mov x0, x1 +;; add sp, sp, #0x20 +;; mov x28, sp +;; ldp x29, x30, [sp], #0x10 +;; ret +;; 50: .byte 0x1f, 0xc1, 0x00, 0x00 diff --git a/tests/disas/winch/aarch64/i64_remu/signed.wat b/tests/disas/winch/aarch64/i64_remu/signed.wat new file mode 100644 index 000000000000..866d842403a8 --- /dev/null +++ b/tests/disas/winch/aarch64/i64_remu/signed.wat @@ -0,0 +1,33 @@ +;;! target = "aarch64" +;;! test = "winch" + +(module + (func (result i64) + (i64.const -1) + (i64.const -1) + (i64.rem_u) + ) +) + +;; wasm[0]::function[0]: +;; stp x29, x30, [sp, #-0x10]! 
+;; mov x29, sp +;; mov x28, sp +;; mov x9, x0 +;; sub sp, sp, #0x10 +;; mov x28, sp +;; stur x0, [x28, #8] +;; stur x1, [x28] +;; mov x16, #-1 +;; mov x0, x16 +;; mov x16, #-1 +;; mov x1, x16 +;; cbz x0, #0x50 +;; 34: udiv x16, x1, x0 +;; msub x1, x0, x16, x1 +;; mov x0, x1 +;; add sp, sp, #0x10 +;; mov x28, sp +;; ldp x29, x30, [sp], #0x10 +;; ret +;; 50: .byte 0x1f, 0xc1, 0x00, 0x00 diff --git a/tests/disas/winch/aarch64/i64_remu/zero_zero.wat b/tests/disas/winch/aarch64/i64_remu/zero_zero.wat new file mode 100644 index 000000000000..c2d2c6786daf --- /dev/null +++ b/tests/disas/winch/aarch64/i64_remu/zero_zero.wat @@ -0,0 +1,33 @@ +;;! target = "aarch64" +;;! test = "winch" + +(module + (func (result i64) + (i64.const 0) + (i64.const 0) + (i64.rem_u) + ) +) + +;; wasm[0]::function[0]: +;; stp x29, x30, [sp, #-0x10]! +;; mov x29, sp +;; mov x28, sp +;; mov x9, x0 +;; sub sp, sp, #0x10 +;; mov x28, sp +;; stur x0, [x28, #8] +;; stur x1, [x28] +;; mov x16, #0 +;; mov x0, x16 +;; mov x16, #0 +;; mov x1, x16 +;; cbz x0, #0x50 +;; 34: udiv x16, x1, x0 +;; msub x1, x0, x16, x1 +;; mov x0, x1 +;; add sp, sp, #0x10 +;; mov x28, sp +;; ldp x29, x30, [sp], #0x10 +;; ret +;; 50: .byte 0x1f, 0xc1, 0x00, 0x00 diff --git a/winch/codegen/src/isa/aarch64/asm.rs b/winch/codegen/src/isa/aarch64/asm.rs index 47905aba280d..78a04ae0fa2c 100644 --- a/winch/codegen/src/isa/aarch64/asm.rs +++ b/winch/codegen/src/isa/aarch64/asm.rs @@ -2,7 +2,9 @@ use super::{address::Address, regs}; use crate::aarch64::regs::zero; -use crate::masm::{DivKind, ExtendKind, FloatCmpKind, IntCmpKind, RoundingMode, ShiftKind}; +use crate::masm::{ + DivKind, ExtendKind, FloatCmpKind, IntCmpKind, RemKind, RoundingMode, ShiftKind, +}; use crate::CallingConvention; use crate::{ masm::OperandSize, @@ -462,6 +464,56 @@ impl Assembler { ); } + /// Signed/unsigned remainder operation with three registers. 
+ pub fn rem_rrr( + &mut self, + divisor: Reg, + dividend: Reg, + dest: Writable<Reg>, + kind: RemKind, + size: OperandSize, + ) { + // Check for division by 0 + self.trapz(divisor, TrapCode::INTEGER_DIVISION_BY_ZERO); + + // `cranelift-codegen` doesn't support emitting u/sdiv for anything but I64, + // we therefore sign- or zero-extend both operands to 64 bits. + // see: https://github.com/bytecodealliance/wasmtime/issues/9766 + if size == OperandSize::S32 { + let extend_kind = if kind.is_signed() { + ExtendKind::I64Extend32S + } else { + ExtendKind::I64ExtendI32U + }; + + self.extend(divisor, writable!(divisor), extend_kind); + self.extend(dividend, writable!(dividend), extend_kind); + } + + let op = match kind { + RemKind::Signed => ALUOp::SDiv, + RemKind::Unsigned => ALUOp::UDiv, + }; + + let scratch = regs::scratch(); + self.emit_alu_rrr( + op, + divisor, + dividend, + writable!(scratch.into()), + OperandSize::S64, + ); + + self.emit_alu_rrrr( + ALUOp3::MSub, + scratch, + divisor, + dest.map(Into::into), + dividend, + OperandSize::S64, + ); + } + /// And with three registers. 
pub fn and_rrr(&mut self, rm: Reg, rn: Reg, rd: WritableReg, size: OperandSize) { self.emit_alu_rrr(ALUOp::And, rm, rn, rd, size); diff --git a/winch/codegen/src/isa/aarch64/masm.rs b/winch/codegen/src/isa/aarch64/masm.rs index 79d0d388c70b..e4ba5619bcb5 100644 --- a/winch/codegen/src/isa/aarch64/masm.rs +++ b/winch/codegen/src/isa/aarch64/masm.rs @@ -483,8 +483,16 @@ impl Masm for MacroAssembler { }) } - fn rem(&mut self, _context: &mut CodeGenContext, _kind: RemKind, _size: OperandSize) { - todo!() + fn rem(&mut self, context: &mut CodeGenContext, kind: RemKind, size: OperandSize) { + context.binop(self, size, |this, dividend, divisor, size| { + this.asm + .rem_rrr(divisor, dividend, writable!(dividend), kind, size); + match size { + OperandSize::S32 => TypedReg::new(WasmValType::I32, dividend), + OperandSize::S64 => TypedReg::new(WasmValType::I64, dividend), + s => unreachable!("invalid size for remainder: {s:?}"), + } + }) } fn zero(&mut self, reg: WritableReg) { diff --git a/winch/codegen/src/masm.rs b/winch/codegen/src/masm.rs index 53e97bca3cef..9cb30372fcf9 100644 --- a/winch/codegen/src/masm.rs +++ b/winch/codegen/src/masm.rs @@ -23,6 +23,7 @@ pub(crate) enum DivKind { } /// Remainder kind. +#[derive(Copy, Clone)] pub(crate) enum RemKind { /// Signed remainder. Signed, @@ -30,6 +31,12 @@ pub(crate) enum RemKind { Unsigned, } +impl RemKind { + pub fn is_signed(&self) -> bool { + matches!(self, Self::Signed) + } +} + #[derive(Eq, PartialEq)] pub(crate) enum MulWideKind { Signed, From 8c321f7a9557e945ab4338ef33ab10d2c169a7f2 Mon Sep 17 00:00:00 2001 From: Alex Crichton Date: Wed, 11 Dec 2024 11:59:47 -0700 Subject: [PATCH 24/30] pulley: Get `block.wast` test suite passing (#9790) Filling out float compares and some miscellaneous bit-related integer instructions. 
--- .../codegen/src/isa/pulley_shared/lower.isle | 36 +++++++++ crates/wast-util/src/lib.rs | 3 - pulley/src/interp.rs | 80 +++++++++++++++++++ pulley/src/lib.rs | 27 +++++++ 4 files changed, 143 insertions(+), 3 deletions(-) diff --git a/cranelift/codegen/src/isa/pulley_shared/lower.isle b/cranelift/codegen/src/isa/pulley_shared/lower.isle index 621a6cc066f2..144915384cd2 100644 --- a/cranelift/codegen/src/isa/pulley_shared/lower.isle +++ b/cranelift/codegen/src/isa/pulley_shared/lower.isle @@ -236,6 +236,16 @@ (rule 1 (lower (has_type $I64 (bor a b))) (pulley_xor64 a b)) +;;;; Rules for `ctz` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + +(rule (lower (has_type $I32 (ctz a))) (pulley_xctz32 a)) +(rule (lower (has_type $I64 (ctz a))) (pulley_xctz64 a)) + +;;;; Rules for `clz` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + +(rule (lower (has_type $I32 (clz a))) (pulley_xclz32 a)) +(rule (lower (has_type $I64 (clz a))) (pulley_xclz64 a)) + ;;;; Rules for `icmp` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; (rule 1 (lower (icmp cc a b @ (value_type $I64))) @@ -292,6 +302,32 @@ (rule (lower_icmp ty (IntCC.UnsignedGreaterThanOrEqual) a b) (lower_icmp ty (IntCC.UnsignedLessThanOrEqual) b a)) +;;;; Rules for `fcmp` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + +(rule (lower (fcmp cc a b @ (value_type (ty_scalar_float ty)))) + (lower_fcmp ty cc a b)) + +(decl lower_fcmp (Type FloatCC Value Value) XReg) + +(rule (lower_fcmp $F32 (FloatCC.Equal) a b) (pulley_feq32 a b)) +(rule (lower_fcmp $F64 (FloatCC.Equal) a b) (pulley_feq64 a b)) +(rule (lower_fcmp $F32 (FloatCC.NotEqual) a b) (pulley_fneq32 a b)) +(rule (lower_fcmp $F64 (FloatCC.NotEqual) a b) (pulley_fneq64 a b)) +(rule (lower_fcmp $F32 (FloatCC.LessThan) a b) (pulley_flt32 a b)) +(rule (lower_fcmp $F64 (FloatCC.LessThan) a b) (pulley_flt64 a b)) +(rule (lower_fcmp $F32 (FloatCC.LessThanOrEqual) a b) (pulley_flteq32 a b)) +(rule (lower_fcmp $F64 
(FloatCC.LessThanOrEqual) a b) (pulley_flteq64 a b)) + +;; NB: Pulley doesn't have lowerings for `Ordered` or `Unordered` `FloatCC` +;; conditions as that's not needed by wasm at this time. + +;; Pulley doesn't have instructions for `>` and `>=`, so we have to reverse the +;; operation. +(rule (lower_fcmp ty (FloatCC.GreaterThan) a b) + (lower_fcmp ty (FloatCC.LessThan) b a)) +(rule (lower_fcmp ty (FloatCC.GreaterThanOrEqual) a b) + (lower_fcmp ty (FloatCC.LessThanOrEqual) b a)) + ;;;; Rules for `load` and friends ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; (decl amode (Value Offset32) Amode) diff --git a/crates/wast-util/src/lib.rs b/crates/wast-util/src/lib.rs index 16c4c3863256..262dde6e8872 100644 --- a/crates/wast-util/src/lib.rs +++ b/crates/wast-util/src/lib.rs @@ -454,13 +454,11 @@ impl WastTest { "misc_testsuite/winch/_simd_load.wast", "misc_testsuite/winch/_simd_multivalue.wast", "misc_testsuite/winch/_simd_store.wast", - "misc_testsuite/winch/float-comparison.wast", "misc_testsuite/winch/global.wast", "misc_testsuite/winch/select.wast", "misc_testsuite/winch/table_fill.wast", "misc_testsuite/winch/table_get.wast", "misc_testsuite/winch/table_set.wast", - "spec_testsuite/block.wast", "spec_testsuite/br_if.wast", "spec_testsuite/bulk.wast", "spec_testsuite/call.wast", @@ -495,7 +493,6 @@ impl WastTest { "spec_testsuite/loop.wast", "spec_testsuite/memory.wast", "spec_testsuite/memory_grow.wast", - "spec_testsuite/nop.wast", "spec_testsuite/proposals/annotations/simd_lane.wast", "spec_testsuite/proposals/extended-const/elem.wast", "spec_testsuite/proposals/extended-const/global.wast", diff --git a/pulley/src/interp.rs b/pulley/src/interp.rs index 061800853ac1..4a667dfef1ef 100644 --- a/pulley/src/interp.rs +++ b/pulley/src/interp.rs @@ -1609,6 +1609,86 @@ impl OpVisitor for Interpreter<'_> { self.state[dst].set_f64(f64::from_bits(bits)); ControlFlow::Continue(()) } + + fn feq32(&mut self, dst: XReg, src1: FReg, src2: FReg) -> ControlFlow { + let a = 
self.state[src1].get_f32(); + let b = self.state[src2].get_f32(); + self.state[dst].set_u32(u32::from(a == b)); + ControlFlow::Continue(()) + } + + fn fneq32(&mut self, dst: XReg, src1: FReg, src2: FReg) -> ControlFlow { + let a = self.state[src1].get_f32(); + let b = self.state[src2].get_f32(); + self.state[dst].set_u32(u32::from(a != b)); + ControlFlow::Continue(()) + } + + fn flt32(&mut self, dst: XReg, src1: FReg, src2: FReg) -> ControlFlow { + let a = self.state[src1].get_f32(); + let b = self.state[src2].get_f32(); + self.state[dst].set_u32(u32::from(a < b)); + ControlFlow::Continue(()) + } + + fn flteq32(&mut self, dst: XReg, src1: FReg, src2: FReg) -> ControlFlow { + let a = self.state[src1].get_f32(); + let b = self.state[src2].get_f32(); + self.state[dst].set_u32(u32::from(a <= b)); + ControlFlow::Continue(()) + } + + fn feq64(&mut self, dst: XReg, src1: FReg, src2: FReg) -> ControlFlow { + let a = self.state[src1].get_f64(); + let b = self.state[src2].get_f64(); + self.state[dst].set_u32(u32::from(a == b)); + ControlFlow::Continue(()) + } + + fn fneq64(&mut self, dst: XReg, src1: FReg, src2: FReg) -> ControlFlow { + let a = self.state[src1].get_f64(); + let b = self.state[src2].get_f64(); + self.state[dst].set_u32(u32::from(a != b)); + ControlFlow::Continue(()) + } + + fn flt64(&mut self, dst: XReg, src1: FReg, src2: FReg) -> ControlFlow { + let a = self.state[src1].get_f64(); + let b = self.state[src2].get_f64(); + self.state[dst].set_u32(u32::from(a < b)); + ControlFlow::Continue(()) + } + + fn flteq64(&mut self, dst: XReg, src1: FReg, src2: FReg) -> ControlFlow { + let a = self.state[src1].get_f64(); + let b = self.state[src2].get_f64(); + self.state[dst].set_u32(u32::from(a <= b)); + ControlFlow::Continue(()) + } + + fn xctz32(&mut self, dst: XReg, src: XReg) -> ControlFlow { + let a = self.state[src].get_u32(); + self.state[dst].set_u32(a.trailing_zeros()); + ControlFlow::Continue(()) + } + + fn xctz64(&mut self, dst: XReg, src: XReg) -> ControlFlow 
{ + let a = self.state[src].get_u64(); + self.state[dst].set_u64(a.trailing_zeros().into()); + ControlFlow::Continue(()) + } + + fn xclz32(&mut self, dst: XReg, src: XReg) -> ControlFlow { + let a = self.state[src].get_u32(); + self.state[dst].set_u32(a.leading_zeros()); + ControlFlow::Continue(()) + } + + fn xclz64(&mut self, dst: XReg, src: XReg) -> ControlFlow { + let a = self.state[src].get_u64(); + self.state[dst].set_u64(a.leading_zeros().into()); + ControlFlow::Continue(()) + } } impl ExtendedOpVisitor for Interpreter<'_> { diff --git a/pulley/src/lib.rs b/pulley/src/lib.rs index a345f480806f..cb805af65460 100644 --- a/pulley/src/lib.rs +++ b/pulley/src/lib.rs @@ -184,6 +184,16 @@ macro_rules! for_each_op { /// 64-bit wrapping subtraction: `dst = src1 - src2`. xsub64 = Xsub64 { operands: BinaryOperands }; + /// `low32(dst) = trailing_zeros(low32(src))` + xctz32 = Xctz32 { dst: XReg, src: XReg }; + /// `dst = trailing_zeros(src)` + xctz64 = Xctz64 { dst: XReg, src: XReg }; + + /// `low32(dst) = leading_zeros(low32(src))` + xclz32 = Xclz32 { dst: XReg, src: XReg }; + /// `dst = leading_zeros(src)` + xclz64 = Xclz64 { dst: XReg, src: XReg }; + /// `low32(dst) = low32(src1) << low5(src2)` xshl32 = Xshl32 { operands: BinaryOperands }; /// `low32(dst) = low32(src1) >> low5(src2)` @@ -338,6 +348,23 @@ macro_rules! 
for_each_op { fconst32 = FConst32 { dst: FReg, bits: u32 }; /// `dst = bits` fconst64 = FConst64 { dst: FReg, bits: u64 }; + + /// `low32(dst) = zext(src1 == src2)` + feq32 = Feq32 { dst: XReg, src1: FReg, src2: FReg }; + /// `low32(dst) = zext(src1 != src2)` + fneq32 = Fneq32 { dst: XReg, src1: FReg, src2: FReg }; + /// `low32(dst) = zext(src1 < src2)` + flt32 = Flt32 { dst: XReg, src1: FReg, src2: FReg }; + /// `low32(dst) = zext(src1 <= src2)` + flteq32 = Flteq32 { dst: XReg, src1: FReg, src2: FReg }; + /// `low32(dst) = zext(src1 == src2)` + feq64 = Feq64 { dst: XReg, src1: FReg, src2: FReg }; + /// `low32(dst) = zext(src1 != src2)` + fneq64 = Fneq64 { dst: XReg, src1: FReg, src2: FReg }; + /// `low32(dst) = zext(src1 < src2)` + flt64 = Flt64 { dst: XReg, src1: FReg, src2: FReg }; + /// `low32(dst) = zext(src1 <= src2)` + flteq64 = Flteq64 { dst: XReg, src1: FReg, src2: FReg }; } }; } From 49ec87da0f971819b4a853f5916d5f8c994c38ff Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sa=C3=BAl=20Cabrera?= Date: Wed, 11 Dec 2024 14:55:40 -0500 Subject: [PATCH 25/30] winch(aarch64): Implement `load_addr` (#9792) This commit fills in the `load_addr` Masm implementation. `load_addr` is mostly used for multi-value. This commit introduces some disas tests as well. --- tests/disas/winch/aarch64/call/multi.wat | 64 ++++++++++++++++++++++++ tests/disas/winch/x64/call/multi.wat | 62 +++++++++++++++++++++++ winch/codegen/src/isa/aarch64/asm.rs | 2 +- winch/codegen/src/isa/aarch64/masm.rs | 4 +- 4 files changed, 129 insertions(+), 3 deletions(-) create mode 100644 tests/disas/winch/aarch64/call/multi.wat create mode 100644 tests/disas/winch/x64/call/multi.wat diff --git a/tests/disas/winch/aarch64/call/multi.wat b/tests/disas/winch/aarch64/call/multi.wat new file mode 100644 index 000000000000..8dfd5e5e4929 --- /dev/null +++ b/tests/disas/winch/aarch64/call/multi.wat @@ -0,0 +1,64 @@ +;;! target = "aarch64" +;;! 
test = "winch" +(module + (func $multi (result i32 i32) + i32.const 1 + i32.const 2) + + (func $start + call $multi + drop + drop) +) +;; wasm[0]::function[0]::multi: +;; stp x29, x30, [sp, #-0x10]! +;; mov x29, sp +;; mov x28, sp +;; mov x9, x1 +;; sub sp, sp, #0x18 +;; mov x28, sp +;; stur x1, [x28, #0x10] +;; stur x2, [x28, #8] +;; stur x0, [x28] +;; mov x16, #2 +;; mov w0, w16 +;; sub sp, sp, #4 +;; mov x28, sp +;; mov x16, #1 +;; stur w16, [x28] +;; ldur x1, [x28, #4] +;; ldur w16, [x28] +;; add sp, sp, #4 +;; mov x28, sp +;; stur w16, [x1] +;; add sp, sp, #0x18 +;; mov x28, sp +;; ldp x29, x30, [sp], #0x10 +;; ret +;; +;; wasm[0]::function[1]::start: +;; stp x29, x30, [sp, #-0x10]! +;; mov x29, sp +;; mov x28, sp +;; mov x9, x0 +;; sub sp, sp, #0x10 +;; mov x28, sp +;; stur x0, [x28, #8] +;; stur x1, [x28] +;; sub sp, sp, #4 +;; mov x28, sp +;; sub sp, sp, #0xc +;; mov x28, sp +;; mov x1, x9 +;; mov x2, x9 +;; ldur x0, [x28, #0xc] +;; bl #0 +;; a0: add sp, sp, #0xc +;; mov x28, sp +;; ldur x9, [x28, #0xc] +;; add sp, sp, #4 +;; mov x28, sp +;; add sp, sp, #0x10 +;; mov x28, sp +;; ldp x29, x30, [sp], #0x10 +;; ret diff --git a/tests/disas/winch/x64/call/multi.wat b/tests/disas/winch/x64/call/multi.wat new file mode 100644 index 000000000000..60a18893262d --- /dev/null +++ b/tests/disas/winch/x64/call/multi.wat @@ -0,0 +1,62 @@ +;;! target = "x86_64" +;;! 
test = "winch" +(module + (func $multi (result i32 i32) + i32.const 1 + i32.const 2) + + (func $start + call $multi + drop + drop) +) +;; wasm[0]::function[0]::multi: +;; pushq %rbp +;; movq %rsp, %rbp +;; movq 8(%rsi), %r11 +;; movq 0x10(%r11), %r11 +;; addq $0x24, %r11 +;; cmpq %rsp, %r11 +;; ja 0x58 +;; 1c: movq %rsi, %r14 +;; subq $0x20, %rsp +;; movq %rsi, 0x18(%rsp) +;; movq %rdx, 0x10(%rsp) +;; movq %rdi, 8(%rsp) +;; movl $2, %eax +;; subq $4, %rsp +;; movl $1, (%rsp) +;; movq 0xc(%rsp), %rcx +;; movl (%rsp), %r11d +;; addq $4, %rsp +;; movl %r11d, (%rcx) +;; addq $0x20, %rsp +;; popq %rbp +;; retq +;; 58: ud2 +;; +;; wasm[0]::function[1]::start: +;; pushq %rbp +;; movq %rsp, %rbp +;; movq 8(%rdi), %r11 +;; movq 0x10(%r11), %r11 +;; addq $0x20, %r11 +;; cmpq %rsp, %r11 +;; ja 0xb7 +;; 7c: movq %rdi, %r14 +;; subq $0x10, %rsp +;; movq %rdi, 8(%rsp) +;; movq %rsi, (%rsp) +;; subq $4, %rsp +;; subq $0xc, %rsp +;; movq %r14, %rsi +;; movq %r14, %rdx +;; leaq 0xc(%rsp), %rdi +;; callq 0 +;; addq $0xc, %rsp +;; movq 0xc(%rsp), %r14 +;; addq $4, %rsp +;; addq $0x10, %rsp +;; popq %rbp +;; retq +;; b7: ud2 diff --git a/winch/codegen/src/isa/aarch64/asm.rs b/winch/codegen/src/isa/aarch64/asm.rs index 78a04ae0fa2c..51fc0e6e9826 100644 --- a/winch/codegen/src/isa/aarch64/asm.rs +++ b/winch/codegen/src/isa/aarch64/asm.rs @@ -204,7 +204,7 @@ impl Assembler { self.ldr(addr, rd, size, false); } - /// Load a register. + /// Load address into a register. 
fn ldr(&mut self, addr: Address, rd: WritableReg, size: OperandSize, signed: bool) { use OperandSize::*; let writable_reg = rd.map(Into::into); diff --git a/winch/codegen/src/isa/aarch64/masm.rs b/winch/codegen/src/isa/aarch64/masm.rs index e4ba5619bcb5..1e0342a9dba7 100644 --- a/winch/codegen/src/isa/aarch64/masm.rs +++ b/winch/codegen/src/isa/aarch64/masm.rs @@ -214,8 +214,8 @@ impl Masm for MacroAssembler { } } - fn load_addr(&mut self, _src: Self::Address, _dst: WritableReg, _size: OperandSize) { - todo!() + fn load_addr(&mut self, src: Self::Address, dst: WritableReg, size: OperandSize) { + self.asm.uload(src, dst, size); } fn pop(&mut self, dst: WritableReg, size: OperandSize) { From 65312bfff1b85633ff3f09d4dad9259a7e2e5e62 Mon Sep 17 00:00:00 2001 From: Alex Crichton Date: Wed, 11 Dec 2024 13:15:48 -0700 Subject: [PATCH 26/30] pulley: More division/remainder instructions (#9791) * pulley: More division/remainder instructions Gets a few misc tests passing * Review comments --- .../src/isa/pulley_shared/inst/emit.rs | 12 +++ .../codegen/src/isa/pulley_shared/lower.isle | 21 ++++- crates/wast-util/src/lib.rs | 1 - pulley/src/interp.rs | 84 +++++++++++++++++++ pulley/src/lib.rs | 23 ++++- 5 files changed, 136 insertions(+), 5 deletions(-) diff --git a/cranelift/codegen/src/isa/pulley_shared/inst/emit.rs b/cranelift/codegen/src/isa/pulley_shared/inst/emit.rs index 3fb99285eff1..5d3d752cf30d 100644 --- a/cranelift/codegen/src/isa/pulley_shared/inst/emit.rs +++ b/cranelift/codegen/src/isa/pulley_shared/inst/emit.rs @@ -627,6 +627,18 @@ fn pulley_emit

( RawInst::PushFrame | RawInst::StackAlloc32 { .. } => { sink.add_trap(ir::TrapCode::STACK_OVERFLOW); } + RawInst::XDiv32U { .. } + | RawInst::XDiv64U { .. } + | RawInst::XRem32U { .. } + | RawInst::XRem64U { .. } => { + sink.add_trap(ir::TrapCode::INTEGER_DIVISION_BY_ZERO); + } + RawInst::XDiv32S { .. } + | RawInst::XDiv64S { .. } + | RawInst::XRem32S { .. } + | RawInst::XRem64S { .. } => { + sink.add_trap(ir::TrapCode::INTEGER_OVERFLOW); + } _ => {} } super::generated::emit(raw, sink) diff --git a/cranelift/codegen/src/isa/pulley_shared/lower.isle b/cranelift/codegen/src/isa/pulley_shared/lower.isle index 144915384cd2..e96862716fd9 100644 --- a/cranelift/codegen/src/isa/pulley_shared/lower.isle +++ b/cranelift/codegen/src/isa/pulley_shared/lower.isle @@ -191,10 +191,25 @@ (rule (lower (has_type $I64 (isub a b))) (pulley_xsub64 a b)) -;;;; Rules for `idiv` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +;;;; Rules for `sdiv` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; -(rule (lower (has_type $I32 (sdiv a b))) - (pulley_xdiv32_s a b)) +(rule (lower (has_type $I32 (sdiv a b))) (pulley_xdiv32_s a b)) +(rule (lower (has_type $I64 (sdiv a b))) (pulley_xdiv64_s a b)) + +;;;; Rules for `srem` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + +(rule (lower (has_type $I32 (srem a b))) (pulley_xrem32_s a b)) +(rule (lower (has_type $I64 (srem a b))) (pulley_xrem64_s a b)) + +;;;; Rules for `udiv` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + +(rule (lower (has_type $I32 (udiv a b))) (pulley_xdiv32_u a b)) +(rule (lower (has_type $I64 (udiv a b))) (pulley_xdiv64_u a b)) + +;;;; Rules for `urem` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + +(rule (lower (has_type $I32 (urem a b))) (pulley_xrem32_u a b)) +(rule (lower (has_type $I64 (urem a b))) (pulley_xrem64_u a b)) ;;;; Rules for `ishl` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; diff --git a/crates/wast-util/src/lib.rs 
b/crates/wast-util/src/lib.rs index 262dde6e8872..2feadf5bbfac 100644 --- a/crates/wast-util/src/lib.rs +++ b/crates/wast-util/src/lib.rs @@ -399,7 +399,6 @@ impl WastTest { "misc_testsuite/call_indirect.wast", "misc_testsuite/component-model/fused.wast", "misc_testsuite/component-model/strings.wast", - "misc_testsuite/div-rem.wast", "misc_testsuite/embenchen_fannkuch.wast", "misc_testsuite/embenchen_fasta.wast", "misc_testsuite/embenchen_ifs.wast", diff --git a/pulley/src/interp.rs b/pulley/src/interp.rs index 4a667dfef1ef..44e7ae3a44eb 100644 --- a/pulley/src/interp.rs +++ b/pulley/src/interp.rs @@ -1572,6 +1572,90 @@ impl OpVisitor for Interpreter<'_> { } } + fn xdiv64_s(&mut self, operands: BinaryOperands) -> ControlFlow { + let a = self.state[operands.src1].get_i64(); + let b = self.state[operands.src2].get_i64(); + match a.checked_div(b) { + Some(result) => { + self.state[operands.dst].set_i64(result); + ControlFlow::Continue(()) + } + None => self.done_trap::(), + } + } + + fn xdiv32_u(&mut self, operands: BinaryOperands) -> ControlFlow { + let a = self.state[operands.src1].get_u32(); + let b = self.state[operands.src2].get_u32(); + match a.checked_div(b) { + Some(result) => { + self.state[operands.dst].set_u32(result); + ControlFlow::Continue(()) + } + None => self.done_trap::(), + } + } + + fn xdiv64_u(&mut self, operands: BinaryOperands) -> ControlFlow { + let a = self.state[operands.src1].get_u64(); + let b = self.state[operands.src2].get_u64(); + match a.checked_div(b) { + Some(result) => { + self.state[operands.dst].set_u64(result); + ControlFlow::Continue(()) + } + None => self.done_trap::(), + } + } + + fn xrem32_s(&mut self, operands: BinaryOperands) -> ControlFlow { + let a = self.state[operands.src1].get_i32(); + let b = self.state[operands.src2].get_i32(); + match a.checked_rem(b) { + Some(result) => { + self.state[operands.dst].set_i32(result); + ControlFlow::Continue(()) + } + None => self.done_trap::(), + } + } + + fn xrem64_s(&mut self, 
operands: BinaryOperands) -> ControlFlow { + let a = self.state[operands.src1].get_i64(); + let b = self.state[operands.src2].get_i64(); + match a.checked_rem(b) { + Some(result) => { + self.state[operands.dst].set_i64(result); + ControlFlow::Continue(()) + } + None => self.done_trap::(), + } + } + + fn xrem32_u(&mut self, operands: BinaryOperands) -> ControlFlow { + let a = self.state[operands.src1].get_u32(); + let b = self.state[operands.src2].get_u32(); + match a.checked_rem(b) { + Some(result) => { + self.state[operands.dst].set_u32(result); + ControlFlow::Continue(()) + } + None => self.done_trap::(), + } + } + + fn xrem64_u(&mut self, operands: BinaryOperands) -> ControlFlow { + let a = self.state[operands.src1].get_u64(); + let b = self.state[operands.src2].get_u64(); + match a.checked_rem(b) { + Some(result) => { + self.state[operands.dst].set_u64(result); + ControlFlow::Continue(()) + } + None => self.done_trap::(), + } + } + fn xand32(&mut self, operands: BinaryOperands) -> ControlFlow { let a = self.state[operands.src1].get_u32(); let b = self.state[operands.src2].get_u32(); diff --git a/pulley/src/lib.rs b/pulley/src/lib.rs index cb805af65460..466ab221cd59 100644 --- a/pulley/src/lib.rs +++ b/pulley/src/lib.rs @@ -332,9 +332,30 @@ macro_rules! 
for_each_op { /// `dst = sext(low32(src))` sext32 = Sext32 { dst: XReg, src: XReg }; - /// `dst = src1 / src2` (signed) + /// `low32(dst) = low32(src1) / low32(src2)` (signed) xdiv32_s = XDiv32S { operands: BinaryOperands }; + /// `dst = src1 / src2` (signed) + xdiv64_s = XDiv64S { operands: BinaryOperands }; + + /// `low32(dst) = low32(src1) / low32(src2)` (unsigned) + xdiv32_u = XDiv32U { operands: BinaryOperands }; + + /// `dst = src1 / src2` (unsigned) + xdiv64_u = XDiv64U { operands: BinaryOperands }; + + /// `low32(dst) = low32(src1) % low32(src2)` (signed) + xrem32_s = XRem32S { operands: BinaryOperands }; + + /// `dst = src1 % src2` (signed) + xrem64_s = XRem64S { operands: BinaryOperands }; + + /// `low32(dst) = low32(src1) % low32(src2)` (unsigned) + xrem32_u = XRem32U { operands: BinaryOperands }; + + /// `dst = src1 % src2` (unsigned) + xrem64_u = XRem64U { operands: BinaryOperands }; + /// `low32(dst) = low32(src1) & low32(src2)` xand32 = XAnd32 { operands: BinaryOperands }; /// `dst = src1 & src2` From d9f1e889c1835aadeb17cbbfaedea7efb55cc817 Mon Sep 17 00:00:00 2001 From: minirop Date: Wed, 11 Dec 2024 23:05:46 +0100 Subject: [PATCH 27/30] winch Aarch64: signed_convert and unsigned_convert (#9787) * winch Aarch64: fix reinterpret instructions * winch Aarch64: implement signed_convert and unsigned_convert --- .../winch/aarch64/f32_convert_i32_s/const.wat | 25 ++++++++++ .../aarch64/f32_convert_i32_s/locals.wat | 28 +++++++++++ .../aarch64/f32_convert_i32_s/params.wat | 25 ++++++++++ .../aarch64/f32_convert_i32_s/spilled.wat | 33 +++++++++++++ .../winch/aarch64/f32_convert_i32_u/const.wat | 25 ++++++++++ .../aarch64/f32_convert_i32_u/locals.wat | 28 +++++++++++ .../aarch64/f32_convert_i32_u/params.wat | 25 ++++++++++ .../aarch64/f32_convert_i32_u/spilled.wat | 33 +++++++++++++ .../winch/aarch64/f32_convert_i64_s/const.wat | 25 ++++++++++ .../aarch64/f32_convert_i64_s/locals.wat | 28 +++++++++++ .../aarch64/f32_convert_i64_s/params.wat | 25 ++++++++++ 
.../aarch64/f32_convert_i64_s/spilled.wat | 33 +++++++++++++ .../winch/aarch64/f32_convert_i64_u/const.wat | 25 ++++++++++ .../aarch64/f32_convert_i64_u/locals.wat | 28 +++++++++++ .../aarch64/f32_convert_i64_u/params.wat | 25 ++++++++++ .../aarch64/f32_convert_i64_u/spilled.wat | 33 +++++++++++++ .../aarch64/f32_reinterpret_i32/const.wat | 2 +- .../aarch64/f32_reinterpret_i32/locals.wat | 2 +- .../aarch64/f32_reinterpret_i32/params.wat | 2 +- .../aarch64/f32_reinterpret_i32/ret_int.wat | 2 +- .../aarch64/f32_reinterpret_i32/spilled.wat | 2 +- .../winch/aarch64/f64_convert_i32_s/const.wat | 25 ++++++++++ .../aarch64/f64_convert_i32_s/locals.wat | 28 +++++++++++ .../aarch64/f64_convert_i32_s/params.wat | 25 ++++++++++ .../aarch64/f64_convert_i32_s/spilled.wat | 33 +++++++++++++ .../winch/aarch64/f64_convert_i32_u/const.wat | 25 ++++++++++ .../aarch64/f64_convert_i32_u/locals.wat | 28 +++++++++++ .../aarch64/f64_convert_i32_u/params.wat | 25 ++++++++++ .../aarch64/f64_convert_i32_u/spilled.wat | 33 +++++++++++++ .../winch/aarch64/f64_convert_i64_s/const.wat | 25 ++++++++++ .../aarch64/f64_convert_i64_s/locals.wat | 28 +++++++++++ .../aarch64/f64_convert_i64_s/params.wat | 25 ++++++++++ .../aarch64/f64_convert_i64_s/spilled.wat | 33 +++++++++++++ .../winch/aarch64/f64_convert_i64_u/const.wat | 25 ++++++++++ .../aarch64/f64_convert_i64_u/locals.wat | 28 +++++++++++ .../aarch64/f64_convert_i64_u/params.wat | 25 ++++++++++ .../aarch64/f64_convert_i64_u/spilled.wat | 33 +++++++++++++ .../aarch64/f64_reinterpret_i64/const.wat | 2 +- .../aarch64/f64_reinterpret_i64/locals.wat | 2 +- .../aarch64/f64_reinterpret_i64/params.wat | 2 +- .../aarch64/f64_reinterpret_i64/ret_int.wat | 2 +- .../aarch64/f64_reinterpret_i64/spilled.wat | 2 +- .../aarch64/i32_reinterpret_f32/const.wat | 2 +- .../aarch64/i32_reinterpret_f32/locals.wat | 2 +- .../aarch64/i32_reinterpret_f32/params.wat | 2 +- .../aarch64/i32_reinterpret_f32/ret_float.wat | 2 +- .../aarch64/i64_reinterpret_f64/const.wat | 
2 +- .../aarch64/i64_reinterpret_f64/locals.wat | 2 +- .../aarch64/i64_reinterpret_f64/params.wat | 2 +- .../aarch64/i64_reinterpret_f64/ret_float.wat | 2 +- winch/codegen/src/isa/aarch64/asm.rs | 46 +++++++++++++------ winch/codegen/src/isa/aarch64/masm.rs | 24 +++++----- 52 files changed, 949 insertions(+), 45 deletions(-) create mode 100644 tests/disas/winch/aarch64/f32_convert_i32_s/const.wat create mode 100644 tests/disas/winch/aarch64/f32_convert_i32_s/locals.wat create mode 100644 tests/disas/winch/aarch64/f32_convert_i32_s/params.wat create mode 100644 tests/disas/winch/aarch64/f32_convert_i32_s/spilled.wat create mode 100644 tests/disas/winch/aarch64/f32_convert_i32_u/const.wat create mode 100644 tests/disas/winch/aarch64/f32_convert_i32_u/locals.wat create mode 100644 tests/disas/winch/aarch64/f32_convert_i32_u/params.wat create mode 100644 tests/disas/winch/aarch64/f32_convert_i32_u/spilled.wat create mode 100644 tests/disas/winch/aarch64/f32_convert_i64_s/const.wat create mode 100644 tests/disas/winch/aarch64/f32_convert_i64_s/locals.wat create mode 100644 tests/disas/winch/aarch64/f32_convert_i64_s/params.wat create mode 100644 tests/disas/winch/aarch64/f32_convert_i64_s/spilled.wat create mode 100644 tests/disas/winch/aarch64/f32_convert_i64_u/const.wat create mode 100644 tests/disas/winch/aarch64/f32_convert_i64_u/locals.wat create mode 100644 tests/disas/winch/aarch64/f32_convert_i64_u/params.wat create mode 100644 tests/disas/winch/aarch64/f32_convert_i64_u/spilled.wat create mode 100644 tests/disas/winch/aarch64/f64_convert_i32_s/const.wat create mode 100644 tests/disas/winch/aarch64/f64_convert_i32_s/locals.wat create mode 100644 tests/disas/winch/aarch64/f64_convert_i32_s/params.wat create mode 100644 tests/disas/winch/aarch64/f64_convert_i32_s/spilled.wat create mode 100644 tests/disas/winch/aarch64/f64_convert_i32_u/const.wat create mode 100644 tests/disas/winch/aarch64/f64_convert_i32_u/locals.wat create mode 100644 
tests/disas/winch/aarch64/f64_convert_i32_u/params.wat create mode 100644 tests/disas/winch/aarch64/f64_convert_i32_u/spilled.wat create mode 100644 tests/disas/winch/aarch64/f64_convert_i64_s/const.wat create mode 100644 tests/disas/winch/aarch64/f64_convert_i64_s/locals.wat create mode 100644 tests/disas/winch/aarch64/f64_convert_i64_s/params.wat create mode 100644 tests/disas/winch/aarch64/f64_convert_i64_s/spilled.wat create mode 100644 tests/disas/winch/aarch64/f64_convert_i64_u/const.wat create mode 100644 tests/disas/winch/aarch64/f64_convert_i64_u/locals.wat create mode 100644 tests/disas/winch/aarch64/f64_convert_i64_u/params.wat create mode 100644 tests/disas/winch/aarch64/f64_convert_i64_u/spilled.wat diff --git a/tests/disas/winch/aarch64/f32_convert_i32_s/const.wat b/tests/disas/winch/aarch64/f32_convert_i32_s/const.wat new file mode 100644 index 000000000000..a08ac25861a0 --- /dev/null +++ b/tests/disas/winch/aarch64/f32_convert_i32_s/const.wat @@ -0,0 +1,25 @@ +;;! target = "aarch64" +;;! test = "winch" + +(module + (func (result f32) + (i32.const 1) + (f32.convert_i32_s) + ) +) +;; wasm[0]::function[0]: +;; stp x29, x30, [sp, #-0x10]! +;; mov x29, sp +;; mov x28, sp +;; mov x9, x0 +;; sub sp, sp, #0x10 +;; mov x28, sp +;; stur x0, [x28, #8] +;; stur x1, [x28] +;; mov x16, #1 +;; mov w0, w16 +;; scvtf s0, w0 +;; add sp, sp, #0x10 +;; mov x28, sp +;; ldp x29, x30, [sp], #0x10 +;; ret diff --git a/tests/disas/winch/aarch64/f32_convert_i32_s/locals.wat b/tests/disas/winch/aarch64/f32_convert_i32_s/locals.wat new file mode 100644 index 000000000000..a1dd81c7be17 --- /dev/null +++ b/tests/disas/winch/aarch64/f32_convert_i32_s/locals.wat @@ -0,0 +1,28 @@ +;;! target = "aarch64" +;;! test = "winch" + +(module + (func (result f32) + (local i32) + + (local.get 0) + (f32.convert_i32_s) + ) +) +;; wasm[0]::function[0]: +;; stp x29, x30, [sp, #-0x10]! 
+;; mov x29, sp +;; mov x28, sp +;; mov x9, x0 +;; sub sp, sp, #0x18 +;; mov x28, sp +;; stur x0, [x28, #0x10] +;; stur x1, [x28, #8] +;; mov x16, #0 +;; stur x16, [x28] +;; ldur w0, [x28, #4] +;; scvtf s0, w0 +;; add sp, sp, #0x18 +;; mov x28, sp +;; ldp x29, x30, [sp], #0x10 +;; ret diff --git a/tests/disas/winch/aarch64/f32_convert_i32_s/params.wat b/tests/disas/winch/aarch64/f32_convert_i32_s/params.wat new file mode 100644 index 000000000000..15f230835a51 --- /dev/null +++ b/tests/disas/winch/aarch64/f32_convert_i32_s/params.wat @@ -0,0 +1,25 @@ +;;! target = "aarch64" +;;! test = "winch" + +(module + (func (param i32) (result f32) + (local.get 0) + (f32.convert_i32_s) + ) +) +;; wasm[0]::function[0]: +;; stp x29, x30, [sp, #-0x10]! +;; mov x29, sp +;; mov x28, sp +;; mov x9, x0 +;; sub sp, sp, #0x18 +;; mov x28, sp +;; stur x0, [x28, #0x10] +;; stur x1, [x28, #8] +;; stur w2, [x28, #4] +;; ldur w0, [x28, #4] +;; scvtf s0, w0 +;; add sp, sp, #0x18 +;; mov x28, sp +;; ldp x29, x30, [sp], #0x10 +;; ret diff --git a/tests/disas/winch/aarch64/f32_convert_i32_s/spilled.wat b/tests/disas/winch/aarch64/f32_convert_i32_s/spilled.wat new file mode 100644 index 000000000000..4cb1f7eafac5 --- /dev/null +++ b/tests/disas/winch/aarch64/f32_convert_i32_s/spilled.wat @@ -0,0 +1,33 @@ +;;! target = "aarch64" +;;! test = "winch" + +(module + (func (result f32) + i32.const 1 + f32.convert_i32_s + block + end + ) +) +;; wasm[0]::function[0]: +;; stp x29, x30, [sp, #-0x10]! 
+;; mov x29, sp +;; mov x28, sp +;; mov x9, x0 +;; sub sp, sp, #0x10 +;; mov x28, sp +;; stur x0, [x28, #8] +;; stur x1, [x28] +;; mov x16, #1 +;; mov w0, w16 +;; scvtf s0, w0 +;; sub sp, sp, #4 +;; mov x28, sp +;; stur s0, [x28] +;; ldur s0, [x28] +;; add sp, sp, #4 +;; mov x28, sp +;; add sp, sp, #0x10 +;; mov x28, sp +;; ldp x29, x30, [sp], #0x10 +;; ret diff --git a/tests/disas/winch/aarch64/f32_convert_i32_u/const.wat b/tests/disas/winch/aarch64/f32_convert_i32_u/const.wat new file mode 100644 index 000000000000..bff1dcd790c0 --- /dev/null +++ b/tests/disas/winch/aarch64/f32_convert_i32_u/const.wat @@ -0,0 +1,25 @@ +;;! target = "aarch64" +;;! test = "winch" + +(module + (func (result f32) + (i32.const 1) + (f32.convert_i32_u) + ) +) +;; wasm[0]::function[0]: +;; stp x29, x30, [sp, #-0x10]! +;; mov x29, sp +;; mov x28, sp +;; mov x9, x0 +;; sub sp, sp, #0x10 +;; mov x28, sp +;; stur x0, [x28, #8] +;; stur x1, [x28] +;; mov x16, #1 +;; mov w1, w16 +;; ucvtf s0, w1 +;; add sp, sp, #0x10 +;; mov x28, sp +;; ldp x29, x30, [sp], #0x10 +;; ret diff --git a/tests/disas/winch/aarch64/f32_convert_i32_u/locals.wat b/tests/disas/winch/aarch64/f32_convert_i32_u/locals.wat new file mode 100644 index 000000000000..cf0fcabb762c --- /dev/null +++ b/tests/disas/winch/aarch64/f32_convert_i32_u/locals.wat @@ -0,0 +1,28 @@ +;;! target = "aarch64" +;;! test = "winch" + +(module + (func (result f32) + (local i32) + + (local.get 0) + (f32.convert_i32_u) + ) +) +;; wasm[0]::function[0]: +;; stp x29, x30, [sp, #-0x10]! 
+;; mov x29, sp +;; mov x28, sp +;; mov x9, x0 +;; sub sp, sp, #0x18 +;; mov x28, sp +;; stur x0, [x28, #0x10] +;; stur x1, [x28, #8] +;; mov x16, #0 +;; stur x16, [x28] +;; ldur w1, [x28, #4] +;; ucvtf s0, w1 +;; add sp, sp, #0x18 +;; mov x28, sp +;; ldp x29, x30, [sp], #0x10 +;; ret diff --git a/tests/disas/winch/aarch64/f32_convert_i32_u/params.wat b/tests/disas/winch/aarch64/f32_convert_i32_u/params.wat new file mode 100644 index 000000000000..467519e5c443 --- /dev/null +++ b/tests/disas/winch/aarch64/f32_convert_i32_u/params.wat @@ -0,0 +1,25 @@ +;;! target = "aarch64" +;;! test = "winch" + +(module + (func (param i32) (result f32) + (local.get 0) + (f32.convert_i32_u) + ) +) +;; wasm[0]::function[0]: +;; stp x29, x30, [sp, #-0x10]! +;; mov x29, sp +;; mov x28, sp +;; mov x9, x0 +;; sub sp, sp, #0x18 +;; mov x28, sp +;; stur x0, [x28, #0x10] +;; stur x1, [x28, #8] +;; stur w2, [x28, #4] +;; ldur w1, [x28, #4] +;; ucvtf s0, w1 +;; add sp, sp, #0x18 +;; mov x28, sp +;; ldp x29, x30, [sp], #0x10 +;; ret diff --git a/tests/disas/winch/aarch64/f32_convert_i32_u/spilled.wat b/tests/disas/winch/aarch64/f32_convert_i32_u/spilled.wat new file mode 100644 index 000000000000..0bb3682e64f5 --- /dev/null +++ b/tests/disas/winch/aarch64/f32_convert_i32_u/spilled.wat @@ -0,0 +1,33 @@ +;;! target = "aarch64" +;;! test = "winch" + +(module + (func (result f32) + i32.const 1 + f32.convert_i32_u + block + end + ) +) +;; wasm[0]::function[0]: +;; stp x29, x30, [sp, #-0x10]! 
+;; mov x29, sp +;; mov x28, sp +;; mov x9, x0 +;; sub sp, sp, #0x10 +;; mov x28, sp +;; stur x0, [x28, #8] +;; stur x1, [x28] +;; mov x16, #1 +;; mov w1, w16 +;; ucvtf s0, w1 +;; sub sp, sp, #4 +;; mov x28, sp +;; stur s0, [x28] +;; ldur s0, [x28] +;; add sp, sp, #4 +;; mov x28, sp +;; add sp, sp, #0x10 +;; mov x28, sp +;; ldp x29, x30, [sp], #0x10 +;; ret diff --git a/tests/disas/winch/aarch64/f32_convert_i64_s/const.wat b/tests/disas/winch/aarch64/f32_convert_i64_s/const.wat new file mode 100644 index 000000000000..91c3041223de --- /dev/null +++ b/tests/disas/winch/aarch64/f32_convert_i64_s/const.wat @@ -0,0 +1,25 @@ +;;! target = "aarch64" +;;! test = "winch" + +(module + (func (result f32) + (i64.const 1) + (f32.convert_i64_s) + ) +) +;; wasm[0]::function[0]: +;; stp x29, x30, [sp, #-0x10]! +;; mov x29, sp +;; mov x28, sp +;; mov x9, x0 +;; sub sp, sp, #0x10 +;; mov x28, sp +;; stur x0, [x28, #8] +;; stur x1, [x28] +;; mov x16, #1 +;; mov x0, x16 +;; scvtf s0, x0 +;; add sp, sp, #0x10 +;; mov x28, sp +;; ldp x29, x30, [sp], #0x10 +;; ret diff --git a/tests/disas/winch/aarch64/f32_convert_i64_s/locals.wat b/tests/disas/winch/aarch64/f32_convert_i64_s/locals.wat new file mode 100644 index 000000000000..46218695ce16 --- /dev/null +++ b/tests/disas/winch/aarch64/f32_convert_i64_s/locals.wat @@ -0,0 +1,28 @@ +;;! target = "aarch64" +;;! test = "winch" + +(module + (func (result f32) + (local i64) + + (local.get 0) + (f32.convert_i64_s) + ) +) +;; wasm[0]::function[0]: +;; stp x29, x30, [sp, #-0x10]! 
+;; mov x29, sp +;; mov x28, sp +;; mov x9, x0 +;; sub sp, sp, #0x18 +;; mov x28, sp +;; stur x0, [x28, #0x10] +;; stur x1, [x28, #8] +;; mov x16, #0 +;; stur x16, [x28] +;; ldur x0, [x28] +;; scvtf s0, x0 +;; add sp, sp, #0x18 +;; mov x28, sp +;; ldp x29, x30, [sp], #0x10 +;; ret diff --git a/tests/disas/winch/aarch64/f32_convert_i64_s/params.wat b/tests/disas/winch/aarch64/f32_convert_i64_s/params.wat new file mode 100644 index 000000000000..9361a131dda0 --- /dev/null +++ b/tests/disas/winch/aarch64/f32_convert_i64_s/params.wat @@ -0,0 +1,25 @@ +;;! target = "aarch64" +;;! test = "winch" + +(module + (func (param i64) (result f32) + (local.get 0) + (f32.convert_i64_s) + ) +) +;; wasm[0]::function[0]: +;; stp x29, x30, [sp, #-0x10]! +;; mov x29, sp +;; mov x28, sp +;; mov x9, x0 +;; sub sp, sp, #0x18 +;; mov x28, sp +;; stur x0, [x28, #0x10] +;; stur x1, [x28, #8] +;; stur x2, [x28] +;; ldur x0, [x28] +;; scvtf s0, x0 +;; add sp, sp, #0x18 +;; mov x28, sp +;; ldp x29, x30, [sp], #0x10 +;; ret diff --git a/tests/disas/winch/aarch64/f32_convert_i64_s/spilled.wat b/tests/disas/winch/aarch64/f32_convert_i64_s/spilled.wat new file mode 100644 index 000000000000..b5fcacc18376 --- /dev/null +++ b/tests/disas/winch/aarch64/f32_convert_i64_s/spilled.wat @@ -0,0 +1,33 @@ +;;! target = "aarch64" +;;! test = "winch" + +(module + (func (result f32) + i64.const 1 + f32.convert_i64_s + block + end + ) +) +;; wasm[0]::function[0]: +;; stp x29, x30, [sp, #-0x10]! 
+;; mov x29, sp +;; mov x28, sp +;; mov x9, x0 +;; sub sp, sp, #0x10 +;; mov x28, sp +;; stur x0, [x28, #8] +;; stur x1, [x28] +;; mov x16, #1 +;; mov x0, x16 +;; scvtf s0, x0 +;; sub sp, sp, #4 +;; mov x28, sp +;; stur s0, [x28] +;; ldur s0, [x28] +;; add sp, sp, #4 +;; mov x28, sp +;; add sp, sp, #0x10 +;; mov x28, sp +;; ldp x29, x30, [sp], #0x10 +;; ret diff --git a/tests/disas/winch/aarch64/f32_convert_i64_u/const.wat b/tests/disas/winch/aarch64/f32_convert_i64_u/const.wat new file mode 100644 index 000000000000..755890aee7bc --- /dev/null +++ b/tests/disas/winch/aarch64/f32_convert_i64_u/const.wat @@ -0,0 +1,25 @@ +;;! target = "aarch64" +;;! test = "winch" + +(module + (func (result f32) + (i64.const 1) + (f32.convert_i64_u) + ) +) +;; wasm[0]::function[0]: +;; stp x29, x30, [sp, #-0x10]! +;; mov x29, sp +;; mov x28, sp +;; mov x9, x0 +;; sub sp, sp, #0x10 +;; mov x28, sp +;; stur x0, [x28, #8] +;; stur x1, [x28] +;; mov x16, #1 +;; mov x1, x16 +;; ucvtf s0, x1 +;; add sp, sp, #0x10 +;; mov x28, sp +;; ldp x29, x30, [sp], #0x10 +;; ret diff --git a/tests/disas/winch/aarch64/f32_convert_i64_u/locals.wat b/tests/disas/winch/aarch64/f32_convert_i64_u/locals.wat new file mode 100644 index 000000000000..e0bd03e9de92 --- /dev/null +++ b/tests/disas/winch/aarch64/f32_convert_i64_u/locals.wat @@ -0,0 +1,28 @@ +;;! target = "aarch64" +;;! test = "winch" + +(module + (func (result f32) + (local i64) + + (local.get 0) + (f32.convert_i64_u) + ) +) +;; wasm[0]::function[0]: +;; stp x29, x30, [sp, #-0x10]! 
+;; mov x29, sp +;; mov x28, sp +;; mov x9, x0 +;; sub sp, sp, #0x18 +;; mov x28, sp +;; stur x0, [x28, #0x10] +;; stur x1, [x28, #8] +;; mov x16, #0 +;; stur x16, [x28] +;; ldur x1, [x28] +;; ucvtf s0, x1 +;; add sp, sp, #0x18 +;; mov x28, sp +;; ldp x29, x30, [sp], #0x10 +;; ret diff --git a/tests/disas/winch/aarch64/f32_convert_i64_u/params.wat b/tests/disas/winch/aarch64/f32_convert_i64_u/params.wat new file mode 100644 index 000000000000..db4481e19479 --- /dev/null +++ b/tests/disas/winch/aarch64/f32_convert_i64_u/params.wat @@ -0,0 +1,25 @@ +;;! target = "aarch64" +;;! test = "winch" + +(module + (func (param i64) (result f32) + (local.get 0) + (f32.convert_i64_u) + ) +) +;; wasm[0]::function[0]: +;; stp x29, x30, [sp, #-0x10]! +;; mov x29, sp +;; mov x28, sp +;; mov x9, x0 +;; sub sp, sp, #0x18 +;; mov x28, sp +;; stur x0, [x28, #0x10] +;; stur x1, [x28, #8] +;; stur x2, [x28] +;; ldur x1, [x28] +;; ucvtf s0, x1 +;; add sp, sp, #0x18 +;; mov x28, sp +;; ldp x29, x30, [sp], #0x10 +;; ret diff --git a/tests/disas/winch/aarch64/f32_convert_i64_u/spilled.wat b/tests/disas/winch/aarch64/f32_convert_i64_u/spilled.wat new file mode 100644 index 000000000000..8a1d3493a1b1 --- /dev/null +++ b/tests/disas/winch/aarch64/f32_convert_i64_u/spilled.wat @@ -0,0 +1,33 @@ +;;! target = "aarch64" +;;! test = "winch" + +(module + (func (result f32) + i64.const 1 + f32.convert_i64_u + block + end + ) +) +;; wasm[0]::function[0]: +;; stp x29, x30, [sp, #-0x10]! 
+;; mov x29, sp +;; mov x28, sp +;; mov x9, x0 +;; sub sp, sp, #0x10 +;; mov x28, sp +;; stur x0, [x28, #8] +;; stur x1, [x28] +;; mov x16, #1 +;; mov x1, x16 +;; ucvtf s0, x1 +;; sub sp, sp, #4 +;; mov x28, sp +;; stur s0, [x28] +;; ldur s0, [x28] +;; add sp, sp, #4 +;; mov x28, sp +;; add sp, sp, #0x10 +;; mov x28, sp +;; ldp x29, x30, [sp], #0x10 +;; ret diff --git a/tests/disas/winch/aarch64/f32_reinterpret_i32/const.wat b/tests/disas/winch/aarch64/f32_reinterpret_i32/const.wat index 608f8faf7457..547961118308 100644 --- a/tests/disas/winch/aarch64/f32_reinterpret_i32/const.wat +++ b/tests/disas/winch/aarch64/f32_reinterpret_i32/const.wat @@ -18,7 +18,7 @@ ;; stur x1, [x28] ;; mov x16, #1 ;; mov w0, w16 -;; scvtf s0, w0 +;; fmov s0, w0 ;; add sp, sp, #0x10 ;; mov x28, sp ;; ldp x29, x30, [sp], #0x10 diff --git a/tests/disas/winch/aarch64/f32_reinterpret_i32/locals.wat b/tests/disas/winch/aarch64/f32_reinterpret_i32/locals.wat index 0d093cb0a6ec..16b5d518c6f5 100644 --- a/tests/disas/winch/aarch64/f32_reinterpret_i32/locals.wat +++ b/tests/disas/winch/aarch64/f32_reinterpret_i32/locals.wat @@ -21,7 +21,7 @@ ;; mov x16, #0 ;; stur x16, [x28] ;; ldur w0, [x28, #4] -;; scvtf s0, w0 +;; fmov s0, w0 ;; add sp, sp, #0x18 ;; mov x28, sp ;; ldp x29, x30, [sp], #0x10 diff --git a/tests/disas/winch/aarch64/f32_reinterpret_i32/params.wat b/tests/disas/winch/aarch64/f32_reinterpret_i32/params.wat index 27cdda4b9464..72e237790399 100644 --- a/tests/disas/winch/aarch64/f32_reinterpret_i32/params.wat +++ b/tests/disas/winch/aarch64/f32_reinterpret_i32/params.wat @@ -18,7 +18,7 @@ ;; stur x1, [x28, #8] ;; stur w2, [x28, #4] ;; ldur w0, [x28, #4] -;; scvtf s0, w0 +;; fmov s0, w0 ;; add sp, sp, #0x18 ;; mov x28, sp ;; ldp x29, x30, [sp], #0x10 diff --git a/tests/disas/winch/aarch64/f32_reinterpret_i32/ret_int.wat b/tests/disas/winch/aarch64/f32_reinterpret_i32/ret_int.wat index 106ac60e48b1..dc2d243dab66 100644 --- a/tests/disas/winch/aarch64/f32_reinterpret_i32/ret_int.wat +++ 
b/tests/disas/winch/aarch64/f32_reinterpret_i32/ret_int.wat @@ -20,7 +20,7 @@ ;; stur x1, [x28] ;; mov x16, #1 ;; mov w0, w16 -;; scvtf s0, w0 +;; fmov s0, w0 ;; mov x16, #1 ;; mov w0, w16 ;; add sp, sp, #0x10 diff --git a/tests/disas/winch/aarch64/f32_reinterpret_i32/spilled.wat b/tests/disas/winch/aarch64/f32_reinterpret_i32/spilled.wat index d458d4ef32a0..76269859f915 100644 --- a/tests/disas/winch/aarch64/f32_reinterpret_i32/spilled.wat +++ b/tests/disas/winch/aarch64/f32_reinterpret_i32/spilled.wat @@ -20,7 +20,7 @@ ;; stur x1, [x28] ;; mov x16, #1 ;; mov w0, w16 -;; scvtf s0, w0 +;; fmov s0, w0 ;; sub sp, sp, #4 ;; mov x28, sp ;; stur s0, [x28] diff --git a/tests/disas/winch/aarch64/f64_convert_i32_s/const.wat b/tests/disas/winch/aarch64/f64_convert_i32_s/const.wat new file mode 100644 index 000000000000..7c6c0d395d33 --- /dev/null +++ b/tests/disas/winch/aarch64/f64_convert_i32_s/const.wat @@ -0,0 +1,25 @@ +;;! target = "aarch64" +;;! test = "winch" + +(module + (func (result f64) + (i32.const 1) + (f64.convert_i32_s) + ) +) +;; wasm[0]::function[0]: +;; stp x29, x30, [sp, #-0x10]! +;; mov x29, sp +;; mov x28, sp +;; mov x9, x0 +;; sub sp, sp, #0x10 +;; mov x28, sp +;; stur x0, [x28, #8] +;; stur x1, [x28] +;; mov x16, #1 +;; mov w0, w16 +;; scvtf d0, w0 +;; add sp, sp, #0x10 +;; mov x28, sp +;; ldp x29, x30, [sp], #0x10 +;; ret diff --git a/tests/disas/winch/aarch64/f64_convert_i32_s/locals.wat b/tests/disas/winch/aarch64/f64_convert_i32_s/locals.wat new file mode 100644 index 000000000000..ed41ea5c9d3d --- /dev/null +++ b/tests/disas/winch/aarch64/f64_convert_i32_s/locals.wat @@ -0,0 +1,28 @@ +;;! target = "aarch64" +;;! test = "winch" + +(module + (func (result f64) + (local i32) + + (local.get 0) + (f64.convert_i32_s) + ) +) +;; wasm[0]::function[0]: +;; stp x29, x30, [sp, #-0x10]! 
+;; mov x29, sp +;; mov x28, sp +;; mov x9, x0 +;; sub sp, sp, #0x18 +;; mov x28, sp +;; stur x0, [x28, #0x10] +;; stur x1, [x28, #8] +;; mov x16, #0 +;; stur x16, [x28] +;; ldur w0, [x28, #4] +;; scvtf d0, w0 +;; add sp, sp, #0x18 +;; mov x28, sp +;; ldp x29, x30, [sp], #0x10 +;; ret diff --git a/tests/disas/winch/aarch64/f64_convert_i32_s/params.wat b/tests/disas/winch/aarch64/f64_convert_i32_s/params.wat new file mode 100644 index 000000000000..e8e517bfc847 --- /dev/null +++ b/tests/disas/winch/aarch64/f64_convert_i32_s/params.wat @@ -0,0 +1,25 @@ +;;! target = "aarch64" +;;! test = "winch" + +(module + (func (param i32) (result f64) + (local.get 0) + (f64.convert_i32_s) + ) +) +;; wasm[0]::function[0]: +;; stp x29, x30, [sp, #-0x10]! +;; mov x29, sp +;; mov x28, sp +;; mov x9, x0 +;; sub sp, sp, #0x18 +;; mov x28, sp +;; stur x0, [x28, #0x10] +;; stur x1, [x28, #8] +;; stur w2, [x28, #4] +;; ldur w0, [x28, #4] +;; scvtf d0, w0 +;; add sp, sp, #0x18 +;; mov x28, sp +;; ldp x29, x30, [sp], #0x10 +;; ret diff --git a/tests/disas/winch/aarch64/f64_convert_i32_s/spilled.wat b/tests/disas/winch/aarch64/f64_convert_i32_s/spilled.wat new file mode 100644 index 000000000000..f8f02d4dc111 --- /dev/null +++ b/tests/disas/winch/aarch64/f64_convert_i32_s/spilled.wat @@ -0,0 +1,33 @@ +;;! target = "aarch64" +;;! test = "winch" + +(module + (func (result f64) + i32.const 1 + f64.convert_i32_s + block + end + ) +) +;; wasm[0]::function[0]: +;; stp x29, x30, [sp, #-0x10]! 
+;; mov x29, sp +;; mov x28, sp +;; mov x9, x0 +;; sub sp, sp, #0x10 +;; mov x28, sp +;; stur x0, [x28, #8] +;; stur x1, [x28] +;; mov x16, #1 +;; mov w0, w16 +;; scvtf d0, w0 +;; sub sp, sp, #8 +;; mov x28, sp +;; stur d0, [x28] +;; ldur d0, [x28] +;; add sp, sp, #8 +;; mov x28, sp +;; add sp, sp, #0x10 +;; mov x28, sp +;; ldp x29, x30, [sp], #0x10 +;; ret diff --git a/tests/disas/winch/aarch64/f64_convert_i32_u/const.wat b/tests/disas/winch/aarch64/f64_convert_i32_u/const.wat new file mode 100644 index 000000000000..b2831a52785e --- /dev/null +++ b/tests/disas/winch/aarch64/f64_convert_i32_u/const.wat @@ -0,0 +1,25 @@ +;;! target = "aarch64" +;;! test = "winch" + +(module + (func (result f64) + (i32.const 1) + (f64.convert_i32_u) + ) +) +;; wasm[0]::function[0]: +;; stp x29, x30, [sp, #-0x10]! +;; mov x29, sp +;; mov x28, sp +;; mov x9, x0 +;; sub sp, sp, #0x10 +;; mov x28, sp +;; stur x0, [x28, #8] +;; stur x1, [x28] +;; mov x16, #1 +;; mov w1, w16 +;; ucvtf d0, w1 +;; add sp, sp, #0x10 +;; mov x28, sp +;; ldp x29, x30, [sp], #0x10 +;; ret diff --git a/tests/disas/winch/aarch64/f64_convert_i32_u/locals.wat b/tests/disas/winch/aarch64/f64_convert_i32_u/locals.wat new file mode 100644 index 000000000000..1f79696145df --- /dev/null +++ b/tests/disas/winch/aarch64/f64_convert_i32_u/locals.wat @@ -0,0 +1,28 @@ +;;! target = "aarch64" +;;! test = "winch" + +(module + (func (result f64) + (local i32) + + (local.get 0) + (f64.convert_i32_u) + ) +) +;; wasm[0]::function[0]: +;; stp x29, x30, [sp, #-0x10]! 
+;; mov x29, sp +;; mov x28, sp +;; mov x9, x0 +;; sub sp, sp, #0x18 +;; mov x28, sp +;; stur x0, [x28, #0x10] +;; stur x1, [x28, #8] +;; mov x16, #0 +;; stur x16, [x28] +;; ldur w1, [x28, #4] +;; ucvtf d0, w1 +;; add sp, sp, #0x18 +;; mov x28, sp +;; ldp x29, x30, [sp], #0x10 +;; ret diff --git a/tests/disas/winch/aarch64/f64_convert_i32_u/params.wat b/tests/disas/winch/aarch64/f64_convert_i32_u/params.wat new file mode 100644 index 000000000000..0540e90ad4a5 --- /dev/null +++ b/tests/disas/winch/aarch64/f64_convert_i32_u/params.wat @@ -0,0 +1,25 @@ +;;! target = "aarch64" +;;! test = "winch" + +(module + (func (param i32) (result f64) + (local.get 0) + (f64.convert_i32_u) + ) +) +;; wasm[0]::function[0]: +;; stp x29, x30, [sp, #-0x10]! +;; mov x29, sp +;; mov x28, sp +;; mov x9, x0 +;; sub sp, sp, #0x18 +;; mov x28, sp +;; stur x0, [x28, #0x10] +;; stur x1, [x28, #8] +;; stur w2, [x28, #4] +;; ldur w1, [x28, #4] +;; ucvtf d0, w1 +;; add sp, sp, #0x18 +;; mov x28, sp +;; ldp x29, x30, [sp], #0x10 +;; ret diff --git a/tests/disas/winch/aarch64/f64_convert_i32_u/spilled.wat b/tests/disas/winch/aarch64/f64_convert_i32_u/spilled.wat new file mode 100644 index 000000000000..240299c35977 --- /dev/null +++ b/tests/disas/winch/aarch64/f64_convert_i32_u/spilled.wat @@ -0,0 +1,33 @@ +;;! target = "aarch64" +;;! test = "winch" + +(module + (func (result f64) + i32.const 1 + f64.convert_i32_u + block + end + ) +) +;; wasm[0]::function[0]: +;; stp x29, x30, [sp, #-0x10]! 
+;; mov x29, sp +;; mov x28, sp +;; mov x9, x0 +;; sub sp, sp, #0x10 +;; mov x28, sp +;; stur x0, [x28, #8] +;; stur x1, [x28] +;; mov x16, #1 +;; mov w1, w16 +;; ucvtf d0, w1 +;; sub sp, sp, #8 +;; mov x28, sp +;; stur d0, [x28] +;; ldur d0, [x28] +;; add sp, sp, #8 +;; mov x28, sp +;; add sp, sp, #0x10 +;; mov x28, sp +;; ldp x29, x30, [sp], #0x10 +;; ret diff --git a/tests/disas/winch/aarch64/f64_convert_i64_s/const.wat b/tests/disas/winch/aarch64/f64_convert_i64_s/const.wat new file mode 100644 index 000000000000..a6b2541f776a --- /dev/null +++ b/tests/disas/winch/aarch64/f64_convert_i64_s/const.wat @@ -0,0 +1,25 @@ +;;! target = "aarch64" +;;! test = "winch" + +(module + (func (result f64) + (i64.const 1) + (f64.convert_i64_s) + ) +) +;; wasm[0]::function[0]: +;; stp x29, x30, [sp, #-0x10]! +;; mov x29, sp +;; mov x28, sp +;; mov x9, x0 +;; sub sp, sp, #0x10 +;; mov x28, sp +;; stur x0, [x28, #8] +;; stur x1, [x28] +;; mov x16, #1 +;; mov x0, x16 +;; scvtf d0, x0 +;; add sp, sp, #0x10 +;; mov x28, sp +;; ldp x29, x30, [sp], #0x10 +;; ret diff --git a/tests/disas/winch/aarch64/f64_convert_i64_s/locals.wat b/tests/disas/winch/aarch64/f64_convert_i64_s/locals.wat new file mode 100644 index 000000000000..ea96b2b15cd5 --- /dev/null +++ b/tests/disas/winch/aarch64/f64_convert_i64_s/locals.wat @@ -0,0 +1,28 @@ +;;! target = "aarch64" +;;! test = "winch" + +(module + (func (result f64) + (local i64) + + (local.get 0) + (f64.convert_i64_s) + ) +) +;; wasm[0]::function[0]: +;; stp x29, x30, [sp, #-0x10]! 
+;; mov x29, sp +;; mov x28, sp +;; mov x9, x0 +;; sub sp, sp, #0x18 +;; mov x28, sp +;; stur x0, [x28, #0x10] +;; stur x1, [x28, #8] +;; mov x16, #0 +;; stur x16, [x28] +;; ldur x0, [x28] +;; scvtf d0, x0 +;; add sp, sp, #0x18 +;; mov x28, sp +;; ldp x29, x30, [sp], #0x10 +;; ret diff --git a/tests/disas/winch/aarch64/f64_convert_i64_s/params.wat b/tests/disas/winch/aarch64/f64_convert_i64_s/params.wat new file mode 100644 index 000000000000..d5bba734be30 --- /dev/null +++ b/tests/disas/winch/aarch64/f64_convert_i64_s/params.wat @@ -0,0 +1,25 @@ +;;! target = "aarch64" +;;! test = "winch" + +(module + (func (param i64) (result f64) + (local.get 0) + (f64.convert_i64_s) + ) +) +;; wasm[0]::function[0]: +;; stp x29, x30, [sp, #-0x10]! +;; mov x29, sp +;; mov x28, sp +;; mov x9, x0 +;; sub sp, sp, #0x18 +;; mov x28, sp +;; stur x0, [x28, #0x10] +;; stur x1, [x28, #8] +;; stur x2, [x28] +;; ldur x0, [x28] +;; scvtf d0, x0 +;; add sp, sp, #0x18 +;; mov x28, sp +;; ldp x29, x30, [sp], #0x10 +;; ret diff --git a/tests/disas/winch/aarch64/f64_convert_i64_s/spilled.wat b/tests/disas/winch/aarch64/f64_convert_i64_s/spilled.wat new file mode 100644 index 000000000000..5038e02fb556 --- /dev/null +++ b/tests/disas/winch/aarch64/f64_convert_i64_s/spilled.wat @@ -0,0 +1,33 @@ +;;! target = "aarch64" +;;! test = "winch" + +(module + (func (result f64) + i64.const 1 + f64.convert_i64_s + block + end + ) +) +;; wasm[0]::function[0]: +;; stp x29, x30, [sp, #-0x10]! 
+;; mov x29, sp +;; mov x28, sp +;; mov x9, x0 +;; sub sp, sp, #0x10 +;; mov x28, sp +;; stur x0, [x28, #8] +;; stur x1, [x28] +;; mov x16, #1 +;; mov x0, x16 +;; scvtf d0, x0 +;; sub sp, sp, #8 +;; mov x28, sp +;; stur d0, [x28] +;; ldur d0, [x28] +;; add sp, sp, #8 +;; mov x28, sp +;; add sp, sp, #0x10 +;; mov x28, sp +;; ldp x29, x30, [sp], #0x10 +;; ret diff --git a/tests/disas/winch/aarch64/f64_convert_i64_u/const.wat b/tests/disas/winch/aarch64/f64_convert_i64_u/const.wat new file mode 100644 index 000000000000..a5289b0106e5 --- /dev/null +++ b/tests/disas/winch/aarch64/f64_convert_i64_u/const.wat @@ -0,0 +1,25 @@ +;;! target = "aarch64" +;;! test = "winch" + +(module + (func (result f64) + (i64.const 1) + (f64.convert_i64_u) + ) +) +;; wasm[0]::function[0]: +;; stp x29, x30, [sp, #-0x10]! +;; mov x29, sp +;; mov x28, sp +;; mov x9, x0 +;; sub sp, sp, #0x10 +;; mov x28, sp +;; stur x0, [x28, #8] +;; stur x1, [x28] +;; mov x16, #1 +;; mov x1, x16 +;; ucvtf d0, x1 +;; add sp, sp, #0x10 +;; mov x28, sp +;; ldp x29, x30, [sp], #0x10 +;; ret diff --git a/tests/disas/winch/aarch64/f64_convert_i64_u/locals.wat b/tests/disas/winch/aarch64/f64_convert_i64_u/locals.wat new file mode 100644 index 000000000000..2f13d71619c1 --- /dev/null +++ b/tests/disas/winch/aarch64/f64_convert_i64_u/locals.wat @@ -0,0 +1,28 @@ +;;! target = "aarch64" +;;! test = "winch" + +(module + (func (result f64) + (local i64) + + (local.get 0) + (f64.convert_i64_u) + ) +) +;; wasm[0]::function[0]: +;; stp x29, x30, [sp, #-0x10]! 
+;; mov x29, sp +;; mov x28, sp +;; mov x9, x0 +;; sub sp, sp, #0x18 +;; mov x28, sp +;; stur x0, [x28, #0x10] +;; stur x1, [x28, #8] +;; mov x16, #0 +;; stur x16, [x28] +;; ldur x1, [x28] +;; ucvtf d0, x1 +;; add sp, sp, #0x18 +;; mov x28, sp +;; ldp x29, x30, [sp], #0x10 +;; ret diff --git a/tests/disas/winch/aarch64/f64_convert_i64_u/params.wat b/tests/disas/winch/aarch64/f64_convert_i64_u/params.wat new file mode 100644 index 000000000000..8bcabb2daebe --- /dev/null +++ b/tests/disas/winch/aarch64/f64_convert_i64_u/params.wat @@ -0,0 +1,25 @@ +;;! target = "aarch64" +;;! test = "winch" + +(module + (func (param i64) (result f64) + (local.get 0) + (f64.convert_i64_u) + ) +) +;; wasm[0]::function[0]: +;; stp x29, x30, [sp, #-0x10]! +;; mov x29, sp +;; mov x28, sp +;; mov x9, x0 +;; sub sp, sp, #0x18 +;; mov x28, sp +;; stur x0, [x28, #0x10] +;; stur x1, [x28, #8] +;; stur x2, [x28] +;; ldur x1, [x28] +;; ucvtf d0, x1 +;; add sp, sp, #0x18 +;; mov x28, sp +;; ldp x29, x30, [sp], #0x10 +;; ret diff --git a/tests/disas/winch/aarch64/f64_convert_i64_u/spilled.wat b/tests/disas/winch/aarch64/f64_convert_i64_u/spilled.wat new file mode 100644 index 000000000000..80b9a97b35ba --- /dev/null +++ b/tests/disas/winch/aarch64/f64_convert_i64_u/spilled.wat @@ -0,0 +1,33 @@ +;;! target = "aarch64" +;;! test = "winch" + +(module + (func (result f64) + i64.const 1 + f64.convert_i64_u + block + end + ) +) +;; wasm[0]::function[0]: +;; stp x29, x30, [sp, #-0x10]! 
+;; mov x29, sp +;; mov x28, sp +;; mov x9, x0 +;; sub sp, sp, #0x10 +;; mov x28, sp +;; stur x0, [x28, #8] +;; stur x1, [x28] +;; mov x16, #1 +;; mov x1, x16 +;; ucvtf d0, x1 +;; sub sp, sp, #8 +;; mov x28, sp +;; stur d0, [x28] +;; ldur d0, [x28] +;; add sp, sp, #8 +;; mov x28, sp +;; add sp, sp, #0x10 +;; mov x28, sp +;; ldp x29, x30, [sp], #0x10 +;; ret diff --git a/tests/disas/winch/aarch64/f64_reinterpret_i64/const.wat b/tests/disas/winch/aarch64/f64_reinterpret_i64/const.wat index 850e6d7af3c1..ee0cdfaddb59 100644 --- a/tests/disas/winch/aarch64/f64_reinterpret_i64/const.wat +++ b/tests/disas/winch/aarch64/f64_reinterpret_i64/const.wat @@ -18,7 +18,7 @@ ;; stur x1, [x28] ;; mov x16, #1 ;; mov x0, x16 -;; scvtf d0, x0 +;; fmov d0, x0 ;; add sp, sp, #0x10 ;; mov x28, sp ;; ldp x29, x30, [sp], #0x10 diff --git a/tests/disas/winch/aarch64/f64_reinterpret_i64/locals.wat b/tests/disas/winch/aarch64/f64_reinterpret_i64/locals.wat index 03bf4808fc47..c8c0d9b02a98 100644 --- a/tests/disas/winch/aarch64/f64_reinterpret_i64/locals.wat +++ b/tests/disas/winch/aarch64/f64_reinterpret_i64/locals.wat @@ -21,7 +21,7 @@ ;; mov x16, #0 ;; stur x16, [x28] ;; ldur x0, [x28] -;; scvtf d0, x0 +;; fmov d0, x0 ;; add sp, sp, #0x18 ;; mov x28, sp ;; ldp x29, x30, [sp], #0x10 diff --git a/tests/disas/winch/aarch64/f64_reinterpret_i64/params.wat b/tests/disas/winch/aarch64/f64_reinterpret_i64/params.wat index fd5c1e5c00c1..9b18dae34166 100644 --- a/tests/disas/winch/aarch64/f64_reinterpret_i64/params.wat +++ b/tests/disas/winch/aarch64/f64_reinterpret_i64/params.wat @@ -18,7 +18,7 @@ ;; stur x1, [x28, #8] ;; stur x2, [x28] ;; ldur x0, [x28] -;; scvtf d0, x0 +;; fmov d0, x0 ;; add sp, sp, #0x18 ;; mov x28, sp ;; ldp x29, x30, [sp], #0x10 diff --git a/tests/disas/winch/aarch64/f64_reinterpret_i64/ret_int.wat b/tests/disas/winch/aarch64/f64_reinterpret_i64/ret_int.wat index 71c0b1b22ad9..684360b374b5 100644 --- a/tests/disas/winch/aarch64/f64_reinterpret_i64/ret_int.wat +++ 
b/tests/disas/winch/aarch64/f64_reinterpret_i64/ret_int.wat @@ -20,7 +20,7 @@ ;; stur x1, [x28] ;; mov x16, #1 ;; mov x0, x16 -;; scvtf d0, x0 +;; fmov d0, x0 ;; mov x16, #1 ;; mov x0, x16 ;; add sp, sp, #0x10 diff --git a/tests/disas/winch/aarch64/f64_reinterpret_i64/spilled.wat b/tests/disas/winch/aarch64/f64_reinterpret_i64/spilled.wat index 9eddaf9cb11f..dddc29096585 100644 --- a/tests/disas/winch/aarch64/f64_reinterpret_i64/spilled.wat +++ b/tests/disas/winch/aarch64/f64_reinterpret_i64/spilled.wat @@ -20,7 +20,7 @@ ;; stur x1, [x28] ;; mov x16, #1 ;; mov x0, x16 -;; scvtf d0, x0 +;; fmov d0, x0 ;; sub sp, sp, #8 ;; mov x28, sp ;; stur d0, [x28] diff --git a/tests/disas/winch/aarch64/i32_reinterpret_f32/const.wat b/tests/disas/winch/aarch64/i32_reinterpret_f32/const.wat index 7df0566fc97a..1e742b16b287 100644 --- a/tests/disas/winch/aarch64/i32_reinterpret_f32/const.wat +++ b/tests/disas/winch/aarch64/i32_reinterpret_f32/const.wat @@ -18,7 +18,7 @@ ;; stur x1, [x28] ;; mov x16, #0x3f800000 ;; fmov s0, w16 -;; fcvtzs w0, s0 +;; mov w0, v0.s[0] ;; add sp, sp, #0x10 ;; mov x28, sp ;; ldp x29, x30, [sp], #0x10 diff --git a/tests/disas/winch/aarch64/i32_reinterpret_f32/locals.wat b/tests/disas/winch/aarch64/i32_reinterpret_f32/locals.wat index 3d2372f5c681..22f63fbc1d72 100644 --- a/tests/disas/winch/aarch64/i32_reinterpret_f32/locals.wat +++ b/tests/disas/winch/aarch64/i32_reinterpret_f32/locals.wat @@ -21,7 +21,7 @@ ;; mov x16, #0 ;; stur x16, [x28] ;; ldur s0, [x28, #4] -;; fcvtzs w0, s0 +;; mov w0, v0.s[0] ;; add sp, sp, #0x18 ;; mov x28, sp ;; ldp x29, x30, [sp], #0x10 diff --git a/tests/disas/winch/aarch64/i32_reinterpret_f32/params.wat b/tests/disas/winch/aarch64/i32_reinterpret_f32/params.wat index 953a8fe1b339..9f658cc6718a 100644 --- a/tests/disas/winch/aarch64/i32_reinterpret_f32/params.wat +++ b/tests/disas/winch/aarch64/i32_reinterpret_f32/params.wat @@ -18,7 +18,7 @@ ;; stur x1, [x28, #8] ;; stur s0, [x28, #4] ;; ldur s0, [x28, #4] -;; fcvtzs w0, s0 
+;; mov w0, v0.s[0] ;; add sp, sp, #0x18 ;; mov x28, sp ;; ldp x29, x30, [sp], #0x10 diff --git a/tests/disas/winch/aarch64/i32_reinterpret_f32/ret_float.wat b/tests/disas/winch/aarch64/i32_reinterpret_f32/ret_float.wat index d8c9ab13a43b..2c8fa5b81104 100644 --- a/tests/disas/winch/aarch64/i32_reinterpret_f32/ret_float.wat +++ b/tests/disas/winch/aarch64/i32_reinterpret_f32/ret_float.wat @@ -20,7 +20,7 @@ ;; stur x1, [x28] ;; mov x16, #0x3f800000 ;; fmov s0, w16 -;; fcvtzs w0, s0 +;; mov w0, v0.s[0] ;; mov x16, #0x3f800000 ;; fmov s0, w16 ;; add sp, sp, #0x10 diff --git a/tests/disas/winch/aarch64/i64_reinterpret_f64/const.wat b/tests/disas/winch/aarch64/i64_reinterpret_f64/const.wat index dd8b0f44fb1f..7949beae66cb 100644 --- a/tests/disas/winch/aarch64/i64_reinterpret_f64/const.wat +++ b/tests/disas/winch/aarch64/i64_reinterpret_f64/const.wat @@ -18,7 +18,7 @@ ;; stur x1, [x28] ;; mov x16, #0x3ff0000000000000 ;; fmov d0, x16 -;; fcvtzs x0, d0 +;; mov x0, v0.d[0] ;; add sp, sp, #0x10 ;; mov x28, sp ;; ldp x29, x30, [sp], #0x10 diff --git a/tests/disas/winch/aarch64/i64_reinterpret_f64/locals.wat b/tests/disas/winch/aarch64/i64_reinterpret_f64/locals.wat index 7170a44a4e8b..7ab5e90b47ca 100644 --- a/tests/disas/winch/aarch64/i64_reinterpret_f64/locals.wat +++ b/tests/disas/winch/aarch64/i64_reinterpret_f64/locals.wat @@ -21,7 +21,7 @@ ;; mov x16, #0 ;; stur x16, [x28] ;; ldur d0, [x28] -;; fcvtzs x0, d0 +;; mov x0, v0.d[0] ;; add sp, sp, #0x18 ;; mov x28, sp ;; ldp x29, x30, [sp], #0x10 diff --git a/tests/disas/winch/aarch64/i64_reinterpret_f64/params.wat b/tests/disas/winch/aarch64/i64_reinterpret_f64/params.wat index 52ed87a9e24d..8e6ff8f72344 100644 --- a/tests/disas/winch/aarch64/i64_reinterpret_f64/params.wat +++ b/tests/disas/winch/aarch64/i64_reinterpret_f64/params.wat @@ -18,7 +18,7 @@ ;; stur x1, [x28, #8] ;; stur d0, [x28] ;; ldur d0, [x28] -;; fcvtzs x0, d0 +;; mov x0, v0.d[0] ;; add sp, sp, #0x18 ;; mov x28, sp ;; ldp x29, x30, [sp], #0x10 diff --git 
a/tests/disas/winch/aarch64/i64_reinterpret_f64/ret_float.wat b/tests/disas/winch/aarch64/i64_reinterpret_f64/ret_float.wat index 7419af60ff6b..93c3f9b19f94 100644 --- a/tests/disas/winch/aarch64/i64_reinterpret_f64/ret_float.wat +++ b/tests/disas/winch/aarch64/i64_reinterpret_f64/ret_float.wat @@ -20,7 +20,7 @@ ;; stur x1, [x28] ;; mov x16, #0x3ff0000000000000 ;; fmov d0, x16 -;; fcvtzs x0, d0 +;; mov x0, v0.d[0] ;; mov x16, #0x3ff0000000000000 ;; fmov d0, x16 ;; add sp, sp, #0x10 diff --git a/winch/codegen/src/isa/aarch64/asm.rs b/winch/codegen/src/isa/aarch64/asm.rs index 51fc0e6e9826..6bbc4994ac34 100644 --- a/winch/codegen/src/isa/aarch64/asm.rs +++ b/winch/codegen/src/isa/aarch64/asm.rs @@ -19,8 +19,8 @@ use cranelift_codegen::{ ALUOp, ALUOp3, AMode, BitOp, BranchTarget, Cond, CondBrKind, ExtendOp, FPULeftShiftImm, FPUOp1, FPUOp2, FPUOpRI::{self, UShr32, UShr64}, - FPUOpRIMod, FPURightShiftImm, FpuRoundMode, FpuToIntOp, Imm12, ImmLogic, ImmShift, Inst, - IntToFpuOp, PairAMode, ScalarSize, VecLanesOp, VecMisc2, VectorSize, + FPUOpRIMod, FPURightShiftImm, FpuRoundMode, Imm12, ImmLogic, ImmShift, Inst, IntToFpuOp, + PairAMode, ScalarSize, VecLanesOp, VecMisc2, VectorSize, }, settings, Final, MachBuffer, MachBufferFinalized, MachInst, MachInstEmit, MachInstEmitState, MachLabel, Writable, @@ -717,27 +717,43 @@ impl Assembler { }) } - /// Reinterpret a float as an integer. - pub fn fpu_to_int(&mut self, rn: Reg, rd: WritableReg, size: OperandSize) { - let op = match size { - OperandSize::S32 => FpuToIntOp::F32ToI32, - OperandSize::S64 => FpuToIntOp::F64ToI64, - OperandSize::S8 | OperandSize::S16 | OperandSize::S128 => unreachable!(), + /// Convert a signed integer to a float. 
+ pub fn cvt_sint_to_float( + &mut self, + rn: Reg, + rd: WritableReg, + src_size: OperandSize, + dst_size: OperandSize, + ) { + let op = match (src_size, dst_size) { + (OperandSize::S32, OperandSize::S32) => IntToFpuOp::I32ToF32, + (OperandSize::S64, OperandSize::S32) => IntToFpuOp::I64ToF32, + (OperandSize::S32, OperandSize::S64) => IntToFpuOp::I32ToF64, + (OperandSize::S64, OperandSize::S64) => IntToFpuOp::I64ToF64, + _ => unreachable!(), }; - self.emit(Inst::FpuToInt { + self.emit(Inst::IntToFpu { op, rd: rd.map(Into::into), rn: rn.into(), }); } - /// Reinterpret an integer as a float. - pub fn int_to_fpu(&mut self, rn: Reg, rd: WritableReg, size: OperandSize) { - let op = match size { - OperandSize::S32 => IntToFpuOp::I32ToF32, - OperandSize::S64 => IntToFpuOp::I64ToF64, - OperandSize::S8 | OperandSize::S16 | OperandSize::S128 => unreachable!(), + /// Convert an unsigned integer to a float. + pub fn cvt_uint_to_float( + &mut self, + rn: Reg, + rd: WritableReg, + src_size: OperandSize, + dst_size: OperandSize, + ) { + let op = match (src_size, dst_size) { + (OperandSize::S32, OperandSize::S32) => IntToFpuOp::U32ToF32, + (OperandSize::S64, OperandSize::S32) => IntToFpuOp::U64ToF32, + (OperandSize::S32, OperandSize::S64) => IntToFpuOp::U32ToF64, + (OperandSize::S64, OperandSize::S64) => IntToFpuOp::U64ToF64, + _ => unreachable!(), }; self.emit(Inst::IntToFpu { diff --git a/winch/codegen/src/isa/aarch64/masm.rs b/winch/codegen/src/isa/aarch64/masm.rs index 1e0342a9dba7..fa91909aba3a 100644 --- a/winch/codegen/src/isa/aarch64/masm.rs +++ b/winch/codegen/src/isa/aarch64/masm.rs @@ -535,31 +535,31 @@ impl Masm for MacroAssembler { fn signed_convert( &mut self, - _dst: WritableReg, - _src: Reg, - _src_size: OperandSize, - _dst_size: OperandSize, + dst: WritableReg, + src: Reg, + src_size: OperandSize, + dst_size: OperandSize, ) { - todo!() + self.asm.cvt_sint_to_float(src, dst, src_size, dst_size); } fn unsigned_convert( &mut self, - _dst: WritableReg, - _src: Reg, + 
dst: WritableReg, + src: Reg, _tmp_gpr: Reg, - _src_size: OperandSize, - _dst_size: OperandSize, + src_size: OperandSize, + dst_size: OperandSize, ) { - todo!() + self.asm.cvt_uint_to_float(src, dst, src_size, dst_size); } fn reinterpret_float_as_int(&mut self, dst: WritableReg, src: Reg, size: OperandSize) { - self.asm.fpu_to_int(src, dst, size); + self.asm.mov_from_vec(src, dst, 0, size); } fn reinterpret_int_as_float(&mut self, dst: WritableReg, src: Reg, size: OperandSize) { - self.asm.int_to_fpu(src, dst, size); + self.asm.mov_to_fpu(src, dst, size); } fn demote(&mut self, dst: WritableReg, src: Reg) { From c2fa8171f2e081b117d0d498bde5300592638f03 Mon Sep 17 00:00:00 2001 From: Alex Crichton Date: Wed, 11 Dec 2024 16:07:14 -0700 Subject: [PATCH 28/30] pulley: Add simple debugging support (#9796) This commit adds a `debug.rs` to Pulley to print out the instruction being executed and the state of all registers between instructions. This is turned off by default and does not have a runtime or environment-based configuration value. Instead changing this requires changing source code for now. This enables the interpreter loop to unconditionally use this "debugger" where it'll compile away to nothing in release/benchmarking situations. This commit additionally adds this support to the `tail_loop` module and fixes a few issues there such as it accidentally not being tested in CI as well as a new `#[cfg]` to use it on stable rust with normal `return` under the assumption that LLVM is highly likely to do TCO. 
--- .github/workflows/main.yml | 5 +- Cargo.toml | 9 ++- pulley/src/interp.rs | 5 +- pulley/src/interp/debug.rs | 128 ++++++++++++++++++++++++++++++++ pulley/src/interp/match_loop.rs | 5 +- pulley/src/interp/tail_loop.rs | 71 +++++++++++------- 6 files changed, 188 insertions(+), 35 deletions(-) create mode 100644 pulley/src/interp/debug.rs diff --git a/.github/workflows/main.yml b/.github/workflows/main.yml index 8fe8bd484bff..e3ca611df3a6 100644 --- a/.github/workflows/main.yml +++ b/.github/workflows/main.yml @@ -594,9 +594,12 @@ jobs: # Check that `pulley-interpreter` compiles with tail calls enabled. Don't # actually run the tests with tail calls enabled, because they are not yet # implemented in rustc and cause an ICE. - - run: cargo check -p pulley-interpreter + - run: cargo check -p pulley-interpreter --all-features env: RUSTFLAGS: "--cfg pulley_tail_calls" + - run: cargo test -p pulley-interpreter --all-features --release + env: + RUSTFLAGS: "--cfg pulley_assume_llvm_makes_tail_calls" # Ensure that fuzzers still build. 
# diff --git a/Cargo.toml b/Cargo.toml index e5d524207ad4..58f1a3226261 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -177,8 +177,13 @@ unused_import_braces = 'warn' unused-lifetimes = 'warn' unused-macro-rules = 'warn' -# Don't warn about unknown cfg condition in `#[cfg(pulley_tail_calls)]` -unexpected_cfgs = { level = "warn", check-cfg = ['cfg(pulley_tail_calls)'] } +# Don't warn about unknown cfgs for pulley +[workspace.lints.rust.unexpected_cfgs] +level = "warn" +check-cfg = [ + 'cfg(pulley_tail_calls)', + 'cfg(pulley_assume_llvm_makes_tail_calls)', +] [workspace.lints.clippy] # The default set of lints in Clippy is viewed as "too noisy" right now so diff --git a/pulley/src/interp.rs b/pulley/src/interp.rs index 44e7ae3a44eb..a4040a3838a0 100644 --- a/pulley/src/interp.rs +++ b/pulley/src/interp.rs @@ -13,9 +13,10 @@ use core::ops::{Index, IndexMut}; use core::ptr::NonNull; use sptr::Strict; -#[cfg(not(pulley_tail_calls))] +mod debug; +#[cfg(all(not(pulley_tail_calls), not(pulley_assume_llvm_makes_tail_calls)))] mod match_loop; -#[cfg(pulley_tail_calls)] +#[cfg(any(pulley_tail_calls, pulley_assume_llvm_makes_tail_calls))] mod tail_loop; const DEFAULT_STACK_SIZE: usize = 1 << 20; // 1 MiB diff --git a/pulley/src/interp/debug.rs b/pulley/src/interp/debug.rs new file mode 100644 index 000000000000..239136210129 --- /dev/null +++ b/pulley/src/interp/debug.rs @@ -0,0 +1,128 @@ +//! Primitive support for debugging Pulley +//! +//! This `Debug` visitor defined in this module is what's actually used as part +//! of the interpreter loop in Pulley. Due to the code size impact of always +//! including this and the runtime overhead of always checking a flag this is +//! enabled/disabled via a `const DEBUG` below. This is currently only really +//! suitable for one-off debugging while developing locally. +//! +//! The hope is that this'll eventually evolve into something more useful, but +//! for now it's a quick-and-easy way to dump all the instructions that are +//! 
executed as well as the values in various registers. +//! +//! If debugging is disabled, or in `#[no_std]` mode, then this module should +//! compile away (e.g. a "zero cost abstraction"). + +use super::Interpreter; +use crate::decode::{ExtendedOpVisitor, OpVisitor}; +use crate::imms::*; +use crate::regs::*; +use alloc::string::ToString; + +// Whether or not debugging is enabled at all. +const DEBUG: bool = false; + +// Whether or not these registers are dumped between each instruction. +const DEBUG_X_REGS: bool = true; +const DEBUG_F_REGS: bool = false; + +#[cfg(not(feature = "std"))] +macro_rules! print { + ($($t:tt)*) => ({ let _ = format_args!($($t)*); }) +} +#[cfg(not(feature = "std"))] +macro_rules! println { + () => (); + ($($t:tt)*) => ({ let _ = format_args!($($t)*); }) +} + +#[repr(transparent)] +pub(super) struct Debug<'a>(pub Interpreter<'a>); + +macro_rules! debug_then_delegate { + ( + $( + $( #[$attr:meta] )* + $snake_name:ident = $name:ident $( { + $( + $( #[$field_attr:meta] )* + $field:ident : $field_ty:ty + ),* + } )? ; + )* + ) => { + $( + $( #[$attr] )* + fn $snake_name(&mut self $( $( , $field : $field_ty )* )? ) -> Self::Return { + if DEBUG { + println!( + concat!( + stringify!($snake_name), + $( + $( + " ", + stringify!($field), + "={:?}", + )* + )? + ), + $($($field),*)? + ); + } + self.0.$snake_name($( $($field),* )?) 
+ } + )* + } +} + +impl<'a> OpVisitor for Debug<'a> { + type BytecodeStream = <Interpreter<'a> as OpVisitor>::BytecodeStream; + type Return = <Interpreter<'a> as OpVisitor>::Return; + + fn bytecode(&mut self) -> &mut Self::BytecodeStream { + self.0.bytecode() + } + + fn before_visit(&mut self) { + if !DEBUG { + return; + } + print!("\t{:?}\t", self.bytecode().as_ptr()); + } + + fn after_visit(&mut self) { + if !DEBUG { + return; + } + if DEBUG_X_REGS { + for (i, regs) in self.0.state.x_regs.chunks(4).enumerate() { + print!("\t\t"); + for (j, reg) in regs.iter().enumerate() { + let n = i * 4 + j; + let val = reg.get_u64(); + let reg = XReg::new(n as u8).unwrap().to_string(); + print!(" {reg:>3}={val:#018x}"); + } + println!(); + } + } + if DEBUG_F_REGS { + for (i, regs) in self.0.state.f_regs.chunks(4).enumerate() { + print!("\t\t"); + for (j, reg) in regs.iter().enumerate() { + let n = i * 4 + j; + let val = reg.get_f64().to_bits(); + let reg = FReg::new(n as u8).unwrap().to_string(); + print!(" {reg:>3}={val:#018x}"); + } + println!(); + } + } + } + + for_each_op!(debug_then_delegate); +} + +impl<'a> ExtendedOpVisitor for Debug<'a> { + for_each_extended_op!(debug_then_delegate); +} diff --git a/pulley/src/interp/match_loop.rs b/pulley/src/interp/match_loop.rs index f9f2d3bd0214..46d949632f75 100644 --- a/pulley/src/interp/match_loop.rs +++ b/pulley/src/interp/match_loop.rs @@ -19,8 +19,9 @@ use super::*; use crate::decode::unwrap_uninhabited; impl Interpreter<'_> { - pub fn run(mut self) -> Done { + pub fn run(self) -> Done { let mut decoder = Decoder::new(); + let mut visitor = debug::Debug(self); loop { // Here `decode_one` will call the appropriate `OpVisitor` method on // `self` via the trait implementation in the module above this. @@ -29,7 +30,7 @@ impl Interpreter<'_> { // // This will then continue indefinitely until the bytecode says it's // done. Note that only trusted bytecode is interpreted here. 
- match unwrap_uninhabited(decoder.decode_one(&mut self)) { + match unwrap_uninhabited(decoder.decode_one(&mut visitor)) { ControlFlow::Continue(()) => {} ControlFlow::Break(done) => break done, } diff --git a/pulley/src/interp/tail_loop.rs b/pulley/src/interp/tail_loop.rs index ddc714807820..d9f63ff6afb8 100644 --- a/pulley/src/interp/tail_loop.rs +++ b/pulley/src/interp/tail_loop.rs @@ -1,6 +1,35 @@ +//! Support executing the interpreter loop through tail-calls rather than a +//! source-level `loop`. +//! +//! This is an alternative means of executing the interpreter loop of Pulley. +//! The other method is in `match_loop.rs` which is a `loop` over a `match` +//! (more-or-less). This file instead transitions between opcodes with +//! tail-calls. +//! +//! At this time this module is more performant but disabled by default. Rust +//! does not have guaranteed tail call elimination at this time so this is not +//! a suitable means of writing an interpreter loop. That being said this is +//! included nonetheless for us to experiment and analyze with. +//! +//! There are two methods of using this module: +//! +//! * `RUSTFLAGS=--cfg=pulley_assume_llvm_makes_tail_calls` - this compilation +//! flag indicates that we should assume that LLVM will optimize to making +//! tail calls for things that look like tail calls. Practically this +//! probably only happens with `--release` and for popular native +//! architectures. It's up to the person compiling to manually +//! audit/verify/test that TCO is happening. +//! +//! * `RUSTFLAGS=--cfg=pulley_tail_calls` - this compilation flag indicates that +//! Rust's nightly-only support for guaranteed tail calls should be used. This +//! uses the `become` keyword, for example. At this time this feature of Rust +//! is highly experimental and not even complete. It only passes `cargo check` +//! at this time but doesn't actually run anywhere. 
+ use super::*; -use crate::decode::unwrap_uninhabited; +use crate::decode::{unwrap_uninhabited, ExtendedOpVisitor}; use crate::opcode::Opcode; +use crate::ExtendedOpcode; type Handler = fn(Interpreter<'_>) -> Done; @@ -15,12 +44,20 @@ type Handler = fn(Interpreter<'_>) -> Done; /// Macro bodies are just bags of tokens; the body is not parsed until after /// they are expanded, and this macro is only expanded when `pulley_tail_calls` /// is enabled. +#[cfg(pulley_tail_calls)] macro_rules! tail_call { ($e:expr) => { become $e }; } +#[cfg(pulley_assume_llvm_makes_tail_calls)] +macro_rules! tail_call { + ($e:expr) => { + return $e + }; +} + impl Interpreter<'_> { pub fn run(mut self) -> Done { // Perform a dynamic dispatch through a function pointer indexed by @@ -101,8 +138,10 @@ macro_rules! define_opcode_handler { crate::decode::operands::$snake_name(i.bytecode()) ); )? - match OpVisitor::$snake_name(&mut i, $($($field),*)?) { - ControlFlow::Continue(()) => tail_call!(i.run()), + let _ = &mut i; + let mut debug = debug::Debug(i); + match debug.$snake_name($($($field),*)?) { + ControlFlow::Continue(()) => tail_call!(debug.0.run()), ControlFlow::Break(done) => done, } } @@ -110,28 +149,4 @@ macro_rules! define_opcode_handler { } for_each_op!(define_opcode_handler); - -macro_rules! define_extended_opcode_handler { - ($( - $( #[$attr:meta] )* - $snake_name:ident = $name:ident $( { - $( - $( #[$field_attr:meta] )* - $field:ident : $field_ty:ty - ),* - } )?; - )*) => {$( - fn $snake_name(mut i: Interpreter<'_>) -> Done { - $( - let ($($field,)*) = unwrap_uninhabited( - crate::decode::operands::$snake_name(i.bytecode()) - ); - )? - match ExtendedOpVisitor::$snake_name(&mut i, $($($field),*)?) 
{ - ControlFlow::Continue(()) => tail_call!(i.run()), - ControlFlow::Break(done) => done, - } - } - )*}; -} -for_each_extended_op!(define_extended_opcode_handler); +for_each_extended_op!(define_opcode_handler); From fc9ec7a9c7aacff5c36823fc76f9481050f6c29a Mon Sep 17 00:00:00 2001 From: Alex Crichton Date: Wed, 11 Dec 2024 16:09:28 -0700 Subject: [PATCH 29/30] pulley: Support `runtests` in Cranelift filetests (#9795) This'll help when adding unit tests for Pulley and/or might be useful when debugging various lowerings and such in the future. I hope to enable more tests in the future once more pulley lowerings are available. --- cranelift/filetests/Cargo.toml | 2 +- .../filetests/filetests/runtests/br.clif | 4 ++ .../filetests/filetests/runtests/brif.clif | 4 ++ cranelift/filetests/src/function_runner.rs | 42 ++++++++++++++++-- cranelift/filetests/src/test_run.rs | 44 ++++++++++++++++--- 5 files changed, 87 insertions(+), 9 deletions(-) diff --git a/cranelift/filetests/Cargo.toml b/cranelift/filetests/Cargo.toml index b6e0df9814f2..7736ff247903 100644 --- a/cranelift/filetests/Cargo.toml +++ b/cranelift/filetests/Cargo.toml @@ -37,4 +37,4 @@ serde = { workspace = true } serde_derive = { workspace = true } cranelift.workspace = true smallvec = { workspace = true } -pulley-interpreter = { workspace = true, features = ["disas", "std"] } +pulley-interpreter = { workspace = true, features = ["disas", "std", "interp"] } diff --git a/cranelift/filetests/filetests/runtests/br.clif b/cranelift/filetests/filetests/runtests/br.clif index 689682ebd594..7308c78bf66e 100644 --- a/cranelift/filetests/filetests/runtests/br.clif +++ b/cranelift/filetests/filetests/runtests/br.clif @@ -5,6 +5,10 @@ target s390x target x86_64 target riscv64 target riscv64 has_c has_zcb +target pulley32 +target pulley32be +target pulley64 +target pulley64be function %jump() -> i8 { block0: diff --git a/cranelift/filetests/filetests/runtests/brif.clif b/cranelift/filetests/filetests/runtests/brif.clif 
index 60648d569cb2..54dea5ee6f26 100644 --- a/cranelift/filetests/filetests/runtests/brif.clif +++ b/cranelift/filetests/filetests/runtests/brif.clif @@ -5,6 +5,10 @@ target s390x target x86_64 target riscv64 target riscv64 has_c has_zcb +target pulley32 +target pulley32be +target pulley64 +target pulley64be function %brif_value(i8) -> i64 { block0(v0: i8): diff --git a/cranelift/filetests/src/function_runner.rs b/cranelift/filetests/src/function_runner.rs index bc00b1be7dbc..4b9a43bf48a4 100644 --- a/cranelift/filetests/src/function_runner.rs +++ b/cranelift/filetests/src/function_runner.rs @@ -13,9 +13,12 @@ use cranelift_jit::{JITBuilder, JITModule}; use cranelift_module::{FuncId, Linkage, Module, ModuleError}; use cranelift_native::builder_with_options; use cranelift_reader::TestFile; +use pulley_interpreter::interp as pulley; use std::cmp::max; use std::collections::hash_map::Entry; use std::collections::HashMap; +use std::ptr::NonNull; +use target_lexicon::Architecture; use thiserror::Error; const TESTFILE_NAMESPACE: u32 = 0; @@ -370,12 +373,45 @@ impl<'a> Trampoline<'a> { let function_ptr = self.module.get_finalized_function(self.func_id); let trampoline_ptr = self.module.get_finalized_function(self.trampoline_id); - let callable_trampoline: fn(*const u8, *mut u128) -> () = - unsafe { mem::transmute(trampoline_ptr) }; - callable_trampoline(function_ptr, arguments_address); + unsafe { + self.call_raw(trampoline_ptr, function_ptr, arguments_address); + } values.collect_returns(&self.func_signature) } + + unsafe fn call_raw( + &self, + trampoline_ptr: *const u8, + function_ptr: *const u8, + arguments_address: *mut u128, + ) { + match self.module.isa().triple().architecture { + // For the pulley target this is pulley bytecode, not machine code, + // so run the interpreter. 
+ Architecture::Pulley32 + | Architecture::Pulley64 + | Architecture::Pulley32be + | Architecture::Pulley64be => { + let mut state = pulley::Vm::new(); + state.call( + NonNull::new(trampoline_ptr.cast_mut()).unwrap(), + &[ + pulley::XRegVal::new_ptr(function_ptr.cast_mut()).into(), + pulley::XRegVal::new_ptr(arguments_address).into(), + ], + [], + ); + } + + // Other targets natively execute this machine code. + _ => { + let callable_trampoline: fn(*const u8, *mut u128) -> () = + unsafe { mem::transmute(trampoline_ptr) }; + callable_trampoline(function_ptr, arguments_address); + } + } + } } /// Compilation Error when compiling a function. diff --git a/cranelift/filetests/src/test_run.rs b/cranelift/filetests/src/test_run.rs index 1e7e977c2b24..aee78c164446 100644 --- a/cranelift/filetests/src/test_run.rs +++ b/cranelift/filetests/src/test_run.rs @@ -59,8 +59,23 @@ fn is_isa_compatible( let requested_arch = requested.triple().architecture; match (host_arch, requested_arch) { + // If the host matches the requested target, then that's all good. (host, requested) if host == requested => {} + + // Allow minor differences in risc-v targets. (Architecture::Riscv64(_), Architecture::Riscv64(_)) => {} + + // Any host can run pulley so long as the pointer width and endianness + // match. + ( + _, + Architecture::Pulley32 + | Architecture::Pulley64 + | Architecture::Pulley32be + | Architecture::Pulley64be, + ) if host.triple().pointer_width() == requested.triple().pointer_width() + && host.triple().endianness() == requested.triple().endianness() => {} + _ => { return Err(format!( "skipped {file_path}: host can't run {requested_arch:?} programs" @@ -72,6 +87,10 @@ fn is_isa_compatible( // we can't natively support on the host. 
let requested_flags = requested.isa_flags(); for req_value in requested_flags { + // pointer_width for pulley already validated above + if req_value.name == "pointer_width" { + continue; + } let requested = match req_value.as_bool() { Some(requested) => requested, None => unimplemented!("ISA flag {} of kind {:?}", req_value.name, req_value.kind()), @@ -116,11 +135,26 @@ fn compile_testfile( flags: &Flags, isa: &dyn TargetIsa, ) -> anyhow::Result { - // We can't use the requested ISA directly since it does not contain info - // about the operating system / calling convention / etc.. - // - // Copy the requested ISA flags into the host ISA and use that. - let isa = build_host_isa(false, flags.clone(), isa.isa_flags()); + let isa = match isa.triple().architecture { + // Convert `&dyn TargetIsa` to `OwnedTargetIsa` by re-making the ISA and + // applying pulley flags/etc. + Architecture::Pulley32 + | Architecture::Pulley64 + | Architecture::Pulley32be + | Architecture::Pulley64be => { + let mut builder = cranelift_codegen::isa::lookup(isa.triple().clone())?; + for value in isa.isa_flags() { + builder.set(value.name, &value.value_string()).unwrap(); + } + builder.finish(flags.clone())? + } + + // We can't use the requested ISA directly since it does not contain info + // about the operating system / calling convention / etc.. + // + // Copy the requested ISA flags into the host ISA and use that. + _ => build_host_isa(false, flags.clone(), isa.isa_flags()), + }; let mut tfc = TestFileCompiler::new(isa); tfc.add_testfile(testfile)?; From 9aa048b0eb0caf2bbd85a24bdada33928e42ad3b Mon Sep 17 00:00:00 2001 From: Alex Crichton Date: Wed, 11 Dec 2024 16:31:56 -0700 Subject: [PATCH 30/30] pulley: Flesh out conditional registers (#9793) * pulley: Flesh out conditional registers This commit redefines previous Pulley instructions working with conditional values and results to always operate on the low 32-bits of a register rather than the full 64-bit width of integer registers. 
This should help 32-bit platforms work with just a word and avoid an extraneous load of top bits that are likely always zero. The previous `br_if` and `br_if_not` instructions now have a "32" suffix to make it clear that they're only operating on 32-bit register widths. Additionally the `xeq32` family of instructions (compare-and-set) now all only define the low 32-bits of the destination register. Finally, lowerings of `select` in CLIF were added for integer and floating-point registers. cc #9783 * Fix interpreter tests --- .../codegen/src/isa/pulley_shared/abi.rs | 13 ++- .../codegen/src/isa/pulley_shared/inst.isle | 16 +--- .../src/isa/pulley_shared/inst/emit.rs | 6 +- .../codegen/src/isa/pulley_shared/inst/mod.rs | 8 +- .../codegen/src/isa/pulley_shared/lower.isle | 42 +++++++- .../filetests/isa/pulley32/brif.clif | 34 +++---- .../filetests/isa/pulley32/jump.clif | 4 +- .../filetests/isa/pulley32/trap.clif | 38 +++++--- .../filetests/isa/pulley64/brif.clif | 34 +++---- .../filetests/isa/pulley64/jump.clif | 4 +- .../filetests/isa/pulley64/trap.clif | 38 +++++--- crates/wast-util/src/lib.rs | 4 - pulley/src/interp.rs | 96 +++++++++++++++---- pulley/src/lib.rs | 47 +++++---- pulley/tests/all/interp.rs | 24 ++--- tests/disas/pulley/epoch-simple.wat | 2 +- 16 files changed, 262 insertions(+), 148 deletions(-) diff --git a/cranelift/codegen/src/isa/pulley_shared/abi.rs b/cranelift/codegen/src/isa/pulley_shared/abi.rs index c304eb3f5031..d8e68c43ef76 100644 --- a/cranelift/codegen/src/isa/pulley_shared/abi.rs +++ b/cranelift/codegen/src/isa/pulley_shared/abi.rs @@ -532,16 +532,15 @@ where _isa_flags: &PulleyFlags, ) -> u32 { match rc { - // Spilling an integer register requires spilling 8 bytes, and spill - // slots are defined in terms of "word bytes" or the size of a - // pointer. That means on 32-bit pulley we need to take up two spill - // slots for integers where on 64-bit pulley we need to only take up - // one spill slot for integers. 
- RegClass::Int => match P::pointer_width() { + // Spilling an integer or float register requires spilling 8 bytes, + // and spill slots are defined in terms of "word bytes" or the size + // of a pointer. That means on 32-bit pulley we need to take up two + // spill slots where on 64-bit pulley we need to only take up one + // spill slot. + RegClass::Int | RegClass::Float => match P::pointer_width() { PointerWidth::PointerWidth32 => 2, PointerWidth::PointerWidth64 => 1, }, - RegClass::Float => todo!(), RegClass::Vector => unreachable!(), } } diff --git a/cranelift/codegen/src/isa/pulley_shared/inst.isle b/cranelift/codegen/src/isa/pulley_shared/inst.isle index 931aec79b5a8..41a4492fa184 100644 --- a/cranelift/codegen/src/isa/pulley_shared/inst.isle +++ b/cranelift/codegen/src/isa/pulley_shared/inst.isle @@ -61,7 +61,7 @@ (Jump (label MachLabel)) ;; Jump to `then` if `c` is nonzero, otherwise to `else`. - (BrIf (c XReg) (taken MachLabel) (not_taken MachLabel)) + (BrIf32 (c XReg) (taken MachLabel) (not_taken MachLabel)) ;; Compare-and-branch macro ops. 
(BrIfXeq32 (src1 XReg) (src2 XReg) (taken MachLabel) (not_taken MachLabel)) @@ -372,9 +372,9 @@ (rule (pulley_jump label) (SideEffectNoResult.Inst (MInst.Jump label))) -(decl pulley_br_if (XReg MachLabel MachLabel) SideEffectNoResult) -(rule (pulley_br_if c taken not_taken) - (SideEffectNoResult.Inst (MInst.BrIf c taken not_taken))) +(decl pulley_br_if32 (XReg MachLabel MachLabel) SideEffectNoResult) +(rule (pulley_br_if32 c taken not_taken) + (SideEffectNoResult.Inst (MInst.BrIf32 c taken not_taken))) (decl pulley_br_if_xeq32 (XReg XReg MachLabel MachLabel) SideEffectNoResult) (rule (pulley_br_if_xeq32 a b taken not_taken) @@ -431,11 +431,3 @@ (decl gen_call_indirect (SigRef Value ValueSlice) InstOutput) (extern constructor gen_call_indirect gen_call_indirect) - -;;;; Helpers for Sign/Zero extension ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; - -(decl zext (Value) XReg) -(rule (zext val @ (value_type $I64)) val) -(rule (zext val @ (value_type $I32)) (pulley_zext32 val)) -(rule (zext val @ (value_type $I16)) (pulley_zext16 val)) -(rule (zext val @ (value_type $I8)) (pulley_zext8 val)) diff --git a/cranelift/codegen/src/isa/pulley_shared/inst/emit.rs b/cranelift/codegen/src/isa/pulley_shared/inst/emit.rs index 5d3d752cf30d..463c920f4340 100644 --- a/cranelift/codegen/src/isa/pulley_shared/inst/emit.rs +++ b/cranelift/codegen/src/isa/pulley_shared/inst/emit.rs @@ -247,7 +247,7 @@ fn pulley_emit
<P>
( enc::jump(sink, 0x00000000); } - Inst::BrIf { + Inst::BrIf32 { c, taken, not_taken, @@ -258,14 +258,14 @@ fn pulley_emit
<P>
( sink.use_label_at_offset(taken_start, *taken, LabelUse::Jump(2)); let mut inverted = SmallVec::<[u8; 16]>::new(); - enc::br_if_not(&mut inverted, c, 0x00000000); + enc::br_if_not32(&mut inverted, c, 0x00000000); debug_assert_eq!( inverted.len(), usize::try_from(taken_end - *start_offset).unwrap() ); sink.add_cond_branch(*start_offset, taken_end, *taken, &inverted); - enc::br_if(sink, c, 0x00000000); + enc::br_if32(sink, c, 0x00000000); debug_assert_eq!(sink.cur_offset(), taken_end); // If not taken. diff --git a/cranelift/codegen/src/isa/pulley_shared/inst/mod.rs b/cranelift/codegen/src/isa/pulley_shared/inst/mod.rs index 3ab1c72033b4..41d82c2ed941 100644 --- a/cranelift/codegen/src/isa/pulley_shared/inst/mod.rs +++ b/cranelift/codegen/src/isa/pulley_shared/inst/mod.rs @@ -164,7 +164,7 @@ fn pulley_get_operands(inst: &mut Inst, collector: &mut impl OperandVisitor) { Inst::Jump { .. } => {} - Inst::BrIf { + Inst::BrIf32 { c, taken: _, not_taken: _, @@ -426,7 +426,7 @@ where } | Inst::Rets { .. } => MachTerminator::Ret, Inst::Jump { .. } => MachTerminator::Uncond, - Inst::BrIf { .. } + Inst::BrIf32 { .. } | Inst::BrIfXeq32 { .. } | Inst::BrIfXneq32 { .. } | Inst::BrIfXslt32 { .. 
} @@ -651,7 +651,7 @@ impl Inst { Inst::Jump { label } => format!("jump {}", label.to_string()), - Inst::BrIf { + Inst::BrIf32 { c, taken, not_taken, @@ -659,7 +659,7 @@ impl Inst { let c = format_reg(**c); let taken = taken.to_string(); let not_taken = not_taken.to_string(); - format!("br_if {c}, {taken}; jump {not_taken}") + format!("br_if32 {c}, {taken}; jump {not_taken}") } Inst::BrIfXeq32 { diff --git a/cranelift/codegen/src/isa/pulley_shared/lower.isle b/cranelift/codegen/src/isa/pulley_shared/lower.isle index e96862716fd9..342bcc344d08 100644 --- a/cranelift/codegen/src/isa/pulley_shared/lower.isle +++ b/cranelift/codegen/src/isa/pulley_shared/lower.isle @@ -6,6 +6,20 @@ ;;;; Rules for Control Flow ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +;; Helper to place a conditional `Value` provided into a register. Pulley +;; conditional values occupy the full low 32-bits of a register and so this +;; needs to handle situations such as when the `Value` is 64-bits, where an +;; explicit comparison must be made. Additionally if `Value` is smaller than +;; 32-bits then it must be zero-extended up to at least 32 bits. +(decl lower_cond (Value) XReg) +(rule (lower_cond val @ (value_type $I64)) (pulley_xneq64 val (pulley_xconst8 0))) +(rule (lower_cond val @ (value_type $I32)) val) +(rule (lower_cond val @ (value_type $I16)) (pulley_zext16 val)) +(rule (lower_cond val @ (value_type $I8)) (pulley_zext8 val)) + +;; Peel away explicit `uextend` values to take a look at the inner value. +(rule 1 (lower_cond (uextend val)) (lower_cond val)) + ;; The main control-flow-lowering term: takes a control-flow instruction and ;; target(s) and emits the necessary instructions. (decl partial lower_branch (Inst MachLabelSlice) Unit) @@ -15,8 +29,8 @@ (emit_side_effect (pulley_jump label))) ;; Generic case for conditional branches. 
-(rule -1 (lower_branch (brif (maybe_uextend c) _ _) (two_targets then else)) - (emit_side_effect (pulley_br_if (zext c) then else))) +(rule -1 (lower_branch (brif c _ _) (two_targets then else)) + (emit_side_effect (pulley_br_if32 (lower_cond c) then else))) ;; Conditional branches on `icmp`s. (rule (lower_branch (brif (maybe_uextend (icmp cc a b @ (value_type $I32))) _ _) @@ -420,8 +434,14 @@ ;;;; Rules for `uextend` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; -(rule (lower (has_type (fits_in_64 _) (uextend val))) - (zext val)) +(rule (lower (has_type (fits_in_64 _) (uextend val @ (value_type $I32)))) + (pulley_zext32 val)) + +(rule (lower (has_type (fits_in_64 _) (uextend val @ (value_type $I16)))) + (pulley_zext16 val)) + +(rule (lower (has_type (fits_in_64 _) (uextend val @ (value_type $I8)))) + (pulley_zext8 val)) ;;;; Rules for `sextend` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; @@ -446,3 +466,17 @@ (rule (lower (has_type $I64 (uadd_overflow_trap a b tc))) (pulley_xadd64_uoverflow_trap a b tc)) + +;;;; Rules for `select` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + +(rule 0 (lower (has_type (ty_int (fits_in_32 _)) (select c a b))) + (pulley_xselect32 (lower_cond c) a b)) + +(rule 1 (lower (has_type $I64 (select c a b))) + (pulley_xselect64 (lower_cond c) a b)) + +(rule 1 (lower (has_type $F32 (select c a b))) + (pulley_fselect32 (lower_cond c) a b)) + +(rule 1 (lower (has_type $F64 (select c a b))) + (pulley_fselect64 (lower_cond c) a b)) diff --git a/cranelift/filetests/filetests/isa/pulley32/brif.clif b/cranelift/filetests/filetests/isa/pulley32/brif.clif index 07826617c5ff..73059c7a65d9 100644 --- a/cranelift/filetests/filetests/isa/pulley32/brif.clif +++ b/cranelift/filetests/filetests/isa/pulley32/brif.clif @@ -17,7 +17,7 @@ block2: ; VCode: ; block0: ; zext8 x4, x0 -; br_if x4, label2; jump label1 +; br_if32 x4, label2; jump label1 ; block1: ; xconst8 x0, 0 ; ret @@ -27,7 +27,7 @@ block2: ; ; Disassembled: ; 
zext8 x4, x0 -; br_if x4, 0xa // target = 0xd +; br_if32 x4, 0xa // target = 0xd ; xconst8 x0, 0 ; ret ; xconst8 x0, 1 @@ -49,7 +49,7 @@ block2: ; VCode: ; block0: ; zext16 x4, x0 -; br_if x4, label2; jump label1 +; br_if32 x4, label2; jump label1 ; block1: ; xconst8 x0, 0 ; ret @@ -59,7 +59,7 @@ block2: ; ; Disassembled: ; zext16 x4, x0 -; br_if x4, 0xa // target = 0xd +; br_if32 x4, 0xa // target = 0xd ; xconst8 x0, 0 ; ret ; xconst8 x0, 1 @@ -80,8 +80,7 @@ block2: ; VCode: ; block0: -; zext32 x4, x0 -; br_if x4, label2; jump label1 +; br_if32 x0, label2; jump label1 ; block1: ; xconst8 x0, 0 ; ret @@ -90,8 +89,7 @@ block2: ; ret ; ; Disassembled: -; zext32 x4, x0 -; br_if x4, 0xa // target = 0xd +; br_if32 x0, 0xa // target = 0xa ; xconst8 x0, 0 ; ret ; xconst8 x0, 1 @@ -112,7 +110,9 @@ block2: ; VCode: ; block0: -; br_if x0, label2; jump label1 +; xconst8 x4, 0 +; xneq64 x6, x0, x4 +; br_if32 x6, label2; jump label1 ; block1: ; xconst8 x0, 0 ; ret @@ -121,7 +121,9 @@ block2: ; ret ; ; Disassembled: -; br_if x0, 0xa // target = 0xa +; xconst8 x4, 0 +; xneq64 x6, x0, x4 +; br_if32 x6, 0xa // target = 0x10 ; xconst8 x0, 0 ; ret ; xconst8 x0, 1 @@ -145,7 +147,7 @@ block2: ; block0: ; xeq32 x6, x0, x1 ; zext8 x6, x6 -; br_if x6, label2; jump label1 +; br_if32 x6, label2; jump label1 ; block1: ; xconst8 x0, 0 ; ret @@ -156,7 +158,7 @@ block2: ; Disassembled: ; xeq32 x6, x0, x1 ; zext8 x6, x6 -; br_if x6, 0xa // target = 0x10 +; br_if32 x6, 0xa // target = 0x10 ; xconst8 x0, 0 ; ret ; xconst8 x0, 1 @@ -180,7 +182,7 @@ block2: ; block0: ; xneq32 x6, x0, x1 ; zext8 x6, x6 -; br_if x6, label2; jump label1 +; br_if32 x6, label2; jump label1 ; block1: ; xconst8 x0, 0 ; ret @@ -191,7 +193,7 @@ block2: ; Disassembled: ; xneq32 x6, x0, x1 ; zext8 x6, x6 -; br_if x6, 0xa // target = 0x10 +; br_if32 x6, 0xa // target = 0x10 ; xconst8 x0, 0 ; ret ; xconst8 x0, 1 @@ -246,7 +248,7 @@ block2: ; block0: ; xulteq64 x6, x1, x0 ; zext8 x6, x6 -; br_if x6, label2; jump label1 +; br_if32 
x6, label2; jump label1 ; block1: ; xconst8 x0, 0 ; ret @@ -257,7 +259,7 @@ block2: ; Disassembled: ; xulteq64 x6, x1, x0 ; zext8 x6, x6 -; br_if x6, 0xa // target = 0x10 +; br_if32 x6, 0xa // target = 0x10 ; xconst8 x0, 0 ; ret ; xconst8 x0, 1 diff --git a/cranelift/filetests/filetests/isa/pulley32/jump.clif b/cranelift/filetests/filetests/isa/pulley32/jump.clif index a5a32180e7a8..d1029117857b 100644 --- a/cranelift/filetests/filetests/isa/pulley32/jump.clif +++ b/cranelift/filetests/filetests/isa/pulley32/jump.clif @@ -20,7 +20,7 @@ block3(v3: i8): ; VCode: ; block0: ; zext8 x5, x0 -; br_if x5, label2; jump label1 +; br_if32 x5, label2; jump label1 ; block1: ; xconst8 x0, 0 ; jump label3 @@ -32,7 +32,7 @@ block3(v3: i8): ; ; Disassembled: ; zext8 x5, x0 -; br_if x5, 0xe // target = 0x11 +; br_if32 x5, 0xe // target = 0x11 ; xconst8 x0, 0 ; jump 0x8 // target = 0x14 ; xconst8 x0, 1 diff --git a/cranelift/filetests/filetests/isa/pulley32/trap.clif b/cranelift/filetests/filetests/isa/pulley32/trap.clif index 99af0f918c83..8d5da4749bf1 100644 --- a/cranelift/filetests/filetests/isa/pulley32/trap.clif +++ b/cranelift/filetests/filetests/isa/pulley32/trap.clif @@ -110,21 +110,25 @@ block2: ; VCode: ; block0: -; br_if x0, label2; jump label1 +; xconst8 x4, 0 +; xneq64 x6, x0, x4 +; br_if32 x6, label2; jump label1 ; block1: ; ret ; block2: -; xconst8 x5, 42 -; xconst8 x6, 0 -; trap_if ne, Size64, x5, x6 // code = TrapCode(1) +; xconst8 x7, 42 +; xconst8 x8, 0 +; trap_if ne, Size64, x7, x8 // code = TrapCode(1) ; ret ; ; Disassembled: -; br_if x0, 0x7 // target = 0x7 +; xconst8 x4, 0 +; xneq64 x6, x0, x4 +; br_if32 x6, 0x7 // target = 0xd ; ret -; xconst8 x5, 42 -; xconst8 x6, 0 -; br_if_xneq64 x5, x6, 0x8 // target = 0x15 +; xconst8 x7, 42 +; xconst8 x8, 0 +; br_if_xneq64 x7, x8, 0x8 // target = 0x1b ; ret ; trap @@ -145,20 +149,24 @@ block2: ; VCode: ; block0: -; br_if x0, label2; jump label1 -; block1: ; xconst8 x4, 0 -; xconst8 x5, 0 -; trap_if eq, Size64, x4, x5 // 
code = TrapCode(1) +; xneq64 x6, x0, x4 +; br_if32 x6, label2; jump label1 +; block1: +; xconst8 x6, 0 +; xconst8 x7, 0 +; trap_if eq, Size64, x6, x7 // code = TrapCode(1) ; ret ; block2: ; ret ; ; Disassembled: -; br_if x0, 0x14 // target = 0x14 ; xconst8 x4, 0 -; xconst8 x5, 0 -; br_if_xeq64 x4, x5, 0x9 // target = 0x15 +; xneq64 x6, x0, x4 +; br_if32 x6, 0x14 // target = 0x1a +; xconst8 x6, 0 +; xconst8 x7, 0 +; br_if_xeq64 x6, x7, 0x9 // target = 0x1b ; ret ; ret ; trap diff --git a/cranelift/filetests/filetests/isa/pulley64/brif.clif b/cranelift/filetests/filetests/isa/pulley64/brif.clif index f8c854349eb5..d8ae5981d49f 100644 --- a/cranelift/filetests/filetests/isa/pulley64/brif.clif +++ b/cranelift/filetests/filetests/isa/pulley64/brif.clif @@ -17,7 +17,7 @@ block2: ; VCode: ; block0: ; zext8 x4, x0 -; br_if x4, label2; jump label1 +; br_if32 x4, label2; jump label1 ; block1: ; xconst8 x0, 0 ; ret @@ -27,7 +27,7 @@ block2: ; ; Disassembled: ; zext8 x4, x0 -; br_if x4, 0xa // target = 0xd +; br_if32 x4, 0xa // target = 0xd ; xconst8 x0, 0 ; ret ; xconst8 x0, 1 @@ -49,7 +49,7 @@ block2: ; VCode: ; block0: ; zext16 x4, x0 -; br_if x4, label2; jump label1 +; br_if32 x4, label2; jump label1 ; block1: ; xconst8 x0, 0 ; ret @@ -59,7 +59,7 @@ block2: ; ; Disassembled: ; zext16 x4, x0 -; br_if x4, 0xa // target = 0xd +; br_if32 x4, 0xa // target = 0xd ; xconst8 x0, 0 ; ret ; xconst8 x0, 1 @@ -80,8 +80,7 @@ block2: ; VCode: ; block0: -; zext32 x4, x0 -; br_if x4, label2; jump label1 +; br_if32 x0, label2; jump label1 ; block1: ; xconst8 x0, 0 ; ret @@ -90,8 +89,7 @@ block2: ; ret ; ; Disassembled: -; zext32 x4, x0 -; br_if x4, 0xa // target = 0xd +; br_if32 x0, 0xa // target = 0xa ; xconst8 x0, 0 ; ret ; xconst8 x0, 1 @@ -112,7 +110,9 @@ block2: ; VCode: ; block0: -; br_if x0, label2; jump label1 +; xconst8 x4, 0 +; xneq64 x6, x0, x4 +; br_if32 x6, label2; jump label1 ; block1: ; xconst8 x0, 0 ; ret @@ -121,7 +121,9 @@ block2: ; ret ; ; Disassembled: -; br_if x0, 0xa 
// target = 0xa +; xconst8 x4, 0 +; xneq64 x6, x0, x4 +; br_if32 x6, 0xa // target = 0x10 ; xconst8 x0, 0 ; ret ; xconst8 x0, 1 @@ -145,7 +147,7 @@ block2: ; block0: ; xeq32 x6, x0, x1 ; zext8 x6, x6 -; br_if x6, label2; jump label1 +; br_if32 x6, label2; jump label1 ; block1: ; xconst8 x0, 0 ; ret @@ -156,7 +158,7 @@ block2: ; Disassembled: ; xeq32 x6, x0, x1 ; zext8 x6, x6 -; br_if x6, 0xa // target = 0x10 +; br_if32 x6, 0xa // target = 0x10 ; xconst8 x0, 0 ; ret ; xconst8 x0, 1 @@ -180,7 +182,7 @@ block2: ; block0: ; xneq32 x6, x0, x1 ; zext8 x6, x6 -; br_if x6, label2; jump label1 +; br_if32 x6, label2; jump label1 ; block1: ; xconst8 x0, 0 ; ret @@ -191,7 +193,7 @@ block2: ; Disassembled: ; xneq32 x6, x0, x1 ; zext8 x6, x6 -; br_if x6, 0xa // target = 0x10 +; br_if32 x6, 0xa // target = 0x10 ; xconst8 x0, 0 ; ret ; xconst8 x0, 1 @@ -246,7 +248,7 @@ block2: ; block0: ; xulteq64 x6, x1, x0 ; zext8 x6, x6 -; br_if x6, label2; jump label1 +; br_if32 x6, label2; jump label1 ; block1: ; xconst8 x0, 0 ; ret @@ -257,7 +259,7 @@ block2: ; Disassembled: ; xulteq64 x6, x1, x0 ; zext8 x6, x6 -; br_if x6, 0xa // target = 0x10 +; br_if32 x6, 0xa // target = 0x10 ; xconst8 x0, 0 ; ret ; xconst8 x0, 1 diff --git a/cranelift/filetests/filetests/isa/pulley64/jump.clif b/cranelift/filetests/filetests/isa/pulley64/jump.clif index 7e1b2cc60dbb..a4b187bcc47f 100644 --- a/cranelift/filetests/filetests/isa/pulley64/jump.clif +++ b/cranelift/filetests/filetests/isa/pulley64/jump.clif @@ -20,7 +20,7 @@ block3(v3: i8): ; VCode: ; block0: ; zext8 x5, x0 -; br_if x5, label2; jump label1 +; br_if32 x5, label2; jump label1 ; block1: ; xconst8 x0, 0 ; jump label3 @@ -32,7 +32,7 @@ block3(v3: i8): ; ; Disassembled: ; zext8 x5, x0 -; br_if x5, 0xe // target = 0x11 +; br_if32 x5, 0xe // target = 0x11 ; xconst8 x0, 0 ; jump 0x8 // target = 0x14 ; xconst8 x0, 1 diff --git a/cranelift/filetests/filetests/isa/pulley64/trap.clif b/cranelift/filetests/filetests/isa/pulley64/trap.clif index 
34811b40efd7..ed68dbdf1665 100644 --- a/cranelift/filetests/filetests/isa/pulley64/trap.clif +++ b/cranelift/filetests/filetests/isa/pulley64/trap.clif @@ -110,21 +110,25 @@ block2: ; VCode: ; block0: -; br_if x0, label2; jump label1 +; xconst8 x4, 0 +; xneq64 x6, x0, x4 +; br_if32 x6, label2; jump label1 ; block1: ; ret ; block2: -; xconst8 x5, 42 -; xconst8 x6, 0 -; trap_if ne, Size64, x5, x6 // code = TrapCode(1) +; xconst8 x7, 42 +; xconst8 x8, 0 +; trap_if ne, Size64, x7, x8 // code = TrapCode(1) ; ret ; ; Disassembled: -; br_if x0, 0x7 // target = 0x7 +; xconst8 x4, 0 +; xneq64 x6, x0, x4 +; br_if32 x6, 0x7 // target = 0xd ; ret -; xconst8 x5, 42 -; xconst8 x6, 0 -; br_if_xneq64 x5, x6, 0x8 // target = 0x15 +; xconst8 x7, 42 +; xconst8 x8, 0 +; br_if_xneq64 x7, x8, 0x8 // target = 0x1b ; ret ; trap @@ -145,20 +149,24 @@ block2: ; VCode: ; block0: -; br_if x0, label2; jump label1 -; block1: ; xconst8 x4, 0 -; xconst8 x5, 0 -; trap_if eq, Size64, x4, x5 // code = TrapCode(1) +; xneq64 x6, x0, x4 +; br_if32 x6, label2; jump label1 +; block1: +; xconst8 x6, 0 +; xconst8 x7, 0 +; trap_if eq, Size64, x6, x7 // code = TrapCode(1) ; ret ; block2: ; ret ; ; Disassembled: -; br_if x0, 0x14 // target = 0x14 ; xconst8 x4, 0 -; xconst8 x5, 0 -; br_if_xeq64 x4, x5, 0x9 // target = 0x15 +; xneq64 x6, x0, x4 +; br_if32 x6, 0x14 // target = 0x1a +; xconst8 x6, 0 +; xconst8 x7, 0 +; br_if_xeq64 x6, x7, 0x9 // target = 0x1b ; ret ; ret ; trap diff --git a/crates/wast-util/src/lib.rs b/crates/wast-util/src/lib.rs index 2feadf5bbfac..7cf2c05a2631 100644 --- a/crates/wast-util/src/lib.rs +++ b/crates/wast-util/src/lib.rs @@ -395,7 +395,6 @@ impl WastTest { // features in Pulley are implemented. 
if config.compiler == Compiler::CraneliftPulley { let unsupported = [ - "misc_testsuite/br-table-fuzzbug.wast", "misc_testsuite/call_indirect.wast", "misc_testsuite/component-model/fused.wast", "misc_testsuite/component-model/strings.wast", @@ -413,7 +412,6 @@ impl WastTest { "misc_testsuite/gc/anyref_that_is_i31_barriers.wast", "misc_testsuite/gc/i31ref-of-global-initializers.wast", "misc_testsuite/gc/i31ref-tables.wast", - "misc_testsuite/gc/ref-test.wast", "misc_testsuite/int-to-float-splat.wast", "misc_testsuite/issue1809.wast", "misc_testsuite/issue4840.wast", @@ -440,7 +438,6 @@ impl WastTest { "misc_testsuite/simd/spillslot-size-fuzzbug.wast", "misc_testsuite/simd/unaligned-load.wast", "misc_testsuite/simd/v128-select.wast", - "misc_testsuite/sink-float-but-dont-trap.wast", "misc_testsuite/table_copy.wast", "misc_testsuite/table_copy_on_imported_tables.wast", "misc_testsuite/threads/LB_atomic.wast", @@ -458,7 +455,6 @@ impl WastTest { "misc_testsuite/winch/table_fill.wast", "misc_testsuite/winch/table_get.wast", "misc_testsuite/winch/table_set.wast", - "spec_testsuite/br_if.wast", "spec_testsuite/bulk.wast", "spec_testsuite/call.wast", "spec_testsuite/call_indirect.wast", diff --git a/pulley/src/interp.rs b/pulley/src/interp.rs index a4040a3838a0..c880eb1d40ff 100644 --- a/pulley/src/interp.rs +++ b/pulley/src/interp.rs @@ -904,8 +904,8 @@ impl OpVisitor for Interpreter<'_> { ControlFlow::Continue(()) } - fn br_if(&mut self, cond: XReg, offset: PcRelOffset) -> ControlFlow { - let cond = self.state[cond].get_u64(); + fn br_if32(&mut self, cond: XReg, offset: PcRelOffset) -> ControlFlow { + let cond = self.state[cond].get_u32(); if cond != 0 { self.pc_rel_jump::(offset) } else { @@ -913,8 +913,8 @@ impl OpVisitor for Interpreter<'_> { } } - fn br_if_not(&mut self, cond: XReg, offset: PcRelOffset) -> ControlFlow { - let cond = self.state[cond].get_u64(); + fn br_if_not32(&mut self, cond: XReg, offset: PcRelOffset) -> ControlFlow { + let cond = 
self.state[cond].get_u32(); if cond == 0 { self.pc_rel_jump::(offset) } else { @@ -1177,84 +1177,84 @@ impl OpVisitor for Interpreter<'_> { fn xeq64(&mut self, operands: BinaryOperands) -> ControlFlow { let a = self.state[operands.src1].get_u64(); let b = self.state[operands.src2].get_u64(); - self.state[operands.dst].set_u64(u64::from(a == b)); + self.state[operands.dst].set_u32(u32::from(a == b)); ControlFlow::Continue(()) } fn xneq64(&mut self, operands: BinaryOperands) -> ControlFlow { let a = self.state[operands.src1].get_u64(); let b = self.state[operands.src2].get_u64(); - self.state[operands.dst].set_u64(u64::from(a != b)); + self.state[operands.dst].set_u32(u32::from(a != b)); ControlFlow::Continue(()) } fn xslt64(&mut self, operands: BinaryOperands) -> ControlFlow { let a = self.state[operands.src1].get_i64(); let b = self.state[operands.src2].get_i64(); - self.state[operands.dst].set_u64(u64::from(a < b)); + self.state[operands.dst].set_u32(u32::from(a < b)); ControlFlow::Continue(()) } fn xslteq64(&mut self, operands: BinaryOperands) -> ControlFlow { let a = self.state[operands.src1].get_i64(); let b = self.state[operands.src2].get_i64(); - self.state[operands.dst].set_u64(u64::from(a <= b)); + self.state[operands.dst].set_u32(u32::from(a <= b)); ControlFlow::Continue(()) } fn xult64(&mut self, operands: BinaryOperands) -> ControlFlow { let a = self.state[operands.src1].get_u64(); let b = self.state[operands.src2].get_u64(); - self.state[operands.dst].set_u64(u64::from(a < b)); + self.state[operands.dst].set_u32(u32::from(a < b)); ControlFlow::Continue(()) } fn xulteq64(&mut self, operands: BinaryOperands) -> ControlFlow { let a = self.state[operands.src1].get_u64(); let b = self.state[operands.src2].get_u64(); - self.state[operands.dst].set_u64(u64::from(a <= b)); + self.state[operands.dst].set_u32(u32::from(a <= b)); ControlFlow::Continue(()) } fn xeq32(&mut self, operands: BinaryOperands) -> ControlFlow { let a = self.state[operands.src1].get_u32(); 
let b = self.state[operands.src2].get_u32(); - self.state[operands.dst].set_u64(u64::from(a == b)); + self.state[operands.dst].set_u32(u32::from(a == b)); ControlFlow::Continue(()) } fn xneq32(&mut self, operands: BinaryOperands) -> ControlFlow { let a = self.state[operands.src1].get_u32(); let b = self.state[operands.src2].get_u32(); - self.state[operands.dst].set_u64(u64::from(a != b)); + self.state[operands.dst].set_u32(u32::from(a != b)); ControlFlow::Continue(()) } fn xslt32(&mut self, operands: BinaryOperands) -> ControlFlow { let a = self.state[operands.src1].get_i32(); let b = self.state[operands.src2].get_i32(); - self.state[operands.dst].set_u64(u64::from(a < b)); + self.state[operands.dst].set_u32(u32::from(a < b)); ControlFlow::Continue(()) } fn xslteq32(&mut self, operands: BinaryOperands) -> ControlFlow { let a = self.state[operands.src1].get_i32(); let b = self.state[operands.src2].get_i32(); - self.state[operands.dst].set_u64(u64::from(a <= b)); + self.state[operands.dst].set_u32(u32::from(a <= b)); ControlFlow::Continue(()) } fn xult32(&mut self, operands: BinaryOperands) -> ControlFlow { let a = self.state[operands.src1].get_u32(); let b = self.state[operands.src2].get_u32(); - self.state[operands.dst].set_u64(u64::from(a < b)); + self.state[operands.dst].set_u32(u32::from(a < b)); ControlFlow::Continue(()) } fn xulteq32(&mut self, operands: BinaryOperands) -> ControlFlow { let a = self.state[operands.src1].get_u32(); let b = self.state[operands.src2].get_u32(); - self.state[operands.dst].set_u64(u64::from(a <= b)); + self.state[operands.dst].set_u32(u32::from(a <= b)); ControlFlow::Continue(()) } @@ -1774,6 +1774,70 @@ impl OpVisitor for Interpreter<'_> { self.state[dst].set_u64(a.leading_zeros().into()); ControlFlow::Continue(()) } + + fn xselect32( + &mut self, + dst: XReg, + cond: XReg, + if_nonzero: XReg, + if_zero: XReg, + ) -> ControlFlow { + let result = if self.state[cond].get_u32() != 0 { + self.state[if_nonzero].get_u32() + } else { + 
self.state[if_zero].get_u32() + }; + self.state[dst].set_u32(result); + ControlFlow::Continue(()) + } + + fn xselect64( + &mut self, + dst: XReg, + cond: XReg, + if_nonzero: XReg, + if_zero: XReg, + ) -> ControlFlow { + let result = if self.state[cond].get_u32() != 0 { + self.state[if_nonzero].get_u64() + } else { + self.state[if_zero].get_u64() + }; + self.state[dst].set_u64(result); + ControlFlow::Continue(()) + } + + fn fselect32( + &mut self, + dst: FReg, + cond: XReg, + if_nonzero: FReg, + if_zero: FReg, + ) -> ControlFlow { + let result = if self.state[cond].get_u32() != 0 { + self.state[if_nonzero].get_f32() + } else { + self.state[if_zero].get_f32() + }; + self.state[dst].set_f32(result); + ControlFlow::Continue(()) + } + + fn fselect64( + &mut self, + dst: FReg, + cond: XReg, + if_nonzero: FReg, + if_zero: FReg, + ) -> ControlFlow { + let result = if self.state[cond].get_u32() != 0 { + self.state[if_nonzero].get_f64() + } else { + self.state[if_zero].get_f64() + }; + self.state[dst].set_f64(result); + ControlFlow::Continue(()) + } } impl ExtendedOpVisitor for Interpreter<'_> { diff --git a/pulley/src/lib.rs b/pulley/src/lib.rs index 466ab221cd59..a2a462ad5b25 100644 --- a/pulley/src/lib.rs +++ b/pulley/src/lib.rs @@ -101,13 +101,13 @@ macro_rules! for_each_op { /// Unconditionally transfer control to the PC at the given offset. jump = Jump { offset: PcRelOffset }; - /// Conditionally transfer control to the given PC offset if `cond` - /// contains a non-zero value. - br_if = BrIf { cond: XReg, offset: PcRelOffset }; + /// Conditionally transfer control to the given PC offset if + /// `low32(cond)` contains a non-zero value. + br_if32 = BrIf { cond: XReg, offset: PcRelOffset }; - /// Conditionally transfer control to the given PC offset if `cond` - /// contains a zero value. - br_if_not = BrIfNot { cond: XReg, offset: PcRelOffset }; + /// Conditionally transfer control to the given PC offset if + /// `low32(cond)` contains a zero value. 
+ br_if_not32 = BrIfNot { cond: XReg, offset: PcRelOffset }; /// Branch if `a == b`. br_if_xeq32 = BrIfXeq32 { a: XReg, b: XReg, offset: PcRelOffset }; @@ -134,7 +134,7 @@ macro_rules! for_each_op { /// Branch if unsigned `a <= b`. br_if_xulteq64 = BrIfXulteq64 { a: XReg, b: XReg, offset: PcRelOffset }; - /// Branch to the label indicated by `idx`. + /// Branch to the label indicated by `low32(idx)`. /// /// After this instruction are `amt` instances of `PcRelOffset` /// and the `idx` selects which one will be branched to. The value @@ -207,29 +207,29 @@ macro_rules! for_each_op { /// `dst = src1 >> low6(src2)` xshr64_u = Xshr64U { operands: BinaryOperands }; - /// 64-bit equality. + /// `low32(dst) = src1 == src2` xeq64 = Xeq64 { operands: BinaryOperands }; - /// 64-bit inequality. + /// `low32(dst) = src1 != src2` xneq64 = Xneq64 { operands: BinaryOperands }; - /// 64-bit signed less-than. + /// `low32(dst) = src1 < src2` (signed) xslt64 = Xslt64 { operands: BinaryOperands }; - /// 64-bit signed less-than-equal. + /// `low32(dst) = src1 <= src2` (signed) xslteq64 = Xslteq64 { operands: BinaryOperands }; - /// 64-bit unsigned less-than. + /// `low32(dst) = src1 < src2` (unsigned) xult64 = Xult64 { operands: BinaryOperands }; - /// 64-bit unsigned less-than-equal. + /// `low32(dst) = src1 <= src2` (unsigned) xulteq64 = Xulteq64 { operands: BinaryOperands }; - /// 32-bit equality. + /// `low32(dst) = low32(src1) == low32(src2)` xeq32 = Xeq32 { operands: BinaryOperands }; - /// 32-bit inequality. + /// `low32(dst) = low32(src1) != low32(src2)` xneq32 = Xneq32 { operands: BinaryOperands }; - /// 32-bit signed less-than. + /// `low32(dst) = low32(src1) < low32(src2)` (signed) xslt32 = Xslt32 { operands: BinaryOperands }; - /// 32-bit signed less-than-equal. + /// `low32(dst) = low32(src1) <= low32(src2)` (signed) xslteq32 = Xslteq32 { operands: BinaryOperands }; - /// 32-bit unsigned less-than. 
+ /// `low32(dst) = low32(src1) < low32(src2)` (unsigned) xult32 = Xult32 { operands: BinaryOperands }; - /// 32-bit unsigned less-than-equal. + /// `low32(dst) = low32(src1) <= low32(src2)` (unsigned) xulteq32 = Xulteq32 { operands: BinaryOperands }; /// `low32(dst) = zext(*(ptr + offset))` @@ -386,6 +386,15 @@ macro_rules! for_each_op { flt64 = Flt64 { dst: XReg, src1: FReg, src2: FReg }; /// `low32(dst) = zext(src1 <= src2)` flteq64 = Flteq64 { dst: XReg, src1: FReg, src2: FReg }; + + /// `low32(dst) = low32(cond) ? low32(if_nonzero) : low32(if_zero)` + xselect32 = XSelect32 { dst: XReg, cond: XReg, if_nonzero: XReg, if_zero: XReg }; + /// `dst = low32(cond) ? if_nonzero : if_zero` + xselect64 = XSelect64 { dst: XReg, cond: XReg, if_nonzero: XReg, if_zero: XReg }; + /// `low32(dst) = low32(cond) ? low32(if_nonzero) : low32(if_zero)` + fselect32 = FSelect32 { dst: FReg, cond: XReg, if_nonzero: FReg, if_zero: FReg }; + /// `dst = low32(cond) ? if_nonzero : if_zero` + fselect64 = FSelect64 { dst: FReg, cond: XReg, if_nonzero: FReg, if_zero: FReg }; } }; } diff --git a/pulley/tests/all/interp.rs b/pulley/tests/all/interp.rs index 34dae9db7f3d..c93aaeea7456 100644 --- a/pulley/tests/all/interp.rs +++ b/pulley/tests/all/interp.rs @@ -194,7 +194,7 @@ fn xeq64() { }, }, x(0), - expected, + expected | 0x1234567800000000, ); } } @@ -219,7 +219,7 @@ fn xneq64() { }, }, x(0), - expected, + expected | 0x1234567800000000, ); } } @@ -251,7 +251,7 @@ fn xslt64() { }, }, x(0), - expected, + expected | 0x1234567800000000, ); } } @@ -283,7 +283,7 @@ fn xslteq64() { }, }, x(0), - expected, + expected | 0x1234567800000000, ); } } @@ -312,7 +312,7 @@ fn xult64() { }, }, x(0), - expected, + expected | 0x1234567800000000, ); } } @@ -341,7 +341,7 @@ fn xulteq64() { }, }, x(0), - expected, + expected | 0x1234567800000000, ); } } @@ -370,7 +370,7 @@ fn xeq32() { }, }, x(0), - expected, + expected | 0x1234567800000000, ); } } @@ -396,7 +396,7 @@ fn xneq32() { }, }, x(0), - expected, + 
expected | 0x1234567800000000, ); } } @@ -430,7 +430,7 @@ fn xslt32() { }, }, x(0), - expected, + expected | 0x1234567800000000, ); } } @@ -462,7 +462,7 @@ fn xslteq32() { }, }, x(0), - expected, + expected | 0x1234567800000000, ); } } @@ -490,7 +490,7 @@ fn xult32() { }, }, x(0), - expected, + expected | 0x1234567800000000, ); } } @@ -518,7 +518,7 @@ fn xulteq32() { }, }, x(0), - expected, + expected | 0x1234567800000000, ); } } diff --git a/tests/disas/pulley/epoch-simple.wat b/tests/disas/pulley/epoch-simple.wat index f39f6875a934..687ada74d2f1 100644 --- a/tests/disas/pulley/epoch-simple.wat +++ b/tests/disas/pulley/epoch-simple.wat @@ -13,7 +13,7 @@ ;; xload64le_offset32 x8, x8, 8 ;; xulteq64 x8, x8, x9 ;; zext8 x8, x8 -;; br_if x8, 0x8 // target = 0x2b +;; br_if32 x8, 0x8 // target = 0x2b ;; 29: pop_frame ;; ret ;; 2b: call 0xa2 // target = 0xcd