diff --git a/.github/workflows/main.yml b/.github/workflows/main.yml index 57f8b7049020..ddcbc49c08aa 100644 --- a/.github/workflows/main.yml +++ b/.github/workflows/main.yml @@ -605,9 +605,12 @@ jobs: # Check that `pulley-interpreter` compiles with tail calls enabled. Don't # actually run the tests with tail calls enabled, because they are not yet # implemented in rustc and cause an ICE. - - run: cargo check -p pulley-interpreter + - run: cargo check -p pulley-interpreter --all-features env: RUSTFLAGS: "--cfg pulley_tail_calls" + - run: cargo test -p pulley-interpreter --all-features --release + env: + RUSTFLAGS: "--cfg pulley_assume_llvm_makes_tail_calls" # Ensure that fuzzers still build. # diff --git a/ADOPTERS.md b/ADOPTERS.md index e211ab5378d6..e8b74e428ef3 100644 --- a/ADOPTERS.md +++ b/ADOPTERS.md @@ -12,6 +12,7 @@ Wasmtime is used in many different production use-cases. This list has grown sig | [Embark Studios](https://www.embark-studios.com/) | [@repi](https://github.com/repi) | ![production](https://img.shields.io/badge/-production-blue?style=flat) | Rust game engine | | [Fastly](https://fastly.com/) | [@fitzgen](https://github.com/fitzgen) | ![production](https://img.shields.io/badge/-production-blue?style=flat) | The Compute@Edge platform helps you compile your custom code to WebAssembly and runs it at the Fastly edge using the WebAssembly System Interface for each compute request. | | [Fermyon](https://fermyon.com) | [@tschneidereit](https://github.com/tschneidereit) | ![production](https://img.shields.io/badge/-production-blue?style=flat) | Fermyon Cloud is a cloud application platform for WebAssembly-based serverless functions and microservices. | +| [Huawei](https://www.huawei.com) | [@mohrobati](https://github.com/mohrobati) | ![production](https://img.shields.io/badge/-production-blue?style=flat) | Huawei Cloud uses Wasmtime to run WebAssembly functions in both serverless cloud and on the edge. | | [InfinyOn](https://infinyon.com/) | [@sehz](https://github.com/sehz) | ![production](https://img.shields.io/badge/-production-blue?style=flat) | InfinyOn leverages the power of WebAssembly SmartModules to execute real-time data transformations. | | [Microsoft](https://microsoft.com/) | [@devigned](https://gist.github.com/devigned) | ![production](https://img.shields.io/badge/-production-blue?style=flat) | Microsoft has had Wasmtime in preview for its WebAssembly System Interface (WASI) node pools in Azure Kubernetes Service since October 2021. | | [Redpanda](https://redpanda.com/) | [@rockwotj](https://github.com/rockwotj) | ![beta](https://img.shields.io/badge/-production-blue?style=flat) | Redpanda Data Transforms allow developers to transform data directly in the message broker. 
| diff --git a/Cargo.lock b/Cargo.lock index 30c08e474995..a074245a1603 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -139,9 +139,9 @@ checksum = "4c95c10ba0b00a02636238b814946408b1322d5ac4760326e6fb8ec956d85775" [[package]] name = "arbitrary" -version = "1.3.1" +version = "1.4.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a2e1373abdaa212b704512ec2bd8b26bd0b7d5c3f70117411a5d9a451383c859" +checksum = "dde20b3d026af13f561bdd0f15edf01fc734f0dafcedbaf42bba506a9517f223" dependencies = [ "derive_arbitrary", ] @@ -760,6 +760,7 @@ name = "cranelift-codegen-meta" version = "0.116.0" dependencies = [ "cranelift-codegen-shared", + "pulley-interpreter", ] [[package]] @@ -1083,9 +1084,9 @@ dependencies = [ [[package]] name = "derive_arbitrary" -version = "1.3.1" +version = "1.4.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "53e0efad4403bfc52dc201159c4b842a246a14b98c64b55dfd0f2d89729dfeb8" +checksum = "d475dfebcb4854d596b17b09f477616f80f17a550517f2b3615d8c205d5c802b" dependencies = [ "proc-macro2", "quote", diff --git a/Cargo.toml b/Cargo.toml index 64c09dcf4e59..ed38f7721b31 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -179,8 +179,13 @@ unused_import_braces = 'warn' unused-lifetimes = 'warn' unused-macro-rules = 'warn' -# Don't warn about unknown cfg condition in `#[cfg(pulley_tail_calls)]` -unexpected_cfgs = { level = "warn", check-cfg = ['cfg(pulley_tail_calls)'] } +# Don't warn about unknown cfgs for pulley +[workspace.lints.rust.unexpected_cfgs] +level = "warn" +check-cfg = [ + 'cfg(pulley_tail_calls)', + 'cfg(pulley_assume_llvm_makes_tail_calls)', +] [workspace.lints.clippy] # The default set of lints in Clippy is viewed as "too noisy" right now so @@ -198,7 +203,7 @@ unnecessary_cast = 'warn' allow_attributes_without_reason = 'warn' [workspace.dependencies] -arbitrary = { version = "1.3.1" } +arbitrary = { version = "1.4.0" } wasmtime-wmemcheck = { path = "crates/wmemcheck", version = "=29.0.0" } wasmtime = { path = "crates/wasmtime", version = "29.0.0", default-features = false } wasmtime-c-api-macros = { path = "crates/c-api-macros", version = "=29.0.0" } diff --git a/cranelift/codegen/Cargo.toml b/cranelift/codegen/Cargo.toml index 8c4a0a97f5b3..469cd639fc7e 100644 --- a/cranelift/codegen/Cargo.toml +++ b/cranelift/codegen/Cargo.toml @@ -86,7 +86,12 @@ x86 = [] arm64 = [] s390x = [] riscv64 = [] -pulley = ["dep:pulley-interpreter", "pulley-interpreter/encode", "pulley-interpreter/disas"] +pulley = [ + "dep:pulley-interpreter", + "pulley-interpreter/encode", + "pulley-interpreter/disas", + "cranelift-codegen-meta/pulley", +] # Enable the ISA target for the host machine host-arch = [] diff --git a/cranelift/codegen/meta/Cargo.toml b/cranelift/codegen/meta/Cargo.toml index 52e3df080ff9..46f042af9148 100644 --- a/cranelift/codegen/meta/Cargo.toml +++ b/cranelift/codegen/meta/Cargo.toml @@ -17,3 +17,7 @@ rustdoc-args = [ "--document-private-items" ] [dependencies] cranelift-codegen-shared = { path = "../shared", version = "0.116.0" } +pulley-interpreter = { workspace = true, optional = true } + +[features] +pulley = ['dep:pulley-interpreter'] diff --git a/cranelift/codegen/meta/src/isle.rs b/cranelift/codegen/meta/src/isle.rs index da32cda673f6..e2210df3a31d 100644 --- a/cranelift/codegen/meta/src/isle.rs +++ b/cranelift/codegen/meta/src/isle.rs @@ -64,6 +64,8 @@ pub fn get_isle_compilations( let prelude_isle = codegen_crate_dir.join("src").join("prelude.isle"); let prelude_opt_isle = 
codegen_crate_dir.join("src").join("prelude_opt.isle"); let prelude_lower_isle = codegen_crate_dir.join("src").join("prelude_lower.isle"); + #[cfg(feature = "pulley")] + let pulley_gen = gen_dir.join("pulley_gen.isle"); // Directory for mid-end optimizations. let src_opts = codegen_crate_dir.join("src").join("opts"); @@ -73,6 +75,7 @@ pub fn get_isle_compilations( let src_isa_aarch64 = codegen_crate_dir.join("src").join("isa").join("aarch64"); let src_isa_s390x = codegen_crate_dir.join("src").join("isa").join("s390x"); let src_isa_risc_v = codegen_crate_dir.join("src").join("isa").join("riscv64"); + #[cfg(feature = "pulley")] let src_isa_pulley_shared = codegen_crate_dir .join("src") .join("isa") @@ -166,6 +169,7 @@ pub fn get_isle_compilations( untracked_inputs: vec![clif_lower_isle.clone()], }, // The Pulley instruction selector. + #[cfg(feature = "pulley")] IsleCompilation { name: "pulley".to_string(), output: gen_dir.join("isle_pulley_shared.rs"), @@ -175,7 +179,7 @@ pub fn get_isle_compilations( src_isa_pulley_shared.join("inst.isle"), src_isa_pulley_shared.join("lower.isle"), ], - untracked_inputs: vec![clif_lower_isle.clone()], + untracked_inputs: vec![pulley_gen.clone(), clif_lower_isle.clone()], }, ], } diff --git a/cranelift/codegen/meta/src/lib.rs b/cranelift/codegen/meta/src/lib.rs index 480719d045d1..188e7e7ffbfb 100644 --- a/cranelift/codegen/meta/src/lib.rs +++ b/cranelift/codegen/meta/src/lib.rs @@ -20,6 +20,9 @@ mod constant_hash; mod shared; mod unique_table; +#[cfg(feature = "pulley")] +mod pulley; + /// Generate an ISA from an architecture string (e.g. "x86_64"). pub fn isa_from_arch(arch: &str) -> Result { isa::Isa::from_arch(arch).ok_or_else(|| format!("no supported isa found for arch `{arch}`")) @@ -63,6 +66,11 @@ fn generate_rust_for_shared_defs( )?; } + #[cfg(feature = "pulley")] + if isas.contains(&isa::Isa::Pulley32) || isas.contains(&isa::Isa::Pulley64) { + pulley::generate_rust("pulley_inst_gen.rs", out_dir)?; + } + Ok(()) } @@ -82,7 +90,12 @@ fn generate_isle_for_shared_defs( "clif_opt.isle", "clif_lower.isle", isle_dir, - ) + )?; + + #[cfg(feature = "pulley")] + pulley::generate_isle("pulley_gen.isle", isle_dir)?; + + Ok(()) } /// Generates all the source files used in Cranelift from the meta-language. diff --git a/cranelift/codegen/meta/src/pulley.rs b/cranelift/codegen/meta/src/pulley.rs new file mode 100644 index 000000000000..6e9d3831b783 --- /dev/null +++ b/cranelift/codegen/meta/src/pulley.rs @@ -0,0 +1,352 @@ +use crate::error::Error; +use std::path::Path; + +struct Inst<'a> { + snake_name: &'a str, + name: &'a str, + fields: &'a [(&'a str, &'a str)], +} + +macro_rules! define { + ( + $( + $( #[$attr:meta] )* + $snake_name:ident = $name:ident $( { $( $field:ident : $field_ty:ty ),* } )? 
; + )* + ) => { + &[$(Inst { + snake_name: stringify!($snake_name), + name: stringify!($name), + fields: &[$($( (stringify!($field), stringify!($field_ty)), )*)?], + }),*] + // helpers.push_str(concat!("(define pulley_", stringify!($snake_name), " (")); + }; +} + +const OPS: &[Inst<'_>] = pulley_interpreter::for_each_op!(define); +const EXTENDED_OPS: &[Inst<'_>] = pulley_interpreter::for_each_extended_op!(define); + +enum Operand<'a> { + Normal { name: &'a str, ty: &'a str }, + Writable { name: &'a str, ty: &'a str }, + TrapCode { name: &'a str, ty: &'a str }, + Binop { reg: &'a str }, +} + +impl Inst<'_> { + fn operands(&self) -> impl Iterator> { + self.fields + .iter() + .map(|(name, ty)| match (*name, *ty) { + ("operands", "BinaryOperands < XReg >") => Operand::Binop { reg: "XReg" }, + (name, "RegSet < XReg >") => Operand::Normal { + name, + ty: "VecXReg", + }, + ("dst", ty) => Operand::Writable { name, ty }, + (name, ty) => Operand::Normal { name, ty }, + }) + .chain(if self.name.contains("Trap") { + Some(Operand::TrapCode { + name: "code", + ty: "TrapCode", + }) + } else { + None + }) + } + + fn skip(&self) -> bool { + match self.name { + // Skip instructions related to control-flow as those require + // special handling with `MachBuffer`. + "Jump" | "Call" | "CallIndirect" => true, + + // Skip special instructions not used in Cranelift. + "XPush32Many" | "XPush64Many" | "XPop32Many" | "XPop64Many" => true, + + // Skip more branching-related instructions. + n => n.starts_with("Br"), + } + } +} + +pub fn generate_rust(filename: &str, out_dir: &Path) -> Result<(), Error> { + let mut rust = String::new(); + + // Generate a pretty-printing method for debugging. + rust.push_str("pub fn print(inst: &RawInst) -> String {\n"); + rust.push_str("match inst {\n"); + for inst @ Inst { name, .. } in OPS.iter().chain(EXTENDED_OPS) { + if inst.skip() { + continue; + } + + let mut pat = String::new(); + let mut locals = String::new(); + let mut format_string = String::new(); + format_string.push_str(inst.snake_name); + for (i, op) in inst.operands().enumerate() { + match op { + Operand::Normal { name, ty } | Operand::Writable { name, ty } => { + pat.push_str(name); + pat.push_str(","); + + if i > 0 { + format_string.push_str(","); + } + format_string.push_str(" {"); + format_string.push_str(name); + format_string.push_str("}"); + + if ty.contains("Reg") { + if name == "dst" { + locals.push_str(&format!("let {name} = reg_name(*{name}.to_reg());\n")); + } else { + locals.push_str(&format!("let {name} = reg_name(**{name});\n")); + } + } + } + Operand::TrapCode { name, ty: _ } => { + pat.push_str(name); + pat.push_str(","); + format_string.push_str(&format!(" // trap={{{name}:?}}")); + } + Operand::Binop { reg: _ } => { + pat.push_str("dst, src1, src2,"); + format_string.push_str(" {dst}, {src1}, {src2}"); + locals.push_str(&format!("let dst = reg_name(*dst.to_reg());\n")); + locals.push_str(&format!("let src1 = reg_name(**src1);\n")); + locals.push_str(&format!("let src2 = reg_name(**src2);\n")); + } + } + } + + rust.push_str(&format!( + " + RawInst::{name} {{ {pat} }} => {{ + {locals} + format!(\"{format_string}\") + }} + " + )); + } + rust.push_str("}\n"); + rust.push_str("}\n"); + + // Generate `get_operands` to feed information to regalloc + rust.push_str( + "pub fn get_operands(inst: &mut RawInst, collector: &mut impl OperandVisitor) {\n", + ); + rust.push_str("match inst {\n"); + for inst @ Inst { name, .. 
} in OPS.iter().chain(EXTENDED_OPS) { + if inst.skip() { + continue; + } + + let mut pat = String::new(); + let mut uses = Vec::new(); + let mut defs = Vec::new(); + for op in inst.operands() { + match op { + Operand::Normal { name, ty } => { + if ty.contains("Reg") { + uses.push(name); + pat.push_str(name); + pat.push_str(","); + } + } + Operand::Writable { name, ty } => { + if ty.contains("Reg") { + defs.push(name); + pat.push_str(name); + pat.push_str(","); + } + } + Operand::TrapCode { .. } => {} + Operand::Binop { reg: _ } => { + pat.push_str("dst, src1, src2,"); + uses.push("src1"); + uses.push("src2"); + defs.push("dst"); + } + } + } + + let uses = uses + .iter() + .map(|u| format!("collector.reg_use({u});\n")) + .collect::(); + let defs = defs + .iter() + .map(|u| format!("collector.reg_def({u});\n")) + .collect::(); + + rust.push_str(&format!( + " + RawInst::{name} {{ {pat} .. }} => {{ + {uses} + {defs} + }} + " + )); + } + rust.push_str("}\n"); + rust.push_str("}\n"); + + // Generate an emission method + rust.push_str("pub fn emit
<P>
(inst: &RawInst, sink: &mut MachBuffer>)\n"); + rust.push_str(" where P: PulleyTargetKind,\n"); + rust.push_str("{\n"); + rust.push_str("match *inst {\n"); + for inst @ Inst { + name, snake_name, .. + } in OPS.iter().chain(EXTENDED_OPS) + { + if inst.skip() { + continue; + } + + let mut pat = String::new(); + let mut args = String::new(); + let mut trap = String::new(); + for op in inst.operands() { + match op { + Operand::Normal { name, ty: _ } | Operand::Writable { name, ty: _ } => { + pat.push_str(name); + pat.push_str(","); + + args.push_str(name); + args.push_str(","); + } + Operand::TrapCode { name, ty: _ } => { + pat.push_str(name); + pat.push_str(","); + trap.push_str(&format!("sink.add_trap({name});\n")); + } + Operand::Binop { reg: _ } => { + pat.push_str("dst, src1, src2,"); + args.push_str( + "pulley_interpreter::regs::BinaryOperands::new(dst, src1, src2),", + ); + } + } + } + + rust.push_str(&format!( + " + RawInst::{name} {{ {pat} }} => {{ + {trap} + pulley_interpreter::encode::{snake_name}(sink, {args}) + }} + " + )); + } + rust.push_str("}\n"); + rust.push_str("}\n"); + + std::fs::write(out_dir.join(filename), rust)?; + Ok(()) +} + +pub fn generate_isle(filename: &str, out_dir: &Path) -> Result<(), Error> { + let mut isle = String::new(); + + // Generate the `RawInst` enum + isle.push_str("(type RawInst (enum\n"); + for inst in OPS.iter().chain(EXTENDED_OPS) { + if inst.skip() { + continue; + } + isle.push_str(" ("); + isle.push_str(inst.name); + for op in inst.operands() { + match op { + Operand::Normal { name, ty } | Operand::TrapCode { name, ty } => { + isle.push_str(&format!("\n ({name} {ty})")); + } + Operand::Writable { name, ty } => { + isle.push_str(&format!("\n ({name} Writable{ty})")); + } + Operand::Binop { reg } => { + isle.push_str(&format!("\n (dst Writable{reg})")); + isle.push_str(&format!("\n (src1 {reg})")); + isle.push_str(&format!("\n (src2 {reg})")); + } + } + } + isle.push_str(")\n"); + } + isle.push_str("))\n"); + + // Generate the `pulley_*` constructors with a `decl` and a `rule`. + for inst @ Inst { + name, snake_name, .. + } in OPS.iter().chain(EXTENDED_OPS) + { + if inst.skip() { + continue; + } + // generate `decl` and `rule` at the same time, placing the `rule` in + // temporary storage on the side. Makes generation a bit easier to read + // as opposed to doing the decl first then the rule. 
+ let mut rule = String::new(); + isle.push_str(&format!("(decl pulley_{snake_name} (")); + rule.push_str(&format!("(rule (pulley_{snake_name} ")); + let mut result = None; + let mut ops = Vec::new(); + for op in inst.operands() { + match op { + Operand::Normal { name, ty } | Operand::TrapCode { name, ty } => { + isle.push_str(ty); + rule.push_str(name); + ops.push(name); + } + Operand::Writable { name: _, ty } => { + assert!(result.is_none(), "{} has >1 result", inst.snake_name); + result = Some(ty); + } + Operand::Binop { reg } => { + isle.push_str(&format!("{reg} {reg}")); + rule.push_str("src1 src2"); + ops.push("src1"); + ops.push("src2"); + assert!(result.is_none(), "{} has >1 result", inst.snake_name); + result = Some(reg); + } + } + isle.push_str(" "); + rule.push_str(" "); + } + isle.push_str(") "); + rule.push_str(")"); + let ops = ops.join(" "); + match result { + Some(result) => { + isle.push_str(result); + rule.push_str(&format!( + " + (let ( + (dst Writable{result} (temp_writable_{})) + (_ Unit (emit (RawInst.{name} dst {ops}))) + ) + dst))\ +\n", + result.to_lowercase() + )); + } + None => { + isle.push_str("SideEffectNoResult"); + rule.push_str(&format!( + " (SideEffectNoResult.Inst (RawInst.{name} {ops})))\n", + )); + } + } + isle.push_str(")\n"); + + isle.push_str(&rule); + } + + std::fs::write(out_dir.join(filename), isle)?; + Ok(()) +} diff --git a/cranelift/codegen/src/isa/pulley_shared/abi.rs b/cranelift/codegen/src/isa/pulley_shared/abi.rs index 831dbe282865..d8e68c43ef76 100644 --- a/cranelift/codegen/src/isa/pulley_shared/abi.rs +++ b/cranelift/codegen/src/isa/pulley_shared/abi.rs @@ -182,12 +182,12 @@ where let src = XReg::new(src).unwrap(); let dst = dst.try_into().unwrap(); match (signed, from_bits) { - (true, 8) => Inst::Sext8 { dst, src }.into(), - (true, 16) => Inst::Sext16 { dst, src }.into(), - (true, 32) => Inst::Sext32 { dst, src }.into(), - (false, 8) => Inst::Zext8 { dst, src }.into(), - (false, 16) => Inst::Zext16 { dst, src }.into(), - (false, 32) => Inst::Zext32 { dst, src }.into(), + (true, 8) => RawInst::Sext8 { dst, src }.into(), + (true, 16) => RawInst::Sext16 { dst, src }.into(), + (true, 32) => RawInst::Sext32 { dst, src }.into(), + (false, 8) => RawInst::Zext8 { dst, src }.into(), + (false, 16) => RawInst::Zext16 { dst, src }.into(), + (false, 32) => RawInst::Zext32 { dst, src }.into(), _ => unimplemented!("extend {from_bits} to {to_bits} as signed? 
{signed}"), } } @@ -220,8 +220,8 @@ where let dst = into_reg.try_into().unwrap(); let imm = imm as i32; smallvec![ - Inst::Xconst32 { dst, imm }.into(), - Inst::Xadd32 { + RawInst::Xconst32 { dst, imm }.into(), + RawInst::Xadd32 { dst, src1: from_reg.try_into().unwrap(), src2: dst.to_reg(), @@ -243,7 +243,6 @@ where } fn gen_load_base_offset(into_reg: Writable, base: Reg, offset: i32, ty: Type) -> Self::I { - let offset = i64::from(offset); let base = XReg::try_from(base).unwrap(); let mem = Amode::RegOffset { base, offset }; Inst::gen_load(into_reg, mem, ty, MemFlags::trusted()).into() @@ -261,13 +260,13 @@ where let inst = if amount < 0 { let amount = amount.checked_neg().unwrap(); if let Ok(amt) = u32::try_from(amount) { - Inst::StackAlloc32 { amt } + RawInst::StackAlloc32 { amt } } else { unreachable!() } } else { if let Ok(amt) = u32::try_from(amount) { - Inst::StackFree32 { amt } + RawInst::StackFree32 { amt } } else { unreachable!() } @@ -284,7 +283,7 @@ where let mut insts = SmallVec::new(); if frame_layout.setup_area_size > 0 { - insts.push(Inst::PushFrame.into()); + insts.push(RawInst::PushFrame.into()); if flags.unwind_info() { insts.push( Inst::Unwind { @@ -310,7 +309,7 @@ where let mut insts = SmallVec::new(); if frame_layout.setup_area_size > 0 { - insts.push(Inst::PopFrame.into()); + insts.push(RawInst::PopFrame.into()); } if frame_layout.tail_args_size > 0 { @@ -327,7 +326,7 @@ where _isa_flags: &PulleyFlags, _frame_layout: &FrameLayout, ) -> SmallInstVec { - smallvec![Inst::Ret {}.into()] + smallvec![RawInst::Ret {}.into()] } fn gen_probestack(_insts: &mut SmallInstVec, _frame_size: u32) { @@ -365,7 +364,7 @@ where Inst::gen_load( writable_fp_reg(), Amode::SpOffset { - offset: i64::from(incoming_args_diff), + offset: i32::try_from(incoming_args_diff).unwrap(), }, I64, MemFlags::trusted(), @@ -423,7 +422,7 @@ where insts.push( Inst::gen_store( Amode::SpOffset { - offset: i64::from(stack_size - cur_offset), + offset: i32::try_from(stack_size - cur_offset).unwrap(), }, Reg::from(reg.to_reg()), ty, @@ -474,7 +473,7 @@ where Inst::gen_load( reg.map(Reg::from), Amode::SpOffset { - offset: i64::from(stack_size - cur_offset), + offset: i32::try_from(stack_size - cur_offset).unwrap(), }, ty, MemFlags::trusted(), @@ -533,16 +532,15 @@ where _isa_flags: &PulleyFlags, ) -> u32 { match rc { - // Spilling an integer register requires spilling 8 bytes, and spill - // slots are defined in terms of "word bytes" or the size of a - // pointer. That means on 32-bit pulley we need to take up two spill - // slots for integers where on 64-bit pulley we need to only take up - // one spill slot for integers. - RegClass::Int => match P::pointer_width() { + // Spilling an integer or float register requires spilling 8 bytes, + // and spill slots are defined in terms of "word bytes" or the size + // of a pointer. That means on 32-bit pulley we need to take up two + // spill slots where on 64-bit pulley we need to only take up one + // spill slot for integers. + RegClass::Int | RegClass::Float => match P::pointer_width() { PointerWidth::PointerWidth32 => 2, PointerWidth::PointerWidth64 => 1, }, - RegClass::Float => todo!(), RegClass::Vector => unreachable!(), } } @@ -647,22 +645,22 @@ const DEFAULT_CALLEE_SAVES: PRegSet = PRegSet::empty() .with(px_reg(30)) .with(px_reg(31)) // Float registers. 
- .with(px_reg(16)) - .with(px_reg(17)) - .with(px_reg(18)) - .with(px_reg(19)) - .with(px_reg(20)) - .with(px_reg(21)) - .with(px_reg(22)) - .with(px_reg(23)) - .with(px_reg(24)) - .with(px_reg(25)) - .with(px_reg(26)) - .with(px_reg(27)) - .with(px_reg(28)) - .with(px_reg(29)) - .with(px_reg(30)) - .with(px_reg(31)) + .with(pf_reg(16)) + .with(pf_reg(17)) + .with(pf_reg(18)) + .with(pf_reg(19)) + .with(pf_reg(20)) + .with(pf_reg(21)) + .with(pf_reg(22)) + .with(pf_reg(23)) + .with(pf_reg(24)) + .with(pf_reg(25)) + .with(pf_reg(26)) + .with(pf_reg(27)) + .with(pf_reg(28)) + .with(pf_reg(29)) + .with(pf_reg(30)) + .with(pf_reg(31)) // Note: no vector registers are callee-saved. ; diff --git a/cranelift/codegen/src/isa/pulley_shared/inst.isle b/cranelift/codegen/src/isa/pulley_shared/inst.isle index c80cacd0b5b1..41a4492fa184 100644 --- a/cranelift/codegen/src/isa/pulley_shared/inst.isle +++ b/cranelift/codegen/src/isa/pulley_shared/inst.isle @@ -30,9 +30,6 @@ ;;;; Actual Instructions ;;;; - ;; Raise a trap. - (Trap (code TrapCode)) - ;; Trap if `src1 cond src2`. (TrapIf (cond IntCC) (size OperandSize) (src1 XReg) (src2 XReg) (code TrapCode)) @@ -45,9 +42,6 @@ (dst WritableXReg) (reg XReg)) - ;; Return. - (Ret) - ;; Load an external symbol's address into a register. (LoadExtName (dst WritableXReg) (name BoxExternalName) @@ -67,7 +61,7 @@ (Jump (label MachLabel)) ;; Jump to `then` if `c` is nonzero, otherwise to `else`. - (BrIf (c XReg) (taken MachLabel) (not_taken MachLabel)) + (BrIf32 (c XReg) (taken MachLabel) (not_taken MachLabel)) ;; Compare-and-branch macro ops. (BrIfXeq32 (src1 XReg) (src2 XReg) (taken MachLabel) (not_taken MachLabel)) @@ -77,70 +71,34 @@ (BrIfXult32 (src1 XReg) (src2 XReg) (taken MachLabel) (not_taken MachLabel)) (BrIfXulteq32 (src1 XReg) (src2 XReg) (taken MachLabel) (not_taken MachLabel)) - ;; Register-to-register moves. - (Xmov (dst WritableXReg) (src XReg)) - (Fmov (dst WritableFReg) (src FReg)) - (Vmov (dst WritableVReg) (src VReg)) - - ;; Integer constants, zero-extended to 64 bits. - (Xconst8 (dst WritableXReg) (imm i8)) - (Xconst16 (dst WritableXReg) (imm i16)) - (Xconst32 (dst WritableXReg) (imm i32)) - (Xconst64 (dst WritableXReg) (imm i64)) - - ;; Integer arithmetic. - (Xadd32 (dst WritableXReg) (src1 XReg) (src2 XReg)) - (Xadd64 (dst WritableXReg) (src1 XReg) (src2 XReg)) - - ;; Comparisons. - (Xeq64 (dst WritableXReg) (src1 XReg) (src2 XReg)) - (Xneq64 (dst WritableXReg) (src1 XReg) (src2 XReg)) - (Xslt64 (dst WritableXReg) (src1 XReg) (src2 XReg)) - (Xslteq64 (dst WritableXReg) (src1 XReg) (src2 XReg)) - (Xult64 (dst WritableXReg) (src1 XReg) (src2 XReg)) - (Xulteq64 (dst WritableXReg) (src1 XReg) (src2 XReg)) - (Xeq32 (dst WritableXReg) (src1 XReg) (src2 XReg)) - (Xneq32 (dst WritableXReg) (src1 XReg) (src2 XReg)) - (Xslt32 (dst WritableXReg) (src1 XReg) (src2 XReg)) - (Xslteq32 (dst WritableXReg) (src1 XReg) (src2 XReg)) - (Xult32 (dst WritableXReg) (src1 XReg) (src2 XReg)) - (Xulteq32 (dst WritableXReg) (src1 XReg) (src2 XReg)) - ;; Load the memory address referenced by `mem` into `dst`. (LoadAddr (dst WritableXReg) (mem Amode)) - ;; Loads. - (Load (dst WritableReg) (mem Amode) (ty Type) (flags MemFlags) (ext ExtKind)) + ;; Load `ty` bytes from memory pointed to by `mem` and store in `dst`. + ;; + ;; How much is written to the register is defined by `ExtKind`. The `flags` + ;; control behavior such as endianness. 
+ (XLoad (dst WritableXReg) (mem Amode) (ty Type) (flags MemFlags) (ext ExtKind)) + (FLoad (dst WritableFReg) (mem Amode) (ty Type) (flags MemFlags)) + (VLoad (dst WritableVReg) (mem Amode) (ty Type) (flags MemFlags)) ;; Stores. - (Store (mem Amode) (src Reg) (ty Type) (flags MemFlags)) - - ;; Bitcasts. - (BitcastIntFromFloat32 (dst WritableXReg) (src FReg)) - (BitcastIntFromFloat64 (dst WritableXReg) (src FReg)) - (BitcastFloatFromInt32 (dst WritableFReg) (src XReg)) - (BitcastFloatFromInt64 (dst WritableFReg) (src XReg)) - - ;; Stack manipulations - (PushFrame) - (PopFrame) - (StackAlloc32 (amt u32)) - (StackFree32 (amt u32)) - - ;; Sign extensions. - (Zext8 (dst WritableXReg) (src XReg)) - (Zext16 (dst WritableXReg) (src XReg)) - (Zext32 (dst WritableXReg) (src XReg)) - (Sext8 (dst WritableXReg) (src XReg)) - (Sext16 (dst WritableXReg) (src XReg)) - (Sext32 (dst WritableXReg) (src XReg)) - - ;; Byte swaps. - (Bswap32 (dst WritableXReg) (src XReg)) - (Bswap64 (dst WritableXReg) (src XReg)) + (XStore (mem Amode) (src XReg) (ty Type) (flags MemFlags)) + (FStore (mem Amode) (src FReg) (ty Type) (flags MemFlags)) + (VStore (mem Amode) (src VReg) (ty Type) (flags MemFlags)) + + ;; A raw pulley instruction generated at compile-time via Pulley's + ;; `for_each_op!` macro. This variant has `pulley_*` constructors to + ;; emit this instruction and auto-generated methods for other various + ;; bits and pieces of boilerplate in the backend. + (Raw (raw RawInst)) ) ) +(decl raw_inst_to_inst (RawInst) MInst) +(rule (raw_inst_to_inst inst) (MInst.Raw inst)) +(convert RawInst MInst raw_inst_to_inst) + (type BoxCallInfo (primitive BoxCallInfo)) (type BoxCallIndInfo (primitive BoxCallIndInfo)) @@ -150,13 +108,13 @@ (type Amode (enum - (SpOffset (offset i64)) - (RegOffset (base XReg) (offset i64)) + (SpOffset (offset i32)) + (RegOffset (base XReg) (offset i32)) (Stack (amode StackAMode)) ) ) -(type ExtKind (enum None Sign Zero)) +(type ExtKind (enum None Sign32 Sign64 Zero32 Zero64)) ;;;; Newtypes for Different Register Classes ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; @@ -391,10 +349,6 @@ ;;;; Instruction Constructors ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; -(decl pulley_trap (TrapCode) SideEffectNoResult) -(rule (pulley_trap code) - (SideEffectNoResult.Inst (MInst.Trap code))) - (decl pulley_trap_if (IntCC OperandSize XReg XReg TrapCode) SideEffectNoResult) (rule (pulley_trap_if cond size src1 src2 code) (SideEffectNoResult.Inst (MInst.TrapIf cond size src1 src2 code))) @@ -414,37 +368,13 @@ (_ Unit (emit (MInst.GetSpecial dst reg)))) dst)) -(decl pulley_xconst8 (i8) XReg) -(rule (pulley_xconst8 x) - (let ((reg WritableXReg (temp_writable_xreg)) - (_ Unit (emit (MInst.Xconst8 reg x)))) - reg)) - -(decl pulley_xconst16 (i16) XReg) -(rule (pulley_xconst16 x) - (let ((reg WritableXReg (temp_writable_xreg)) - (_ Unit (emit (MInst.Xconst16 reg x)))) - reg)) - -(decl pulley_xconst32 (i32) XReg) -(rule (pulley_xconst32 x) - (let ((reg WritableXReg (temp_writable_xreg)) - (_ Unit (emit (MInst.Xconst32 reg x)))) - reg)) - -(decl pulley_xconst64 (i64) XReg) -(rule (pulley_xconst64 x) - (let ((reg WritableXReg (temp_writable_xreg)) - (_ Unit (emit (MInst.Xconst64 reg x)))) - reg)) - (decl pulley_jump (MachLabel) SideEffectNoResult) (rule (pulley_jump label) (SideEffectNoResult.Inst (MInst.Jump label))) -(decl pulley_br_if (XReg MachLabel MachLabel) SideEffectNoResult) -(rule (pulley_br_if c taken not_taken) - (SideEffectNoResult.Inst (MInst.BrIf c taken not_taken))) +(decl pulley_br_if32 (XReg MachLabel 
MachLabel) SideEffectNoResult) +(rule (pulley_br_if32 c taken not_taken) + (SideEffectNoResult.Inst (MInst.BrIf32 c taken not_taken))) (decl pulley_br_if_xeq32 (XReg XReg MachLabel MachLabel) SideEffectNoResult) (rule (pulley_br_if_xeq32 a b taken not_taken) @@ -470,176 +400,30 @@ (rule (pulley_br_if_xulteq32 a b taken not_taken) (SideEffectNoResult.Inst (MInst.BrIfXulteq32 a b taken not_taken))) -(decl pulley_xadd32 (XReg XReg) XReg) -(rule (pulley_xadd32 a b) - (let ((dst WritableXReg (temp_writable_xreg)) - (_ Unit (emit (MInst.Xadd32 dst a b)))) - dst)) - -(decl pulley_xadd64 (XReg XReg) XReg) -(rule (pulley_xadd64 a b) - (let ((dst WritableXReg (temp_writable_xreg)) - (_ Unit (emit (MInst.Xadd64 dst a b)))) - dst)) - -(decl pulley_xeq64 (XReg XReg) XReg) -(rule (pulley_xeq64 a b) - (let ((dst WritableXReg (temp_writable_xreg)) - (_ Unit (emit (MInst.Xeq64 dst a b)))) - dst)) - -(decl pulley_xneq64 (XReg XReg) XReg) -(rule (pulley_xneq64 a b) - (let ((dst WritableXReg (temp_writable_xreg)) - (_ Unit (emit (MInst.Xneq64 dst a b)))) - dst)) - -(decl pulley_xslt64 (XReg XReg) XReg) -(rule (pulley_xslt64 a b) - (let ((dst WritableXReg (temp_writable_xreg)) - (_ Unit (emit (MInst.Xslt64 dst a b)))) - dst)) - -(decl pulley_xslteq64 (XReg XReg) XReg) -(rule (pulley_xslteq64 a b) - (let ((dst WritableXReg (temp_writable_xreg)) - (_ Unit (emit (MInst.Xslteq64 dst a b)))) - dst)) - -(decl pulley_xult64 (XReg XReg) XReg) -(rule (pulley_xult64 a b) - (let ((dst WritableXReg (temp_writable_xreg)) - (_ Unit (emit (MInst.Xult64 dst a b)))) - dst)) - -(decl pulley_xulteq64 (XReg XReg) XReg) -(rule (pulley_xulteq64 a b) - (let ((dst WritableXReg (temp_writable_xreg)) - (_ Unit (emit (MInst.Xulteq64 dst a b)))) - dst)) - -(decl pulley_xeq32 (XReg XReg) XReg) -(rule (pulley_xeq32 a b) +(decl pulley_xload (Amode Type MemFlags ExtKind) XReg) +(rule (pulley_xload amode ty flags ext) (let ((dst WritableXReg (temp_writable_xreg)) - (_ Unit (emit (MInst.Xeq32 dst a b)))) + (_ Unit (emit (MInst.XLoad dst amode ty flags ext)))) dst)) -(decl pulley_xneq32 (XReg XReg) XReg) -(rule (pulley_xneq32 a b) - (let ((dst WritableXReg (temp_writable_xreg)) - (_ Unit (emit (MInst.Xneq32 dst a b)))) - dst)) - -(decl pulley_xslt32 (XReg XReg) XReg) -(rule (pulley_xslt32 a b) - (let ((dst WritableXReg (temp_writable_xreg)) - (_ Unit (emit (MInst.Xslt32 dst a b)))) - dst)) +(decl pulley_xstore (Amode XReg Type MemFlags) SideEffectNoResult) +(rule (pulley_xstore amode src ty flags) + (SideEffectNoResult.Inst (MInst.XStore amode src ty flags))) -(decl pulley_xslteq32 (XReg XReg) XReg) -(rule (pulley_xslteq32 a b) - (let ((dst WritableXReg (temp_writable_xreg)) - (_ Unit (emit (MInst.Xslteq32 dst a b)))) - dst)) - -(decl pulley_xult32 (XReg XReg) XReg) -(rule (pulley_xult32 a b) - (let ((dst WritableXReg (temp_writable_xreg)) - (_ Unit (emit (MInst.Xult32 dst a b)))) - dst)) - -(decl pulley_xulteq32 (XReg XReg) XReg) -(rule (pulley_xulteq32 a b) - (let ((dst WritableXReg (temp_writable_xreg)) - (_ Unit (emit (MInst.Xulteq32 dst a b)))) - dst)) - -(decl pulley_load (Amode Type MemFlags ExtKind) Reg) -(rule (pulley_load amode ty flags ext) - (let ((dst WritableReg (temp_writable_reg ty)) - (_ Unit (emit (MInst.Load dst amode ty flags ext)))) - dst)) - -(decl pulley_store (Amode Reg Type MemFlags) SideEffectNoResult) -(rule (pulley_store amode src ty flags) - (SideEffectNoResult.Inst (MInst.Store amode src ty flags))) - -(decl pulley_bitcast_float_from_int_32 (XReg) FReg) -(rule (pulley_bitcast_float_from_int_32 src) - (let ((dst 
WritableFReg (temp_writable_freg)) - (_ Unit (emit (MInst.BitcastFloatFromInt32 dst src)))) - dst)) - -(decl pulley_bitcast_float_from_int_64 (XReg) FReg) -(rule (pulley_bitcast_float_from_int_64 src) +(decl pulley_fload (Amode Type MemFlags) FReg) +(rule (pulley_fload amode ty flags) (let ((dst WritableFReg (temp_writable_freg)) - (_ Unit (emit (MInst.BitcastFloatFromInt64 dst src)))) + (_ Unit (emit (MInst.FLoad dst amode ty flags)))) dst)) -(decl pulley_bitcast_int_from_float_32 (FReg) XReg) -(rule (pulley_bitcast_int_from_float_32 src) - (let ((dst WritableXReg (temp_writable_xreg)) - (_ Unit (emit (MInst.BitcastIntFromFloat32 dst src)))) - dst)) - -(decl pulley_bitcast_int_from_float_64 (FReg) XReg) -(rule (pulley_bitcast_int_from_float_64 src) - (let ((dst WritableXReg (temp_writable_xreg)) - (_ Unit (emit (MInst.BitcastIntFromFloat64 dst src)))) - dst)) +(decl pulley_fstore (Amode FReg Type MemFlags) SideEffectNoResult) +(rule (pulley_fstore amode src ty flags) + (SideEffectNoResult.Inst (MInst.FStore amode src ty flags))) (decl gen_br_table (XReg MachLabel BoxVecMachLabel) Unit) (rule (gen_br_table idx default labels) (emit (MInst.BrTable idx default labels))) -(decl pulley_zext8 (XReg) XReg) -(rule (pulley_zext8 src) - (let ((dst WritableXReg (temp_writable_xreg)) - (_ Unit (emit (MInst.Zext8 dst src)))) - dst)) - -(decl pulley_zext16 (XReg) XReg) -(rule (pulley_zext16 src) - (let ((dst WritableXReg (temp_writable_xreg)) - (_ Unit (emit (MInst.Zext16 dst src)))) - dst)) - -(decl pulley_zext32 (XReg) XReg) -(rule (pulley_zext32 src) - (let ((dst WritableXReg (temp_writable_xreg)) - (_ Unit (emit (MInst.Zext32 dst src)))) - dst)) - -(decl pulley_sext8 (XReg) XReg) -(rule (pulley_sext8 src) - (let ((dst WritableXReg (temp_writable_xreg)) - (_ Unit (emit (MInst.Sext8 dst src)))) - dst)) - -(decl pulley_sext16 (XReg) XReg) -(rule (pulley_sext16 src) - (let ((dst WritableXReg (temp_writable_xreg)) - (_ Unit (emit (MInst.Sext16 dst src)))) - dst)) - -(decl pulley_sext32 (XReg) XReg) -(rule (pulley_sext32 src) - (let ((dst WritableXReg (temp_writable_xreg)) - (_ Unit (emit (MInst.Sext32 dst src)))) - dst)) - -(decl pulley_bswap32 (XReg) XReg) -(rule (pulley_bswap32 src) - (let ((dst WritableXReg (temp_writable_xreg)) - (_ Unit (emit (MInst.Bswap32 dst src)))) - dst)) - -(decl pulley_bswap64 (XReg) XReg) -(rule (pulley_bswap64 src) - (let ((dst WritableXReg (temp_writable_xreg)) - (_ Unit (emit (MInst.Bswap64 dst src)))) - dst)) - ;;;; Helpers for Emitting Calls ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; (decl gen_call (SigRef ExternalName RelocDistance ValueSlice) InstOutput) diff --git a/cranelift/codegen/src/isa/pulley_shared/inst/args.rs b/cranelift/codegen/src/isa/pulley_shared/inst/args.rs index 0d6dc6161104..b00a0aa82b61 100644 --- a/cranelift/codegen/src/isa/pulley_shared/inst/args.rs +++ b/cranelift/codegen/src/isa/pulley_shared/inst/args.rs @@ -156,35 +156,38 @@ impl Amode { } } - pub(crate) fn get_base_register(&self) -> Option { + pub(crate) fn get_base_register(&self) -> Option { match self { Amode::RegOffset { base, offset: _ } => Some((*base).into()), - Amode::SpOffset { .. } | Amode::Stack { .. } => Some(stack_reg()), + Amode::SpOffset { .. } | Amode::Stack { .. } => Some(XReg::new(stack_reg()).unwrap()), } } - pub(crate) fn get_offset_with_state
<P>(&self, state: &EmitState<P>) -> i64 + pub(crate) fn get_offset_with_state<P>(&self, state: &EmitState<P>
) -> i32 where P: PulleyTargetKind, { match self { Amode::RegOffset { base: _, offset } | Amode::SpOffset { offset } => *offset, - Amode::Stack { amode } => match amode { - StackAMode::IncomingArg(offset, stack_args_size) => { - let offset = i64::from(*stack_args_size) - *offset; - let frame_layout = state.frame_layout(); - let sp_offset = frame_layout.tail_args_size - + frame_layout.setup_area_size - + frame_layout.clobber_size - + frame_layout.fixed_frame_storage_size - + frame_layout.outgoing_args_size; - i64::from(sp_offset) - offset - } - StackAMode::Slot(offset) => { - offset + i64::from(state.frame_layout().outgoing_args_size) - } - StackAMode::OutgoingArg(offset) => *offset, - }, + Amode::Stack { amode } => { + let offset64 = match amode { + StackAMode::IncomingArg(offset, stack_args_size) => { + let offset = i64::from(*stack_args_size) - *offset; + let frame_layout = state.frame_layout(); + let sp_offset = frame_layout.tail_args_size + + frame_layout.setup_area_size + + frame_layout.clobber_size + + frame_layout.fixed_frame_storage_size + + frame_layout.outgoing_args_size; + i64::from(sp_offset) - offset + } + StackAMode::Slot(offset) => { + offset + i64::from(state.frame_layout().outgoing_args_size) + } + StackAMode::OutgoingArg(offset) => *offset, + }; + i32::try_from(offset64).unwrap() + } } } } diff --git a/cranelift/codegen/src/isa/pulley_shared/inst/emit.rs b/cranelift/codegen/src/isa/pulley_shared/inst/emit.rs index 78c13ed96e74..463c920f4340 100644 --- a/cranelift/codegen/src/isa/pulley_shared/inst/emit.rs +++ b/cranelift/codegen/src/isa/pulley_shared/inst/emit.rs @@ -1,14 +1,13 @@ //! Pulley binary code emission. use super::*; -use crate::ir; +use crate::ir::{self, Endianness}; use crate::isa::pulley_shared::abi::PulleyMachineDeps; use crate::isa::pulley_shared::PointerWidth; use core::marker::PhantomData; use cranelift_control::ControlPlane; use pulley_interpreter::encode as enc; use pulley_interpreter::regs::BinaryOperands; -use pulley_interpreter::regs::Reg as _; pub struct EmitInfo { #[allow(dead_code)] // Will get used as we fill out this backend. @@ -28,6 +27,15 @@ impl EmitInfo { isa_flags, } } + + fn endianness(&self, flags: MemFlags) -> Endianness { + let target_endianness = if self.isa_flags.big_endian() { + Endianness::Big + } else { + Endianness::Little + }; + flags.endianness(target_endianness) + } } /// State carried between emissions of a sequence of instructions. @@ -124,11 +132,6 @@ fn pulley_emit
<P>( // Pseudo-instructions that don't actually encode to anything. Inst::Args { .. } | Inst::Rets { .. } | Inst::Unwind { .. } => {} - Inst::Trap { code } => { - sink.add_trap(*code); - enc::trap(sink); - } - Inst::TrapIf { cond, size, @@ -180,8 +183,6 @@ fn pulley_emit<P>
( Inst::GetSpecial { dst, reg } => enc::xmov(sink, dst, reg), - Inst::Ret => enc::ret(sink), - Inst::LoadExtName { .. } => todo!(), Inst::Call { info } => { @@ -246,7 +247,7 @@ fn pulley_emit
<P>
( enc::jump(sink, 0x00000000); } - Inst::BrIf { + Inst::BrIf32 { c, taken, not_taken, @@ -257,14 +258,14 @@ fn pulley_emit
<P>
( sink.use_label_at_offset(taken_start, *taken, LabelUse::Jump(2)); let mut inverted = SmallVec::<[u8; 16]>::new(); - enc::br_if_not(&mut inverted, c, 0x00000000); + enc::br_if_not32(&mut inverted, c, 0x00000000); debug_assert_eq!( inverted.len(), usize::try_from(taken_end - *start_offset).unwrap() ); sink.add_cond_branch(*start_offset, taken_end, *taken, &inverted); - enc::br_if(sink, c, 0x00000000); + enc::br_if32(sink, c, 0x00000000); debug_assert_eq!(sink.cur_offset(), taken_end); // If not taken. @@ -384,46 +385,11 @@ fn pulley_emit
<P>
( ); } - Inst::Xmov { dst, src } => enc::xmov(sink, dst, src), - Inst::Fmov { dst, src } => enc::fmov(sink, dst, src), - Inst::Vmov { dst, src } => enc::vmov(sink, dst, src), - - Inst::Xconst8 { dst, imm } => enc::xconst8(sink, dst, *imm), - Inst::Xconst16 { dst, imm } => enc::xconst16(sink, dst, *imm), - Inst::Xconst32 { dst, imm } => enc::xconst32(sink, dst, *imm), - Inst::Xconst64 { dst, imm } => enc::xconst64(sink, dst, *imm), - - Inst::Xadd32 { dst, src1, src2 } => enc::xadd32(sink, BinaryOperands::new(dst, src1, src2)), - Inst::Xadd64 { dst, src1, src2 } => enc::xadd64(sink, BinaryOperands::new(dst, src1, src2)), - Inst::Xeq64 { dst, src1, src2 } => enc::xeq64(sink, BinaryOperands::new(dst, src1, src2)), - Inst::Xneq64 { dst, src1, src2 } => enc::xneq64(sink, BinaryOperands::new(dst, src1, src2)), - Inst::Xslt64 { dst, src1, src2 } => enc::xslt64(sink, BinaryOperands::new(dst, src1, src2)), - Inst::Xslteq64 { dst, src1, src2 } => { - enc::xslteq64(sink, BinaryOperands::new(dst, src1, src2)) - } - Inst::Xult64 { dst, src1, src2 } => enc::xult64(sink, BinaryOperands::new(dst, src1, src2)), - Inst::Xulteq64 { dst, src1, src2 } => { - enc::xulteq64(sink, BinaryOperands::new(dst, src1, src2)) - } - - Inst::Xeq32 { dst, src1, src2 } => enc::xeq32(sink, BinaryOperands::new(dst, src1, src2)), - Inst::Xneq32 { dst, src1, src2 } => enc::xneq32(sink, BinaryOperands::new(dst, src1, src2)), - Inst::Xslt32 { dst, src1, src2 } => enc::xslt32(sink, BinaryOperands::new(dst, src1, src2)), - Inst::Xslteq32 { dst, src1, src2 } => { - enc::xslteq32(sink, BinaryOperands::new(dst, src1, src2)) - } - Inst::Xult32 { dst, src1, src2 } => enc::xult32(sink, BinaryOperands::new(dst, src1, src2)), - Inst::Xulteq32 { dst, src1, src2 } => { - enc::xulteq32(sink, BinaryOperands::new(dst, src1, src2)) - } - Inst::LoadAddr { dst, mem } => { let base = mem.get_base_register(); let offset = mem.get_offset_with_state(state); if let Some(base) = base { - let base = XReg::new(base).unwrap(); - if offset == 0 { enc::xmov(sink, dst, base); } else { @@ -431,10 +397,8 @@ fn pulley_emit
<P>
( enc::xconst8(sink, dst, offset); } else if let Ok(offset) = i16::try_from(offset) { enc::xconst16(sink, dst, offset); - } else if let Ok(offset) = i32::try_from(offset) { - enc::xconst32(sink, dst, offset); } else { - enc::xconst64(sink, dst, offset); + enc::xconst32(sink, dst, offset); } match P::pointer_width() { @@ -451,62 +415,171 @@ fn pulley_emit
<P>
( } } - Inst::Load { + Inst::XLoad { dst, mem, ty, - flags: _, + flags, ext, } => { + use Endianness as E; use ExtKind as X; let r = mem.get_base_register().unwrap(); - let r = reg_to_pulley_xreg(r); - let dst = reg_to_pulley_xreg(dst.to_reg()); let x = mem.get_offset_with_state(state); - match (*ext, *ty, i8::try_from(x)) { - (X::Sign, types::I32, Ok(0)) => enc::load32_s(sink, dst, r), - (X::Sign, types::I32, Ok(x)) => enc::load32_s_offset8(sink, dst, r, x), - (X::Sign, types::I32, Err(_)) => enc::load32_s_offset64(sink, dst, r, x), + let endian = emit_info.endianness(*flags); + match *ty { + I8 => match ext { + X::None | X::Zero32 => enc::xload8_u32_offset32(sink, dst, r, x), + X::Zero64 => enc::xload8_u64_offset32(sink, dst, r, x), + X::Sign32 => enc::xload8_s32_offset32(sink, dst, r, x), + X::Sign64 => enc::xload8_s64_offset32(sink, dst, r, x), + }, + I16 => match (ext, endian) { + (X::None | X::Zero32, E::Little) => { + enc::xload16le_u32_offset32(sink, dst, r, x); + } + (X::Sign32, E::Little) => { + enc::xload16le_s32_offset32(sink, dst, r, x); + } + (X::Zero64, E::Little) => { + enc::xload16le_u64_offset32(sink, dst, r, x); + } + (X::Sign64, E::Little) => { + enc::xload16le_s64_offset32(sink, dst, r, x); + } + (X::None | X::Zero32 | X::Zero64, E::Big) => { + enc::xload16be_u64_offset32(sink, dst, r, x); + } + (X::Sign32 | X::Sign64, E::Big) => { + enc::xload16be_s64_offset32(sink, dst, r, x); + } + }, + I32 => match (ext, endian) { + (X::None | X::Zero32 | X::Sign32, E::Little) => { + enc::xload32le_offset32(sink, dst, r, x); + } + (X::Zero64, E::Little) => { + enc::xload32le_u64_offset32(sink, dst, r, x); + } + (X::Sign64, E::Little) => { + enc::xload32le_s64_offset32(sink, dst, r, x); + } + (X::None | X::Zero32 | X::Zero64, E::Big) => { + enc::xload32be_u64_offset32(sink, dst, r, x); + } + (X::Sign32 | X::Sign64, E::Big) => { + enc::xload32be_s64_offset32(sink, dst, r, x); + } + }, + I64 => match endian { + E::Little => enc::xload64le_offset32(sink, dst, r, x), + E::Big => enc::xload64be_offset32(sink, dst, r, x), + }, + _ => unimplemented!("xload ty={ty:?}"), + } + } - (X::Zero, types::I32, Ok(0)) => enc::load32_u(sink, dst, r), - (X::Zero, types::I32, Ok(x)) => enc::load32_u_offset8(sink, dst, r, x), - (X::Zero, types::I32, Err(_)) => enc::load32_u_offset64(sink, dst, r, x), + Inst::FLoad { + dst, + mem, + ty, + flags, + } => { + use Endianness as E; + let r = mem.get_base_register().unwrap(); + let x = mem.get_offset_with_state(state); + let endian = emit_info.endianness(*flags); + match *ty { + F32 => match endian { + E::Little => enc::fload32le_offset32(sink, dst, r, x), + E::Big => enc::fload32be_offset32(sink, dst, r, x), + }, + F64 => match endian { + E::Little => enc::fload64le_offset32(sink, dst, r, x), + E::Big => enc::fload64be_offset32(sink, dst, r, x), + }, + _ => unimplemented!("fload ty={ty:?}"), + } + } - (_, types::I64, Ok(0)) => enc::load64(sink, dst, r), - (_, types::I64, Ok(x)) => enc::load64_offset8(sink, dst, r, x), - (_, types::I64, Err(_)) => enc::load64_offset64(sink, dst, r, x), + Inst::VLoad { + dst, + mem, + ty, + flags, + } => { + let r = mem.get_base_register().unwrap(); + let x = mem.get_offset_with_state(state); + let endian = emit_info.endianness(*flags); + assert_eq!(endian, Endianness::Little); + assert_eq!(ty.bytes(), 16); + enc::vload128le_offset32(sink, dst, r, x); + } - (..) 
=> unimplemented!("load ext={ext:?} ty={ty}"), + Inst::XStore { + mem, + src, + ty, + flags, + } => { + use Endianness as E; + let r = mem.get_base_register().unwrap(); + let x = mem.get_offset_with_state(state); + let endian = emit_info.endianness(*flags); + match *ty { + I8 => enc::xstore8_offset32(sink, r, x, src), + I16 => match endian { + E::Little => enc::xstore16le_offset32(sink, r, x, src), + E::Big => enc::xstore16be_offset32(sink, r, x, src), + }, + I32 => match endian { + E::Little => enc::xstore32le_offset32(sink, r, x, src), + E::Big => enc::xstore32be_offset32(sink, r, x, src), + }, + I64 => match endian { + E::Little => enc::xstore64le_offset32(sink, r, x, src), + E::Big => enc::xstore64be_offset32(sink, r, x, src), + }, + _ => unimplemented!("xstore ty={ty:?}"), } } - Inst::Store { + Inst::FStore { mem, src, ty, - flags: _, + flags, } => { + use Endianness as E; let r = mem.get_base_register().unwrap(); - let r = reg_to_pulley_xreg(r); - let src = reg_to_pulley_xreg(*src); let x = mem.get_offset_with_state(state); - match (*ty, i8::try_from(x)) { - (types::I32, Ok(0)) => enc::store32(sink, r, src), - (types::I32, Ok(x)) => enc::store32_offset8(sink, r, x, src), - (types::I32, Err(_)) => enc::store32_offset64(sink, r, x, src), - - (types::I64, Ok(0)) => enc::store64(sink, r, src), - (types::I64, Ok(x)) => enc::store64_offset8(sink, r, x, src), - (types::I64, Err(_)) => enc::store64_offset64(sink, r, x, src), - - (..) => todo!(), + let endian = emit_info.endianness(*flags); + match *ty { + F32 => match endian { + E::Little => enc::fstore32le_offset32(sink, r, x, src), + E::Big => enc::fstore32be_offset32(sink, r, x, src), + }, + F64 => match endian { + E::Little => enc::fstore64le_offset32(sink, r, x, src), + E::Big => enc::fstore64be_offset32(sink, r, x, src), + }, + _ => unimplemented!("fstore ty={ty:?}"), } } - Inst::BitcastIntFromFloat32 { dst, src } => enc::bitcast_int_from_float_32(sink, dst, src), - Inst::BitcastIntFromFloat64 { dst, src } => enc::bitcast_int_from_float_64(sink, dst, src), - Inst::BitcastFloatFromInt32 { dst, src } => enc::bitcast_float_from_int_32(sink, dst, src), - Inst::BitcastFloatFromInt64 { dst, src } => enc::bitcast_float_from_int_64(sink, dst, src), + Inst::VStore { + mem, + src, + ty, + flags, + } => { + let r = mem.get_base_register().unwrap(); + let x = mem.get_offset_with_state(state); + let endian = emit_info.endianness(*flags); + assert_eq!(endian, Endianness::Little); + assert_eq!(ty.bytes(), 16); + enc::vstore128le_offset32(sink, r, x, src); + } Inst::BrTable { idx, @@ -549,25 +622,27 @@ fn pulley_emit
<P>
( *start_offset = sink.cur_offset(); } - Inst::PushFrame => { - sink.add_trap(ir::TrapCode::STACK_OVERFLOW); - enc::push_frame(sink); - } - Inst::PopFrame => enc::pop_frame(sink), - Inst::StackAlloc32 { amt } => { - sink.add_trap(ir::TrapCode::STACK_OVERFLOW); - enc::stack_alloc32(sink, *amt); + Inst::Raw { raw } => { + match raw { + RawInst::PushFrame | RawInst::StackAlloc32 { .. } => { + sink.add_trap(ir::TrapCode::STACK_OVERFLOW); + } + RawInst::XDiv32U { .. } + | RawInst::XDiv64U { .. } + | RawInst::XRem32U { .. } + | RawInst::XRem64U { .. } => { + sink.add_trap(ir::TrapCode::INTEGER_DIVISION_BY_ZERO); + } + RawInst::XDiv32S { .. } + | RawInst::XDiv64S { .. } + | RawInst::XRem32S { .. } + | RawInst::XRem64S { .. } => { + sink.add_trap(ir::TrapCode::INTEGER_OVERFLOW); + } + _ => {} + } + super::generated::emit(raw, sink) } - Inst::StackFree32 { amt } => enc::stack_free32(sink, *amt), - - Inst::Zext8 { dst, src } => enc::zext8(sink, dst, src), - Inst::Zext16 { dst, src } => enc::zext16(sink, dst, src), - Inst::Zext32 { dst, src } => enc::zext32(sink, dst, src), - Inst::Sext8 { dst, src } => enc::sext8(sink, dst, src), - Inst::Sext16 { dst, src } => enc::sext16(sink, dst, src), - Inst::Sext32 { dst, src } => enc::sext32(sink, dst, src), - Inst::Bswap32 { dst, src } => enc::bswap32(sink, dst, src), - Inst::Bswap64 { dst, src } => enc::bswap64(sink, dst, src), } } @@ -607,7 +682,3 @@ fn br_if_cond_helper
<P>
( sink.add_uncond_branch(taken_end, not_taken_end, *not_taken); enc::jump(sink, 0x00000000); } - -fn reg_to_pulley_xreg(r: Reg) -> pulley_interpreter::XReg { - pulley_interpreter::XReg::new(r.to_real_reg().unwrap().hw_enc()).unwrap() -} diff --git a/cranelift/codegen/src/isa/pulley_shared/inst/mod.rs b/cranelift/codegen/src/isa/pulley_shared/inst/mod.rs index 460ad9fe30db..41d82c2ed941 100644 --- a/cranelift/codegen/src/isa/pulley_shared/inst/mod.rs +++ b/cranelift/codegen/src/isa/pulley_shared/inst/mod.rs @@ -24,28 +24,76 @@ pub use self::emit::*; // Instructions (top level): definition pub use crate::isa::pulley_shared::lower::isle::generated_code::MInst as Inst; +pub use crate::isa::pulley_shared::lower::isle::generated_code::RawInst; + +impl From for Inst { + fn from(raw: RawInst) -> Inst { + Inst::Raw { raw } + } +} use super::PulleyTargetKind; +mod generated { + use super::*; + use crate::isa::pulley_shared::lower::isle::generated_code::RawInst; + + include!(concat!(env!("OUT_DIR"), "/pulley_inst_gen.rs")); +} + impl Inst { /// Generic constructor for a load (zero-extending where appropriate). pub fn gen_load(dst: Writable, mem: Amode, ty: Type, flags: MemFlags) -> Inst { - Inst::Load { - dst, - mem, - ty, - flags, - ext: ExtKind::Zero, + if ty.is_vector() { + assert_eq!(ty.bytes(), 16); + Inst::VLoad { + dst: dst.map(|r| VReg::new(r).unwrap()), + mem, + ty, + flags, + } + } else if ty.is_int() { + Inst::XLoad { + dst: dst.map(|r| XReg::new(r).unwrap()), + mem, + ty, + flags, + ext: ExtKind::None, + } + } else { + Inst::FLoad { + dst: dst.map(|r| FReg::new(r).unwrap()), + mem, + ty, + flags, + } } } /// Generic constructor for a store. pub fn gen_store(mem: Amode, from_reg: Reg, ty: Type, flags: MemFlags) -> Inst { - Inst::Store { - mem, - src: from_reg, - ty, - flags, + if ty.is_vector() { + assert_eq!(ty.bytes(), 16); + Inst::VStore { + mem, + src: VReg::new(from_reg).unwrap(), + ty, + flags, + } + } else if ty.is_int() { + Inst::XStore { + mem, + src: XReg::new(from_reg).unwrap(), + ty, + flags, + } + } else { + Inst::FStore { + mem, + src: FReg::new(from_reg).unwrap(), + ty, + flags, + } } } } @@ -62,11 +110,8 @@ fn pulley_get_operands(inst: &mut Inst, collector: &mut impl OperandVisitor) { collector.reg_fixed_use(vreg, *preg); } } - Inst::Ret => { - unreachable!("`ret` is only added after regalloc") - } - Inst::Unwind { .. } | Inst::Trap { .. } | Inst::Nop => {} + Inst::Unwind { .. } | Inst::Nop => {} Inst::TrapIf { cond: _, @@ -119,7 +164,7 @@ fn pulley_get_operands(inst: &mut Inst, collector: &mut impl OperandVisitor) { Inst::Jump { .. 
} => {} - Inst::BrIf { + Inst::BrIf32 { c, taken: _, not_taken: _, @@ -167,51 +212,12 @@ fn pulley_get_operands(inst: &mut Inst, collector: &mut impl OperandVisitor) { collector.reg_use(src2); } - Inst::Xmov { dst, src } => { - collector.reg_use(src); - collector.reg_def(dst); - } - Inst::Fmov { dst, src } => { - collector.reg_use(src); - collector.reg_def(dst); - } - Inst::Vmov { dst, src } => { - collector.reg_use(src); - collector.reg_def(dst); - } - - Inst::Xconst8 { dst, imm: _ } - | Inst::Xconst16 { dst, imm: _ } - | Inst::Xconst32 { dst, imm: _ } - | Inst::Xconst64 { dst, imm: _ } => { - collector.reg_def(dst); - } - - Inst::Xadd32 { dst, src1, src2 } - | Inst::Xadd64 { dst, src1, src2 } - | Inst::Xeq64 { dst, src1, src2 } - | Inst::Xneq64 { dst, src1, src2 } - | Inst::Xslt64 { dst, src1, src2 } - | Inst::Xslteq64 { dst, src1, src2 } - | Inst::Xult64 { dst, src1, src2 } - | Inst::Xulteq64 { dst, src1, src2 } - | Inst::Xeq32 { dst, src1, src2 } - | Inst::Xneq32 { dst, src1, src2 } - | Inst::Xslt32 { dst, src1, src2 } - | Inst::Xslteq32 { dst, src1, src2 } - | Inst::Xult32 { dst, src1, src2 } - | Inst::Xulteq32 { dst, src1, src2 } => { - collector.reg_use(src1); - collector.reg_use(src2); - collector.reg_def(dst); - } - Inst::LoadAddr { dst, mem } => { collector.reg_def(dst); mem.get_operands(collector); } - Inst::Load { + Inst::XLoad { dst, mem, ty: _, @@ -222,7 +228,7 @@ fn pulley_get_operands(inst: &mut Inst, collector: &mut impl OperandVisitor) { mem.get_operands(collector); } - Inst::Store { + Inst::XStore { mem, src, ty: _, @@ -232,41 +238,51 @@ fn pulley_get_operands(inst: &mut Inst, collector: &mut impl OperandVisitor) { collector.reg_use(src); } - Inst::BitcastIntFromFloat32 { dst, src } => { - collector.reg_use(src); + Inst::FLoad { + dst, + mem, + ty: _, + flags: _, + } => { collector.reg_def(dst); + mem.get_operands(collector); } - Inst::BitcastIntFromFloat64 { dst, src } => { + + Inst::FStore { + mem, + src, + ty: _, + flags: _, + } => { + mem.get_operands(collector); collector.reg_use(src); - collector.reg_def(dst); } - Inst::BitcastFloatFromInt32 { dst, src } => { - collector.reg_use(src); + + Inst::VLoad { + dst, + mem, + ty: _, + flags: _, + } => { collector.reg_def(dst); + mem.get_operands(collector); } - Inst::BitcastFloatFromInt64 { dst, src } => { + + Inst::VStore { + mem, + src, + ty: _, + flags: _, + } => { + mem.get_operands(collector); collector.reg_use(src); - collector.reg_def(dst); } Inst::BrTable { idx, .. } => { collector.reg_use(idx); } - Inst::StackAlloc32 { .. } | Inst::StackFree32 { .. } | Inst::PushFrame | Inst::PopFrame => { - } - - Inst::Zext8 { dst, src } - | Inst::Zext16 { dst, src } - | Inst::Zext32 { dst, src } - | Inst::Sext8 { dst, src } - | Inst::Sext16 { dst, src } - | Inst::Sext32 { dst, src } - | Inst::Bswap32 { dst, src } - | Inst::Bswap64 { dst, src } => { - collector.reg_use(src); - collector.reg_def(dst); - } + Inst::Raw { raw } => generated::get_operands(raw, collector), } } @@ -296,6 +312,18 @@ where } } +impl
<P> From<RawInst> for InstAndKind<P>
+where + P: PulleyTargetKind, +{ + fn from(inst: RawInst) -> Self { + Self { + inst: inst.into(), + kind: PhantomData, + } + } +} + impl
<P>
From> for Inst where P: PulleyTargetKind, @@ -348,7 +376,12 @@ where fn is_safepoint(&self) -> bool { match self.inst { - Inst::Trap { .. } => true, + Inst::Raw { + raw: RawInst::Trap { .. }, + } + | Inst::Call { .. } + | Inst::IndirectCall { .. } + | Inst::IndirectCallHost { .. } => true, _ => false, } } @@ -359,18 +392,22 @@ where fn is_move(&self) -> Option<(Writable, Reg)> { match self.inst { - Inst::Xmov { dst, src } => Some((Writable::from_reg(*dst.to_reg()), *src)), + Inst::Raw { + raw: RawInst::Xmov { dst, src }, + } => Some((Writable::from_reg(*dst.to_reg()), *src)), _ => None, } } fn is_included_in_clobbers(&self) -> bool { - self.is_args() + !self.is_args() } fn is_trap(&self) -> bool { match self.inst { - Inst::Trap { .. } => true, + Inst::Raw { + raw: RawInst::Trap { .. }, + } => true, _ => false, } } @@ -384,9 +421,12 @@ where fn is_term(&self) -> MachTerminator { match self.inst { - Inst::Ret { .. } | Inst::Rets { .. } => MachTerminator::Ret, + Inst::Raw { + raw: RawInst::Ret { .. }, + } + | Inst::Rets { .. } => MachTerminator::Ret, Inst::Jump { .. } => MachTerminator::Uncond, - Inst::BrIf { .. } + Inst::BrIf32 { .. } | Inst::BrIfXeq32 { .. } | Inst::BrIfXneq32 { .. } | Inst::BrIfXslt32 { .. } @@ -404,17 +444,17 @@ where fn gen_move(to_reg: Writable, from_reg: Reg, ty: Type) -> Self { match ty { - ir::types::I8 | ir::types::I16 | ir::types::I32 | ir::types::I64 => Inst::Xmov { + ir::types::I8 | ir::types::I16 | ir::types::I32 | ir::types::I64 => RawInst::Xmov { dst: WritableXReg::try_from(to_reg).unwrap(), src: XReg::new(from_reg).unwrap(), } .into(), - ir::types::F32 | ir::types::F64 => Inst::Fmov { + ir::types::F32 | ir::types::F64 => RawInst::Fmov { dst: WritableFReg::try_from(to_reg).unwrap(), src: FReg::new(from_reg).unwrap(), } .into(), - _ if ty.is_vector() => Inst::Vmov { + _ if ty.is_vector() => RawInst::Vmov { dst: WritableVReg::try_from(to_reg).unwrap(), src: VReg::new(from_reg).unwrap(), } @@ -542,8 +582,10 @@ impl Inst { let format_ext = |ext: ExtKind| -> &'static str { match ext { ExtKind::None => "", - ExtKind::Sign => "_s", - ExtKind::Zero => "_u", + ExtKind::Sign32 => "_s32", + ExtKind::Sign64 => "_s64", + ExtKind::Zero32 => "_u32", + ExtKind::Zero64 => "_u64", } }; @@ -569,8 +611,6 @@ impl Inst { Inst::Unwind { inst } => format!("unwind {inst:?}"), - Inst::Trap { code } => format!("trap // code = {code:?}"), - Inst::TrapIf { cond, size, @@ -585,8 +625,6 @@ impl Inst { Inst::Nop => format!("nop"), - Inst::Ret => format!("ret"), - Inst::GetSpecial { dst, reg } => { let dst = format_reg(*dst.to_reg()); let reg = format_reg(**reg); @@ -613,7 +651,7 @@ impl Inst { Inst::Jump { label } => format!("jump {}", label.to_string()), - Inst::BrIf { + Inst::BrIf32 { c, taken, not_taken, @@ -621,7 +659,7 @@ impl Inst { let c = format_reg(**c); let taken = taken.to_string(); let not_taken = not_taken.to_string(); - format!("br_if {c}, {taken}; jump {not_taken}") + format!("br_if32 {c}, {taken}; jump {not_taken}") } Inst::BrIfXeq32 { @@ -697,146 +735,27 @@ impl Inst { format!("br_if_xulteq32 {src1}, {src2}, {taken}; jump {not_taken}") } - Inst::Xmov { dst, src } => { - let dst = format_reg(*dst.to_reg()); - let src = format_reg(**src); - format!("{dst} = xmov {src}") - } - Inst::Fmov { dst, src } => { - let dst = format_reg(*dst.to_reg()); - let src = format_reg(**src); - format!("{dst} = fmov {src}") - } - Inst::Vmov { dst, src } => { - let dst = format_reg(*dst.to_reg()); - let src = format_reg(**src); - format!("{dst} = vmov {src}") - } - - Inst::Xconst8 { dst, imm } => 
{ - let dst = format_reg(*dst.to_reg()); - format!("{dst} = xconst8 {imm}") - } - Inst::Xconst16 { dst, imm } => { - let dst = format_reg(*dst.to_reg()); - format!("{dst} = xconst16 {imm}") - } - Inst::Xconst32 { dst, imm } => { - let dst = format_reg(*dst.to_reg()); - format!("{dst} = xconst32 {imm}") - } - Inst::Xconst64 { dst, imm } => { - let dst = format_reg(*dst.to_reg()); - format!("{dst} = xconst64 {imm}") - } - - Inst::Xadd32 { dst, src1, src2 } => format!( - "{} = xadd32 {}, {}", - format_reg(*dst.to_reg()), - format_reg(**src1), - format_reg(**src2) - ), - Inst::Xadd64 { dst, src1, src2 } => format!( - "{} = xadd64 {}, {}", - format_reg(*dst.to_reg()), - format_reg(**src1), - format_reg(**src2) - ), - - Inst::Xeq64 { dst, src1, src2 } => format!( - "{} = xeq64 {}, {}", - format_reg(*dst.to_reg()), - format_reg(**src1), - format_reg(**src2) - ), - Inst::Xneq64 { dst, src1, src2 } => format!( - "{} = xneq64 {}, {}", - format_reg(*dst.to_reg()), - format_reg(**src1), - format_reg(**src2) - ), - Inst::Xslt64 { dst, src1, src2 } => format!( - "{} = xslt64 {}, {}", - format_reg(*dst.to_reg()), - format_reg(**src1), - format_reg(**src2) - ), - Inst::Xslteq64 { dst, src1, src2 } => format!( - "{} = xslteq64 {}, {}", - format_reg(*dst.to_reg()), - format_reg(**src1), - format_reg(**src2) - ), - Inst::Xult64 { dst, src1, src2 } => format!( - "{} = xult64 {}, {}", - format_reg(*dst.to_reg()), - format_reg(**src1), - format_reg(**src2) - ), - Inst::Xulteq64 { dst, src1, src2 } => format!( - "{} = xulteq64 {}, {}", - format_reg(*dst.to_reg()), - format_reg(**src1), - format_reg(**src2) - ), - Inst::Xeq32 { dst, src1, src2 } => format!( - "{} = xeq32 {}, {}", - format_reg(*dst.to_reg()), - format_reg(**src1), - format_reg(**src2) - ), - Inst::Xneq32 { dst, src1, src2 } => format!( - "{} = xneq32 {}, {}", - format_reg(*dst.to_reg()), - format_reg(**src1), - format_reg(**src2) - ), - Inst::Xslt32 { dst, src1, src2 } => format!( - "{} = xslt32 {}, {}", - format_reg(*dst.to_reg()), - format_reg(**src1), - format_reg(**src2) - ), - Inst::Xslteq32 { dst, src1, src2 } => format!( - "{} = xslteq32 {}, {}", - format_reg(*dst.to_reg()), - format_reg(**src1), - format_reg(**src2) - ), - Inst::Xult32 { dst, src1, src2 } => format!( - "{} = xult32 {}, {}", - format_reg(*dst.to_reg()), - format_reg(**src1), - format_reg(**src2) - ), - Inst::Xulteq32 { dst, src1, src2 } => format!( - "{} = xulteq32 {}, {}", - format_reg(*dst.to_reg()), - format_reg(**src1), - format_reg(**src2) - ), - Inst::LoadAddr { dst, mem } => { let dst = format_reg(*dst.to_reg()); let mem = mem.to_string(); format!("{dst} = load_addr {mem}") } - Inst::Load { + Inst::XLoad { dst, mem, ty, flags, ext, } => { - let dst = format_reg(dst.to_reg()); + let dst = format_reg(*dst.to_reg()); let ty = ty.bits(); let ext = format_ext(*ext); let mem = mem.to_string(); - format!("{dst} = load{ty}{ext} {mem} // flags ={flags}") + format!("{dst} = xload{ty}{ext} {mem} // flags ={flags}") } - Inst::Store { + Inst::XStore { mem, src, ty, @@ -844,29 +763,56 @@ impl Inst { } => { let ty = ty.bits(); let mem = mem.to_string(); - let src = format_reg(*src); - format!("store{ty} {mem}, {src} // flags = {flags}") + let src = format_reg(**src); + format!("xstore{ty} {mem}, {src} // flags = {flags}") } - Inst::BitcastIntFromFloat32 { dst, src } => { + Inst::FLoad { + dst, + mem, + ty, + flags, + } => { let dst = format_reg(*dst.to_reg()); - let src = format_reg(**src); - format!("{dst} = bitcast_int_from_float32 {src}") + let ty = ty.bits(); + let mem = 
mem.to_string(); + format!("{dst} = fload{ty} {mem} // flags ={flags}") } - Inst::BitcastIntFromFloat64 { dst, src } => { - let dst = format_reg(*dst.to_reg()); + + Inst::FStore { + mem, + src, + ty, + flags, + } => { + let ty = ty.bits(); + let mem = mem.to_string(); let src = format_reg(**src); - format!("{dst} = bitcast_int_from_float64 {src}") + format!("fstore{ty} {mem}, {src} // flags = {flags}") } - Inst::BitcastFloatFromInt32 { dst, src } => { + + Inst::VLoad { + dst, + mem, + ty, + flags, + } => { let dst = format_reg(*dst.to_reg()); - let src = format_reg(**src); - format!("{dst} = bitcast_float_from_int32 {src}") + let ty = ty.bits(); + let mem = mem.to_string(); + format!("{dst} = vload{ty} {mem} // flags ={flags}") } - Inst::BitcastFloatFromInt64 { dst, src } => { - let dst = format_reg(*dst.to_reg()); + + Inst::VStore { + mem, + src, + ty, + flags, + } => { + let ty = ty.bits(); + let mem = mem.to_string(); let src = format_reg(**src); - format!("{dst} = bitcast_float_from_int64 {src}") + format!("vstore{ty} {mem}, {src} // flags = {flags}") } Inst::BrTable { @@ -877,56 +823,7 @@ impl Inst { let idx = format_reg(**idx); format!("br_table {idx} {default:?} {targets:?}") } - - Inst::StackAlloc32 { amt } => { - format!("stack_alloc32 {amt:#x}") - } - Inst::StackFree32 { amt } => { - format!("stack_free32 {amt:#x}") - } - Inst::PushFrame => format!("push_frame"), - Inst::PopFrame => format!("pop_frame"), - - Inst::Zext8 { dst, src } => { - let dst = format_reg(*dst.to_reg()); - let src = format_reg(**src); - format!("zext8 {dst}, {src}") - } - Inst::Zext16 { dst, src } => { - let dst = format_reg(*dst.to_reg()); - let src = format_reg(**src); - format!("zext16 {dst}, {src}") - } - Inst::Zext32 { dst, src } => { - let dst = format_reg(*dst.to_reg()); - let src = format_reg(**src); - format!("zext32 {dst}, {src}") - } - Inst::Sext8 { dst, src } => { - let dst = format_reg(*dst.to_reg()); - let src = format_reg(**src); - format!("sext8 {dst}, {src}") - } - Inst::Sext16 { dst, src } => { - let dst = format_reg(*dst.to_reg()); - let src = format_reg(**src); - format!("sext16 {dst}, {src}") - } - Inst::Sext32 { dst, src } => { - let dst = format_reg(*dst.to_reg()); - let src = format_reg(**src); - format!("sext32 {dst}, {src}") - } - Inst::Bswap32 { dst, src } => { - let dst = format_reg(*dst.to_reg()); - let src = format_reg(**src); - format!("bswap32 {dst}, {src}") - } - Inst::Bswap64 { dst, src } => { - let dst = format_reg(*dst.to_reg()); - let src = format_reg(**src); - format!("bswap64 {dst}, {src}") - } + Inst::Raw { raw } => generated::print(raw), } } } diff --git a/cranelift/codegen/src/isa/pulley_shared/lower.isle b/cranelift/codegen/src/isa/pulley_shared/lower.isle index dbbd6fad8fb8..342bcc344d08 100644 --- a/cranelift/codegen/src/isa/pulley_shared/lower.isle +++ b/cranelift/codegen/src/isa/pulley_shared/lower.isle @@ -6,6 +6,20 @@ ;;;; Rules for Control Flow ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +;; Helper to place a conditional `Value` provided into a register. Pulley +;; conditional values occupy the full low 32-bits of a register and so this +;; needs to handle situations such as when the `Value` is 64-bits an explicit +;; comparison must be made. Additionally if `Value` is smaller than 32-bits +;; then it must be sign-extended up to at least 32 bits. 
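+;; (In practice the narrow cases below use zero-extension, `zext8`/`zext16`,
+;; which is how the full low 32 bits of the condition register are made
+;; well-defined.)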
+(decl lower_cond (Value) XReg) +(rule (lower_cond val @ (value_type $I64)) (pulley_xneq64 val (pulley_xconst8 0))) +(rule (lower_cond val @ (value_type $I32)) val) +(rule (lower_cond val @ (value_type $I16)) (pulley_zext16 val)) +(rule (lower_cond val @ (value_type $I8)) (pulley_zext8 val)) + +;; Peel away explicit `uextend` values to take a look at the inner value. +(rule 1 (lower_cond (uextend val)) (lower_cond val)) + ;; The main control-flow-lowering term: takes a control-flow instruction and ;; target(s) and emits the necessary instructions. (decl partial lower_branch (Inst MachLabelSlice) Unit) @@ -15,8 +29,8 @@ (emit_side_effect (pulley_jump label))) ;; Generic case for conditional branches. -(rule -1 (lower_branch (brif (maybe_uextend c) _ _) (two_targets then else)) - (emit_side_effect (pulley_br_if c then else))) +(rule -1 (lower_branch (brif c _ _) (two_targets then else)) + (emit_side_effect (pulley_br_if32 (lower_cond c) then else))) ;; Conditional branches on `icmp`s. (rule (lower_branch (brif (maybe_uextend (icmp cc a b @ (value_type $I32))) _ _) @@ -153,6 +167,16 @@ (rule (lower (has_type ty (iconst (u64_from_imm64 n)))) (imm ty n)) +;;;; Rules for `f32const`;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + +(rule (lower (f32const (u32_from_ieee32 x))) + (pulley_fconst32 x)) + +;;;; Rules for `f64const`;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + +(rule (lower (f64const (u64_from_ieee64 x))) + (pulley_fconst64 x)) + ;;;; Rules for `iadd` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; (rule (lower (has_type $I8 (iadd a b))) @@ -167,6 +191,90 @@ (rule (lower (has_type $I64 (iadd a b))) (pulley_xadd64 a b)) +;;;; Rules for `isub` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + +(rule (lower (has_type $I8 (isub a b))) + (pulley_xsub32 a b)) + +(rule (lower (has_type $I16 (isub a b))) + (pulley_xsub32 a b)) + +(rule (lower (has_type $I32 (isub a b))) + (pulley_xsub32 a b)) + +(rule (lower (has_type $I64 (isub a b))) + (pulley_xsub64 a b)) + +;;;; Rules for `sdiv` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + +(rule (lower (has_type $I32 (sdiv a b))) (pulley_xdiv32_s a b)) +(rule (lower (has_type $I64 (sdiv a b))) (pulley_xdiv64_s a b)) + +;;;; Rules for `srem` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + +(rule (lower (has_type $I32 (srem a b))) (pulley_xrem32_s a b)) +(rule (lower (has_type $I64 (srem a b))) (pulley_xrem64_s a b)) + +;;;; Rules for `udiv` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + +(rule (lower (has_type $I32 (udiv a b))) (pulley_xdiv32_u a b)) +(rule (lower (has_type $I64 (udiv a b))) (pulley_xdiv64_u a b)) + +;;;; Rules for `urem` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + +(rule (lower (has_type $I32 (urem a b))) (pulley_xrem32_u a b)) +(rule (lower (has_type $I64 (urem a b))) (pulley_xrem64_u a b)) + +;;;; Rules for `ishl` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + +(rule (lower (has_type $I32 (ishl a b))) + (pulley_xshl32 a b)) + +(rule (lower (has_type $I64 (ishl a b))) + (pulley_xshl64 a b)) + +;;;; Rules for `ushr` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + +(rule (lower (has_type $I32 (ushr a b))) + (pulley_xshr32_u a b)) + +(rule (lower (has_type $I64 (ushr a b))) + (pulley_xshr64_u a b)) + +;;;; Rules for `sshr` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + +(rule (lower (has_type $I32 (sshr a b))) + (pulley_xshr32_s a b)) + +(rule (lower (has_type $I64 (sshr a b))) + (pulley_xshr64_s a 
b)) + +;;;; Rules for `band` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + +(rule 0 (lower (has_type (fits_in_32 _) (band a b))) + (pulley_xand32 a b)) + +(rule 1 (lower (has_type $I64 (band a b))) + (pulley_xand64 a b)) + +;;;; Rules for `bor` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + +(rule 0 (lower (has_type (fits_in_32 _) (bor a b))) + (pulley_xor32 a b)) + +(rule 1 (lower (has_type $I64 (bor a b))) + (pulley_xor64 a b)) + +;;;; Rules for `ctz` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + +(rule (lower (has_type $I32 (ctz a))) (pulley_xctz32 a)) +(rule (lower (has_type $I64 (ctz a))) (pulley_xctz64 a)) + +;;;; Rules for `clz` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + +(rule (lower (has_type $I32 (clz a))) (pulley_xclz32 a)) +(rule (lower (has_type $I64 (clz a))) (pulley_xclz64 a)) + ;;;; Rules for `icmp` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; (rule 1 (lower (icmp cc a b @ (value_type $I64))) @@ -223,38 +331,95 @@ (rule (lower_icmp ty (IntCC.UnsignedGreaterThanOrEqual) a b) (lower_icmp ty (IntCC.UnsignedLessThanOrEqual) b a)) -;;;; Rules for `load` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +;;;; Rules for `fcmp` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + +(rule (lower (fcmp cc a b @ (value_type (ty_scalar_float ty)))) + (lower_fcmp ty cc a b)) + +(decl lower_fcmp (Type FloatCC Value Value) XReg) -(rule (lower (has_type ty (load flags addr (offset32 offset)))) - (let ((le Reg (pulley_load (Amode.RegOffset addr (i32_as_i64 offset)) - ty - flags - (ExtKind.Zero)))) - (xswap_if_be le ty flags))) +(rule (lower_fcmp $F32 (FloatCC.Equal) a b) (pulley_feq32 a b)) +(rule (lower_fcmp $F64 (FloatCC.Equal) a b) (pulley_feq64 a b)) +(rule (lower_fcmp $F32 (FloatCC.NotEqual) a b) (pulley_fneq32 a b)) +(rule (lower_fcmp $F64 (FloatCC.NotEqual) a b) (pulley_fneq64 a b)) +(rule (lower_fcmp $F32 (FloatCC.LessThan) a b) (pulley_flt32 a b)) +(rule (lower_fcmp $F64 (FloatCC.LessThan) a b) (pulley_flt64 a b)) +(rule (lower_fcmp $F32 (FloatCC.LessThanOrEqual) a b) (pulley_flt32 a b)) +(rule (lower_fcmp $F64 (FloatCC.LessThanOrEqual) a b) (pulley_flt64 a b)) +;; NB: Pulley doesn't have lowerings for `Ordered` or `Unordered` `FloatCC` +;; conditions as that's not needed by wasm at this time. -;;;; Rules for `store` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +;; Pulley doesn't have instructions for `>` and `>=`, so we have to reverse the +;; operation. 
+(rule (lower_fcmp ty (FloatCC.GreaterThan) a b) + (lower_fcmp ty (FloatCC.LessThan) b a)) +(rule (lower_fcmp ty (FloatCC.GreaterThanOrEqual) a b) + (lower_fcmp ty (FloatCC.LessThanOrEqual) b a)) + +;;;; Rules for `load` and friends ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + +(decl amode (Value Offset32) Amode) +(rule (amode addr (offset32 offset)) (Amode.RegOffset addr offset)) + +(rule (lower (has_type (ty_int ty) (load flags addr offset))) + (pulley_xload (amode addr offset) ty flags (ExtKind.None))) + +(rule 1 (lower (has_type (ty_scalar_float ty) (load flags addr offset))) + (pulley_fload (amode addr offset) ty flags)) + +(rule 0 (lower (has_type (ty_int (fits_in_32 _)) (uload8 flags addr offset))) + (pulley_xload (amode addr offset) $I8 flags (ExtKind.Zero32))) + +(rule 0 (lower (has_type (ty_int (fits_in_32 _)) (uload16 flags addr offset))) + (pulley_xload (amode addr offset) $I16 flags (ExtKind.Zero32))) + +(rule 0 (lower (has_type (ty_int (fits_in_32 _)) (uload32 flags addr offset))) + (pulley_xload (amode addr offset) $I32 flags (ExtKind.None))) + +(rule 1 (lower (has_type $I64 (uload8 flags addr offset))) + (pulley_xload (amode addr offset) $I8 flags (ExtKind.Zero64))) + +(rule 1 (lower (has_type $I64 (uload16 flags addr offset))) + (pulley_xload (amode addr offset) $I16 flags (ExtKind.Zero64))) + +(rule 1 (lower (has_type $I64 (uload32 flags addr offset))) + (pulley_xload (amode addr offset) $I32 flags (ExtKind.Zero64))) + +(rule 0 (lower (has_type (ty_int (fits_in_32 _)) (sload8 flags addr offset))) + (pulley_xload (amode addr offset) $I8 flags (ExtKind.Sign32))) + +(rule 0 (lower (has_type (ty_int (fits_in_32 _)) (sload16 flags addr offset))) + (pulley_xload (amode addr offset) $I16 flags (ExtKind.Sign32))) + +(rule 0 (lower (has_type (ty_int (fits_in_32 _)) (sload32 flags addr offset))) + (pulley_xload (amode addr offset) $I32 flags (ExtKind.None))) + +(rule 1 (lower (has_type $I64 (sload8 flags addr offset))) + (pulley_xload (amode addr offset) $I8 flags (ExtKind.Sign64))) + +(rule 1 (lower (has_type $I64 (sload16 flags addr offset))) + (pulley_xload (amode addr offset) $I16 flags (ExtKind.Sign64))) + +(rule 1 (lower (has_type $I64 (sload32 flags addr offset))) + (pulley_xload (amode addr offset) $I32 flags (ExtKind.Sign64))) + +;;;; Rules for `store` and friends ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + +(rule (lower (store flags src @ (value_type (ty_int ty)) addr offset)) + (side_effect (pulley_xstore (amode addr offset) src ty flags))) -(type Endianness extern (enum Big Little)) -(decl pure endianness (MemFlags) Endianness) -(extern constructor endianness endianness) +(rule 1 (lower (store flags src @ (value_type (ty_scalar_float ty)) addr offset)) + (side_effect (pulley_fstore (amode addr offset) src ty flags))) -(rule (lower (store flags src @ (value_type ty) addr (offset32 offset))) - (side_effect (pulley_store (Amode.RegOffset addr (i32_as_i64 offset)) - (xswap_if_be src ty flags) - ty - flags))) +(rule (lower (istore8 flags src addr offset)) + (side_effect (pulley_xstore (amode addr offset) src $I8 flags))) -(decl xswap_if_be (XReg Type MemFlags) XReg) -(rule (xswap_if_be val _ty flags) - (if-let (Endianness.Little) (endianness flags)) - val) -(rule (xswap_if_be val $I32 flags) - (if-let (Endianness.Big) (endianness flags)) - (pulley_bswap32 val)) -(rule (xswap_if_be val $I64 flags) - (if-let (Endianness.Big) (endianness flags)) - (pulley_bswap64 val)) +(rule (lower (istore16 flags src addr offset)) + (side_effect (pulley_xstore (amode addr offset) src 
$I16 flags))) + +(rule (lower (istore32 flags src addr offset)) + (side_effect (pulley_xstore (amode addr offset) src $I32 flags))) ;;;; Rules for `stack_addr` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; @@ -266,3 +431,52 @@ (let ((dst WritableXReg (temp_writable_xreg)) (_ Unit (emit (abi_stackslot_addr dst stack_slot offset)))) dst)) + +;;;; Rules for `uextend` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + +(rule (lower (has_type (fits_in_64 _) (uextend val @ (value_type $I32)))) + (pulley_zext32 val)) + +(rule (lower (has_type (fits_in_64 _) (uextend val @ (value_type $I16)))) + (pulley_zext16 val)) + +(rule (lower (has_type (fits_in_64 _) (uextend val @ (value_type $I8)))) + (pulley_zext8 val)) + +;;;; Rules for `sextend` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + +(rule (lower (has_type (fits_in_64 _) (sextend val @ (value_type $I8)))) + (pulley_sext8 val)) + +(rule (lower (has_type (fits_in_64 _) (sextend val @ (value_type $I16)))) + (pulley_sext16 val)) + +(rule (lower (has_type (fits_in_64 _) (sextend val @ (value_type $I32)))) + (pulley_sext32 val)) + +;;;; Rules for `ireduce` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + +(rule (lower (has_type (fits_in_64 _ty) (ireduce src))) + src) + +;;;; Rules for `uadd_overflow_trap` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + +(rule (lower (has_type $I32 (uadd_overflow_trap a b tc))) + (pulley_xadd32_uoverflow_trap a b tc)) + +(rule (lower (has_type $I64 (uadd_overflow_trap a b tc))) + (pulley_xadd64_uoverflow_trap a b tc)) + +;;;; Rules for `select` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + +(rule 0 (lower (has_type (ty_int (fits_in_32 _)) (select c a b))) + (pulley_xselect32 (lower_cond c) a b)) + +(rule 1 (lower (has_type $I64 (select c a b))) + (pulley_xselect64 (lower_cond c) a b)) + +(rule 1 (lower (has_type $F32 (select c a b))) + (pulley_fselect32 (lower_cond c) a b)) + +(rule 1 (lower (has_type $F64 (select c a b))) + (pulley_fselect64 (lower_cond c) a b)) diff --git a/cranelift/codegen/src/isa/pulley_shared/lower/isle.rs b/cranelift/codegen/src/isa/pulley_shared/lower/isle.rs index cbe78d7d31d0..ae61bbc18fbc 100644 --- a/cranelift/codegen/src/isa/pulley_shared/lower/isle.rs +++ b/cranelift/codegen/src/isa/pulley_shared/lower/isle.rs @@ -114,10 +114,6 @@ where fn lr_reg(&mut self) -> XReg { XReg::new(regs::lr_reg()).unwrap() } - - fn endianness(&mut self, flags: MemFlags) -> Endianness { - flags.endianness(self.backend.target_endianness()) - } } /// The main entry point for lowering with ISLE. diff --git a/cranelift/codegen/src/isa/pulley_shared/mod.rs b/cranelift/codegen/src/isa/pulley_shared/mod.rs index faf3a3ca72ef..5387fb3ae01c 100644 --- a/cranelift/codegen/src/isa/pulley_shared/mod.rs +++ b/cranelift/codegen/src/isa/pulley_shared/mod.rs @@ -7,7 +7,6 @@ mod settings; use self::inst::EmitInfo; use super::{Builder as IsaBuilder, FunctionAlignment}; -use crate::ir::Endianness; use crate::{ dominator_tree::DominatorTree, ir, @@ -125,14 +124,6 @@ where let abi = abi::PulleyCallee::new(func, self, &self.isa_flags, &sigs)?; machinst::compile::(func, domtree, self, abi, emit_info, sigs, ctrl_plane) } - - pub fn target_endianness(&self) -> Endianness { - if self.isa_flags.big_endian() { - Endianness::Big - } else { - Endianness::Little - } - } } impl
<P> TargetIsa for PulleyBackend<P>

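Note on the `lower_cond` helper added in lower.isle above: the new rules place a `brif`/`select` condition in the low 32 bits of an `x` register before handing it to `br_if32` or the `xselect*` instructions, normalizing by width — `i8`/`i16` conditions get `zext8`/`zext16`, `i32` is used as-is, and `i64` is collapsed to a boolean with `xconst8 0; xneq64` — which is exactly what the regenerated `brif.clif` expectations below show. The following standalone Rust sketch only models that decision table; the enum and function names are hypothetical and it does not use the real Cranelift/ISLE APIs.

    /// Condition widths that `brif`/`select` may see. Standalone model only; the
    /// real lowering matches on CLIF value types in ISLE, not on this enum.
    #[derive(Clone, Copy, Debug)]
    enum CondTy {
        I8,
        I16,
        I32,
        I64,
    }

    /// Pseudo-ops mirroring what the `lower_cond` rules emit before a
    /// `br_if32`/`xselect32`-style consumer.
    #[derive(Debug, PartialEq)]
    enum CondLowering {
        /// `i32`: the low 32 bits already hold the condition; use the register as-is.
        UseAsIs,
        /// `i8`: `zext8` so the full low 32 bits are defined.
        Zext8,
        /// `i16`: `zext16` so the full low 32 bits are defined.
        Zext16,
        /// `i64`: `xconst8 0` + `xneq64`, collapsing the value to a 32-bit boolean.
        CompareNonZero64,
    }

    /// Decision table corresponding to the `lower_cond` rules.
    fn lower_cond(ty: CondTy) -> CondLowering {
        match ty {
            CondTy::I8 => CondLowering::Zext8,
            CondTy::I16 => CondLowering::Zext16,
            CondTy::I32 => CondLowering::UseAsIs,
            CondTy::I64 => CondLowering::CompareNonZero64,
        }
    }

    fn main() {
        // Mirrors the updated brif.clif expectations: an i8 condition gains a
        // `zext8`, an i64 condition gains `xconst8 0; xneq64`, an i32 passes through.
        assert_eq!(lower_cond(CondTy::I8), CondLowering::Zext8);
        assert_eq!(lower_cond(CondTy::I32), CondLowering::UseAsIs);
        assert_eq!(lower_cond(CondTy::I64), CondLowering::CompareNonZero64);
    }

The extra rule that peels an explicit `uextend` simply lets the same table apply to the inner, pre-extension value.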
diff --git a/cranelift/filetests/Cargo.toml b/cranelift/filetests/Cargo.toml index b6e0df9814f2..7736ff247903 100644 --- a/cranelift/filetests/Cargo.toml +++ b/cranelift/filetests/Cargo.toml @@ -37,4 +37,4 @@ serde = { workspace = true } serde_derive = { workspace = true } cranelift.workspace = true smallvec = { workspace = true } -pulley-interpreter = { workspace = true, features = ["disas", "std"] } +pulley-interpreter = { workspace = true, features = ["disas", "std", "interp"] } diff --git a/cranelift/filetests/filetests/isa/pulley32/br_table.clif b/cranelift/filetests/filetests/isa/pulley32/br_table.clif index 624b808192c1..c9f6844ad463 100644 --- a/cranelift/filetests/filetests/isa/pulley32/br_table.clif +++ b/cranelift/filetests/filetests/isa/pulley32/br_table.clif @@ -34,19 +34,19 @@ block5(v5: i32): ; block2: ; jump label4 ; block3: -; x5 = xconst8 3 +; xconst8 x5, 3 ; jump label7 ; block4: -; x5 = xconst8 2 +; xconst8 x5, 2 ; jump label7 ; block5: -; x5 = xconst8 1 +; xconst8 x5, 1 ; jump label7 ; block6: -; x5 = xconst8 4 +; xconst8 x5, 4 ; jump label7 ; block7: -; x0 = xadd32 x0, x5 +; xadd32 x0, x0, x5 ; ret ; ; Disassembled: diff --git a/cranelift/filetests/filetests/isa/pulley32/brif-icmp.clif b/cranelift/filetests/filetests/isa/pulley32/brif-icmp.clif index 5ccc545b7371..3e51993a7630 100644 --- a/cranelift/filetests/filetests/isa/pulley32/brif-icmp.clif +++ b/cranelift/filetests/filetests/isa/pulley32/brif-icmp.clif @@ -19,10 +19,10 @@ block2: ; block0: ; br_if_xeq32 x0, x1, label2; jump label1 ; block1: -; x0 = xconst8 1 +; xconst8 x0, 1 ; ret ; block2: -; x0 = xconst8 2 +; xconst8 x0, 2 ; ret ; ; Disassembled: @@ -50,10 +50,10 @@ block2: ; block0: ; br_if_xneq32 x0, x1, label2; jump label1 ; block1: -; x0 = xconst8 1 +; xconst8 x0, 1 ; ret ; block2: -; x0 = xconst8 2 +; xconst8 x0, 2 ; ret ; ; Disassembled: @@ -81,10 +81,10 @@ block2: ; block0: ; br_if_xult32 x0, x1, label2; jump label1 ; block1: -; x0 = xconst8 1 +; xconst8 x0, 1 ; ret ; block2: -; x0 = xconst8 2 +; xconst8 x0, 2 ; ret ; ; Disassembled: @@ -112,10 +112,10 @@ block2: ; block0: ; br_if_xulteq32 x0, x1, label2; jump label1 ; block1: -; x0 = xconst8 1 +; xconst8 x0, 1 ; ret ; block2: -; x0 = xconst8 2 +; xconst8 x0, 2 ; ret ; ; Disassembled: @@ -143,10 +143,10 @@ block2: ; block0: ; br_if_xslt32 x0, x1, label2; jump label1 ; block1: -; x0 = xconst8 1 +; xconst8 x0, 1 ; ret ; block2: -; x0 = xconst8 2 +; xconst8 x0, 2 ; ret ; ; Disassembled: @@ -174,10 +174,10 @@ block2: ; block0: ; br_if_xslteq32 x0, x1, label2; jump label1 ; block1: -; x0 = xconst8 1 +; xconst8 x0, 1 ; ret ; block2: -; x0 = xconst8 2 +; xconst8 x0, 2 ; ret ; ; Disassembled: @@ -205,10 +205,10 @@ block2: ; block0: ; br_if_xult32 x1, x0, label2; jump label1 ; block1: -; x0 = xconst8 1 +; xconst8 x0, 1 ; ret ; block2: -; x0 = xconst8 2 +; xconst8 x0, 2 ; ret ; ; Disassembled: @@ -236,10 +236,10 @@ block2: ; block0: ; br_if_xulteq32 x1, x0, label2; jump label1 ; block1: -; x0 = xconst8 1 +; xconst8 x0, 1 ; ret ; block2: -; x0 = xconst8 2 +; xconst8 x0, 2 ; ret ; ; Disassembled: @@ -267,10 +267,10 @@ block2: ; block0: ; br_if_xslt32 x1, x0, label2; jump label1 ; block1: -; x0 = xconst8 1 +; xconst8 x0, 1 ; ret ; block2: -; x0 = xconst8 2 +; xconst8 x0, 2 ; ret ; ; Disassembled: @@ -298,10 +298,10 @@ block2: ; block0: ; br_if_xslteq32 x1, x0, label2; jump label1 ; block1: -; x0 = xconst8 1 +; xconst8 x0, 1 ; ret ; block2: -; x0 = xconst8 2 +; xconst8 x0, 2 ; ret ; ; Disassembled: @@ -330,10 +330,10 @@ block2: ; block0: ; br_if_xeq32 x0, x1, 
label2; jump label1 ; block1: -; x0 = xconst8 1 +; xconst8 x0, 1 ; ret ; block2: -; x0 = xconst8 2 +; xconst8 x0, 2 ; ret ; ; Disassembled: diff --git a/cranelift/filetests/filetests/isa/pulley32/brif.clif b/cranelift/filetests/filetests/isa/pulley32/brif.clif index 36c279c54d22..73059c7a65d9 100644 --- a/cranelift/filetests/filetests/isa/pulley32/brif.clif +++ b/cranelift/filetests/filetests/isa/pulley32/brif.clif @@ -16,16 +16,18 @@ block2: ; VCode: ; block0: -; br_if x0, label2; jump label1 +; zext8 x4, x0 +; br_if32 x4, label2; jump label1 ; block1: -; x0 = xconst8 0 +; xconst8 x0, 0 ; ret ; block2: -; x0 = xconst8 1 +; xconst8 x0, 1 ; ret ; ; Disassembled: -; br_if x0, 0xa // target = 0xa +; zext8 x4, x0 +; br_if32 x4, 0xa // target = 0xd ; xconst8 x0, 0 ; ret ; xconst8 x0, 1 @@ -46,16 +48,18 @@ block2: ; VCode: ; block0: -; br_if x0, label2; jump label1 +; zext16 x4, x0 +; br_if32 x4, label2; jump label1 ; block1: -; x0 = xconst8 0 +; xconst8 x0, 0 ; ret ; block2: -; x0 = xconst8 1 +; xconst8 x0, 1 ; ret ; ; Disassembled: -; br_if x0, 0xa // target = 0xa +; zext16 x4, x0 +; br_if32 x4, 0xa // target = 0xd ; xconst8 x0, 0 ; ret ; xconst8 x0, 1 @@ -76,16 +80,16 @@ block2: ; VCode: ; block0: -; br_if x0, label2; jump label1 +; br_if32 x0, label2; jump label1 ; block1: -; x0 = xconst8 0 +; xconst8 x0, 0 ; ret ; block2: -; x0 = xconst8 1 +; xconst8 x0, 1 ; ret ; ; Disassembled: -; br_if x0, 0xa // target = 0xa +; br_if32 x0, 0xa // target = 0xa ; xconst8 x0, 0 ; ret ; xconst8 x0, 1 @@ -106,16 +110,20 @@ block2: ; VCode: ; block0: -; br_if x0, label2; jump label1 +; xconst8 x4, 0 +; xneq64 x6, x0, x4 +; br_if32 x6, label2; jump label1 ; block1: -; x0 = xconst8 0 +; xconst8 x0, 0 ; ret ; block2: -; x0 = xconst8 1 +; xconst8 x0, 1 ; ret ; ; Disassembled: -; br_if x0, 0xa // target = 0xa +; xconst8 x4, 0 +; xneq64 x6, x0, x4 +; br_if32 x6, 0xa // target = 0x10 ; xconst8 x0, 0 ; ret ; xconst8 x0, 1 @@ -137,18 +145,20 @@ block2: ; VCode: ; block0: -; x5 = xeq32 x0, x1 -; br_if x5, label2; jump label1 +; xeq32 x6, x0, x1 +; zext8 x6, x6 +; br_if32 x6, label2; jump label1 ; block1: -; x0 = xconst8 0 +; xconst8 x0, 0 ; ret ; block2: -; x0 = xconst8 1 +; xconst8 x0, 1 ; ret ; ; Disassembled: -; xeq32 x5, x0, x1 -; br_if x5, 0xa // target = 0xd +; xeq32 x6, x0, x1 +; zext8 x6, x6 +; br_if32 x6, 0xa // target = 0x10 ; xconst8 x0, 0 ; ret ; xconst8 x0, 1 @@ -170,18 +180,20 @@ block2: ; VCode: ; block0: -; x5 = xneq32 x0, x1 -; br_if x5, label2; jump label1 +; xneq32 x6, x0, x1 +; zext8 x6, x6 +; br_if32 x6, label2; jump label1 ; block1: -; x0 = xconst8 0 +; xconst8 x0, 0 ; ret ; block2: -; x0 = xconst8 1 +; xconst8 x0, 1 ; ret ; ; Disassembled: -; xneq32 x5, x0, x1 -; br_if x5, 0xa // target = 0xd +; xneq32 x6, x0, x1 +; zext8 x6, x6 +; br_if32 x6, 0xa // target = 0x10 ; xconst8 x0, 0 ; ret ; xconst8 x0, 1 @@ -205,10 +217,10 @@ block2: ; block0: ; br_if_xslt32 x0, x1, label2; jump label1 ; block1: -; x0 = xconst8 0 +; xconst8 x0, 0 ; ret ; block2: -; x0 = xconst8 1 +; xconst8 x0, 1 ; ret ; ; Disassembled: @@ -234,18 +246,20 @@ block2: ; VCode: ; block0: -; x5 = xulteq64 x1, x0 -; br_if x5, label2; jump label1 +; xulteq64 x6, x1, x0 +; zext8 x6, x6 +; br_if32 x6, label2; jump label1 ; block1: -; x0 = xconst8 0 +; xconst8 x0, 0 ; ret ; block2: -; x0 = xconst8 1 +; xconst8 x0, 1 ; ret ; ; Disassembled: -; xulteq64 x5, x1, x0 -; br_if x5, 0xa // target = 0xd +; xulteq64 x6, x1, x0 +; zext8 x6, x6 +; br_if32 x6, 0xa // target = 0x10 ; xconst8 x0, 0 ; ret ; xconst8 x0, 1 diff --git 
a/cranelift/filetests/filetests/isa/pulley32/call.clif b/cranelift/filetests/filetests/isa/pulley32/call.clif index 6a162d085b50..449043f0bdab 100644 --- a/cranelift/filetests/filetests/isa/pulley32/call.clif +++ b/cranelift/filetests/filetests/isa/pulley32/call.clif @@ -15,9 +15,9 @@ block0: ; VCode: ; push_frame ; block0: -; x0 = xconst8 0 +; xconst8 x0, 0 ; call CallInfo { dest: TestCase(%g), uses: [CallArgPair { vreg: p0i, preg: p0i }], defs: [CallRetPair { vreg: Writable { reg: p0i }, preg: p0i }], clobbers: PRegSet { bits: [65534, 65279, 4294967295, 0] }, callee_conv: Fast, caller_conv: Fast, callee_pop_size: 0 } -; x0 = xconst8 1 +; xconst8 x0, 1 ; pop_frame ; ret ; @@ -42,9 +42,9 @@ block0: ; VCode: ; push_frame ; block0: -; x0 = xconst8 0 +; xconst8 x0, 0 ; call CallInfo { dest: TestCase(%g), uses: [CallArgPair { vreg: p0i, preg: p0i }], defs: [CallRetPair { vreg: Writable { reg: p0i }, preg: p0i }], clobbers: PRegSet { bits: [65534, 65279, 4294967295, 0] }, callee_conv: Fast, caller_conv: Fast, callee_pop_size: 0 } -; x0 = xconst8 1 +; xconst8 x0, 1 ; pop_frame ; ret ; @@ -71,10 +71,10 @@ block0: ; VCode: ; push_frame ; block0: -; x0 = xconst8 0 -; x1 = xconst8 1 -; x2 = xconst8 2 -; x3 = xconst8 3 +; xconst8 x0, 0 +; xconst8 x1, 1 +; xconst8 x2, 2 +; xconst8 x3, 3 ; call CallInfo { dest: TestCase(%g), uses: [CallArgPair { vreg: p0i, preg: p0i }, CallArgPair { vreg: p1i, preg: p1i }, CallArgPair { vreg: p2i, preg: p2i }, CallArgPair { vreg: p3i, preg: p3i }], defs: [], clobbers: PRegSet { bits: [65535, 65279, 4294967295, 0] }, callee_conv: Fast, caller_conv: Fast, callee_pop_size: 0 } ; pop_frame ; ret @@ -104,9 +104,9 @@ block0: ; push_frame ; block0: ; call CallInfo { dest: TestCase(%g), uses: [], defs: [CallRetPair { vreg: Writable { reg: p0i }, preg: p0i }, CallRetPair { vreg: Writable { reg: p1i }, preg: p1i }, CallRetPair { vreg: Writable { reg: p2i }, preg: p2i }, CallRetPair { vreg: Writable { reg: p3i }, preg: p3i }], clobbers: PRegSet { bits: [65520, 65279, 4294967295, 0] }, callee_conv: Fast, caller_conv: Fast, callee_pop_size: 0 } -; x4 = xadd64 x0, x2 -; x3 = xadd64 x1, x3 -; x0 = xadd64 x4, x3 +; xadd64 x4, x0, x2 +; xadd64 x3, x1, x3 +; xadd64 x0, x4, x3 ; pop_frame ; ret ; @@ -130,32 +130,32 @@ block0: ; VCode: ; push_frame -; stack_alloc32 0x30 +; stack_alloc32 48 ; block0: -; x15 = xconst8 0 -; store64 OutgoingArg(0), x15 // flags = notrap aligned -; store64 OutgoingArg(8), x15 // flags = notrap aligned -; store64 OutgoingArg(16), x15 // flags = notrap aligned -; store64 OutgoingArg(24), x15 // flags = notrap aligned -; store64 OutgoingArg(32), x15 // flags = notrap aligned -; store64 OutgoingArg(40), x15 // flags = notrap aligned -; x0 = xmov x15 -; x1 = xmov x15 -; x2 = xmov x15 -; x3 = xmov x15 -; x4 = xmov x15 -; x5 = xmov x15 -; x6 = xmov x15 -; x7 = xmov x15 -; x8 = xmov x15 -; x9 = xmov x15 -; x10 = xmov x15 -; x11 = xmov x15 -; x12 = xmov x15 -; x13 = xmov x15 -; x14 = xmov x15 +; xconst8 x15, 0 +; xstore64 OutgoingArg(0), x15 // flags = notrap aligned +; xstore64 OutgoingArg(8), x15 // flags = notrap aligned +; xstore64 OutgoingArg(16), x15 // flags = notrap aligned +; xstore64 OutgoingArg(24), x15 // flags = notrap aligned +; xstore64 OutgoingArg(32), x15 // flags = notrap aligned +; xstore64 OutgoingArg(40), x15 // flags = notrap aligned +; xmov x0, x15 +; xmov x1, x15 +; xmov x2, x15 +; xmov x3, x15 +; xmov x4, x15 +; xmov x5, x15 +; xmov x6, x15 +; xmov x7, x15 +; xmov x8, x15 +; xmov x9, x15 +; xmov x10, x15 +; xmov x11, x15 +; xmov x12, x15 +; 
xmov x13, x15 +; xmov x14, x15 ; call CallInfo { dest: TestCase(%g), uses: [CallArgPair { vreg: p0i, preg: p0i }, CallArgPair { vreg: p1i, preg: p1i }, CallArgPair { vreg: p2i, preg: p2i }, CallArgPair { vreg: p3i, preg: p3i }, CallArgPair { vreg: p4i, preg: p4i }, CallArgPair { vreg: p5i, preg: p5i }, CallArgPair { vreg: p6i, preg: p6i }, CallArgPair { vreg: p7i, preg: p7i }, CallArgPair { vreg: p8i, preg: p8i }, CallArgPair { vreg: p9i, preg: p9i }, CallArgPair { vreg: p10i, preg: p10i }, CallArgPair { vreg: p11i, preg: p11i }, CallArgPair { vreg: p12i, preg: p12i }, CallArgPair { vreg: p13i, preg: p13i }, CallArgPair { vreg: p14i, preg: p14i }, CallArgPair { vreg: p15i, preg: p15i }], defs: [], clobbers: PRegSet { bits: [65535, 65279, 4294967295, 0] }, callee_conv: Fast, caller_conv: Fast, callee_pop_size: 0 } -; stack_free32 0x30 +; stack_free32 48 ; pop_frame ; ret ; @@ -163,12 +163,12 @@ block0: ; push_frame ; stack_alloc32 48 ; xconst8 x15, 0 -; store64 sp, x15 -; store64_offset8 sp, 8, x15 -; store64_offset8 sp, 16, x15 -; store64_offset8 sp, 24, x15 -; store64_offset8 sp, 32, x15 -; store64_offset8 sp, 40, x15 +; xstore64le_offset32 sp, 0, x15 +; xstore64le_offset32 sp, 8, x15 +; xstore64le_offset32 sp, 16, x15 +; xstore64le_offset32 sp, 24, x15 +; xstore64le_offset32 sp, 32, x15 +; xstore64le_offset32 sp, 40, x15 ; xmov x0, x15 ; xmov x1, x15 ; xmov x2, x15 @@ -184,7 +184,7 @@ block0: ; xmov x12, x15 ; xmov x13, x15 ; xmov x14, x15 -; call 0x0 // target = 0x4d +; call 0x0 // target = 0x60 ; stack_free32 48 ; pop_frame ; ret @@ -227,64 +227,79 @@ block0: ; VCode: ; push_frame -; stack_alloc32 0x40 -; store64 sp+56, x18 // flags = notrap aligned -; store64 sp+48, x20 // flags = notrap aligned +; stack_alloc32 112 +; xstore64 sp+104, x18 // flags = notrap aligned +; xstore64 sp+96, x19 // flags = notrap aligned +; xstore64 sp+88, x20 // flags = notrap aligned +; xstore64 sp+80, x21 // flags = notrap aligned +; xstore64 sp+72, x23 // flags = notrap aligned +; xstore64 sp+64, x24 // flags = notrap aligned +; xstore64 sp+56, x25 // flags = notrap aligned ; block0: ; x0 = load_addr OutgoingArg(0) ; call CallInfo { dest: TestCase(%g), uses: [CallArgPair { vreg: p0i, preg: p0i }], defs: [CallRetPair { vreg: Writable { reg: p0i }, preg: p0i }, CallRetPair { vreg: Writable { reg: p1i }, preg: p1i }, CallRetPair { vreg: Writable { reg: p2i }, preg: p2i }, CallRetPair { vreg: Writable { reg: p3i }, preg: p3i }, CallRetPair { vreg: Writable { reg: p4i }, preg: p4i }, CallRetPair { vreg: Writable { reg: p5i }, preg: p5i }, CallRetPair { vreg: Writable { reg: p6i }, preg: p6i }, CallRetPair { vreg: Writable { reg: p7i }, preg: p7i }, CallRetPair { vreg: Writable { reg: p8i }, preg: p8i }, CallRetPair { vreg: Writable { reg: p9i }, preg: p9i }, CallRetPair { vreg: Writable { reg: p10i }, preg: p10i }, CallRetPair { vreg: Writable { reg: p11i }, preg: p11i }, CallRetPair { vreg: Writable { reg: p12i }, preg: p12i }, CallRetPair { vreg: Writable { reg: p13i }, preg: p13i }, CallRetPair { vreg: Writable { reg: p14i }, preg: p14i }, CallRetPair { vreg: Writable { reg: p15i }, preg: p15i }], clobbers: PRegSet { bits: [0, 65279, 4294967295, 0] }, callee_conv: Fast, caller_conv: Fast, callee_pop_size: 0 } -; x18 = xmov x13 -; x20 = xmov x11 -; x24 = load64_u OutgoingArg(0) // flags = notrap aligned -; x11 = load64_u OutgoingArg(8) // flags = notrap aligned -; x13 = load64_u OutgoingArg(16) // flags = notrap aligned -; x19 = load64_u OutgoingArg(24) // flags = notrap aligned -; x21 = load64_u 
OutgoingArg(32) // flags = notrap aligned -; x25 = xadd64 x0, x1 -; x23 = xadd64 x2, x3 -; x5 = xadd64 x4, x5 -; x6 = xadd64 x6, x7 -; x7 = xadd64 x8, x9 -; x0 = xmov x20 -; x4 = xadd64 x10, x0 -; x10 = xmov x18 -; x8 = xadd64 x12, x10 -; x14 = xadd64 x14, x15 -; x15 = xadd64 x24, x11 -; x13 = xadd64 x11, x13 -; x0 = xadd64 x19, x21 -; x1 = xadd64 x25, x23 -; x2 = xadd64 x5, x6 -; x3 = xadd64 x7, x4 -; x14 = xadd64 x8, x14 -; x13 = xadd64 x15, x13 -; x15 = xadd64 x0, x0 -; x0 = xadd64 x1, x2 -; x14 = xadd64 x3, x14 -; x13 = xadd64 x13, x15 -; x14 = xadd64 x0, x14 -; x13 = xadd64 x13, x13 -; x0 = xadd64 x14, x13 -; x18 = load64_u sp+56 // flags = notrap aligned -; x20 = load64_u sp+48 // flags = notrap aligned -; stack_free32 0x40 +; xmov x18, x13 +; xmov x20, x11 +; x24 = xload64 OutgoingArg(0) // flags = notrap aligned +; x11 = xload64 OutgoingArg(8) // flags = notrap aligned +; x13 = xload64 OutgoingArg(16) // flags = notrap aligned +; x19 = xload64 OutgoingArg(24) // flags = notrap aligned +; x21 = xload64 OutgoingArg(32) // flags = notrap aligned +; xadd64 x25, x0, x1 +; xadd64 x23, x2, x3 +; xadd64 x5, x4, x5 +; xadd64 x6, x6, x7 +; xadd64 x7, x8, x9 +; xmov x0, x20 +; xadd64 x4, x10, x0 +; xmov x10, x18 +; xadd64 x8, x12, x10 +; xadd64 x14, x14, x15 +; xadd64 x15, x24, x11 +; xadd64 x13, x11, x13 +; xadd64 x0, x19, x21 +; xadd64 x1, x25, x23 +; xadd64 x2, x5, x6 +; xadd64 x3, x7, x4 +; xadd64 x14, x8, x14 +; xadd64 x13, x15, x13 +; xadd64 x15, x0, x0 +; xadd64 x0, x1, x2 +; xadd64 x14, x3, x14 +; xadd64 x13, x13, x15 +; xadd64 x14, x0, x14 +; xadd64 x13, x13, x13 +; xadd64 x0, x14, x13 +; x18 = xload64 sp+104 // flags = notrap aligned +; x19 = xload64 sp+96 // flags = notrap aligned +; x20 = xload64 sp+88 // flags = notrap aligned +; x21 = xload64 sp+80 // flags = notrap aligned +; x23 = xload64 sp+72 // flags = notrap aligned +; x24 = xload64 sp+64 // flags = notrap aligned +; x25 = xload64 sp+56 // flags = notrap aligned +; stack_free32 112 ; pop_frame ; ret ; ; Disassembled: ; push_frame -; stack_alloc32 64 -; store64_offset8 sp, 56, x18 -; store64_offset8 sp, 48, x20 +; stack_alloc32 112 +; xstore64le_offset32 sp, 104, x18 +; xstore64le_offset32 sp, 96, x19 +; xstore64le_offset32 sp, 88, x20 +; xstore64le_offset32 sp, 80, x21 +; xstore64le_offset32 sp, 72, x23 +; xstore64le_offset32 sp, 64, x24 +; xstore64le_offset32 sp, 56, x25 ; xmov x0, sp -; call 0x0 // target = 0x11 +; call 0x0 // target = 0x3a ; xmov x18, x13 ; xmov x20, x11 -; load64 x24, sp -; load64_offset8 x11, sp, 8 -; load64_offset8 x13, sp, 16 -; load64_offset8 x19, sp, 24 -; load64_offset8 x21, sp, 32 +; xload64le_offset32 x24, sp, 0 +; xload64le_offset32 x11, sp, 8 +; xload64le_offset32 x13, sp, 16 +; xload64le_offset32 x19, sp, 24 +; xload64le_offset32 x21, sp, 32 ; xadd64 x25, x0, x1 ; xadd64 x23, x2, x3 ; xadd64 x5, x4, x5 @@ -310,9 +325,14 @@ block0: ; xadd64 x14, x0, x14 ; xadd64 x13, x13, x13 ; xadd64 x0, x14, x13 -; load64_offset8 x18, sp, 56 -; load64_offset8 x20, sp, 48 -; stack_free32 64 +; xload64le_offset32 x18, sp, 104 +; xload64le_offset32 x19, sp, 96 +; xload64le_offset32 x20, sp, 88 +; xload64le_offset32 x21, sp, 80 +; xload64le_offset32 x23, sp, 72 +; xload64le_offset32 x24, sp, 64 +; xload64le_offset32 x25, sp, 56 +; stack_free32 112 ; pop_frame ; ret diff --git a/cranelift/filetests/filetests/isa/pulley32/iadd.clif b/cranelift/filetests/filetests/isa/pulley32/iadd.clif index 1bde49304a87..908eb0662544 100644 --- a/cranelift/filetests/filetests/isa/pulley32/iadd.clif +++ 
b/cranelift/filetests/filetests/isa/pulley32/iadd.clif @@ -9,7 +9,7 @@ block0(v0: i8, v1: i8): ; VCode: ; block0: -; x0 = xadd32 x0, x1 +; xadd32 x0, x0, x1 ; ret ; ; Disassembled: @@ -24,7 +24,7 @@ block0(v0: i16, v1: i16): ; VCode: ; block0: -; x0 = xadd32 x0, x1 +; xadd32 x0, x0, x1 ; ret ; ; Disassembled: @@ -39,7 +39,7 @@ block0(v0: i32, v1: i32): ; VCode: ; block0: -; x0 = xadd32 x0, x1 +; xadd32 x0, x0, x1 ; ret ; ; Disassembled: @@ -54,7 +54,7 @@ block0(v0: i64, v1: i64): ; VCode: ; block0: -; x0 = xadd64 x0, x1 +; xadd64 x0, x0, x1 ; ret ; ; Disassembled: diff --git a/cranelift/filetests/filetests/isa/pulley32/icmp.clif b/cranelift/filetests/filetests/isa/pulley32/icmp.clif index 6926ca1400f7..8f2363f9e7db 100644 --- a/cranelift/filetests/filetests/isa/pulley32/icmp.clif +++ b/cranelift/filetests/filetests/isa/pulley32/icmp.clif @@ -9,7 +9,7 @@ block0(v0: i8, v1: i8): ; VCode: ; block0: -; x0 = xeq32 x0, x1 +; xeq32 x0, x0, x1 ; ret ; ; Disassembled: @@ -24,7 +24,7 @@ block0(v0: i16, v1: i16): ; VCode: ; block0: -; x0 = xeq32 x0, x1 +; xeq32 x0, x0, x1 ; ret ; ; Disassembled: @@ -39,7 +39,7 @@ block0(v0: i32, v1: i32): ; VCode: ; block0: -; x0 = xeq32 x0, x1 +; xeq32 x0, x0, x1 ; ret ; ; Disassembled: @@ -54,7 +54,7 @@ block0(v0: i64, v1: i64): ; VCode: ; block0: -; x0 = xeq64 x0, x1 +; xeq64 x0, x0, x1 ; ret ; ; Disassembled: @@ -69,7 +69,7 @@ block0(v0: i8, v1: i8): ; VCode: ; block0: -; x0 = xneq32 x0, x1 +; xneq32 x0, x0, x1 ; ret ; ; Disassembled: @@ -84,7 +84,7 @@ block0(v0: i16, v1: i16): ; VCode: ; block0: -; x0 = xneq32 x0, x1 +; xneq32 x0, x0, x1 ; ret ; ; Disassembled: @@ -99,7 +99,7 @@ block0(v0: i32, v1: i32): ; VCode: ; block0: -; x0 = xneq32 x0, x1 +; xneq32 x0, x0, x1 ; ret ; ; Disassembled: @@ -114,7 +114,7 @@ block0(v0: i64, v1: i64): ; VCode: ; block0: -; x0 = xneq64 x0, x1 +; xneq64 x0, x0, x1 ; ret ; ; Disassembled: @@ -129,7 +129,7 @@ block0(v0: i8, v1: i8): ; VCode: ; block0: -; x0 = xult32 x0, x1 +; xult32 x0, x0, x1 ; ret ; ; Disassembled: @@ -144,7 +144,7 @@ block0(v0: i16, v1: i16): ; VCode: ; block0: -; x0 = xult32 x0, x1 +; xult32 x0, x0, x1 ; ret ; ; Disassembled: @@ -159,7 +159,7 @@ block0(v0: i32, v1: i32): ; VCode: ; block0: -; x0 = xult32 x0, x1 +; xult32 x0, x0, x1 ; ret ; ; Disassembled: @@ -174,7 +174,7 @@ block0(v0: i64, v1: i64): ; VCode: ; block0: -; x0 = xult64 x0, x1 +; xult64 x0, x0, x1 ; ret ; ; Disassembled: @@ -189,7 +189,7 @@ block0(v0: i8, v1: i8): ; VCode: ; block0: -; x0 = xulteq32 x0, x1 +; xulteq32 x0, x0, x1 ; ret ; ; Disassembled: @@ -204,7 +204,7 @@ block0(v0: i16, v1: i16): ; VCode: ; block0: -; x0 = xulteq32 x0, x1 +; xulteq32 x0, x0, x1 ; ret ; ; Disassembled: @@ -219,7 +219,7 @@ block0(v0: i32, v1: i32): ; VCode: ; block0: -; x0 = xulteq32 x0, x1 +; xulteq32 x0, x0, x1 ; ret ; ; Disassembled: @@ -234,7 +234,7 @@ block0(v0: i64, v1: i64): ; VCode: ; block0: -; x0 = xulteq64 x0, x1 +; xulteq64 x0, x0, x1 ; ret ; ; Disassembled: @@ -249,7 +249,7 @@ block0(v0: i8, v1: i8): ; VCode: ; block0: -; x0 = xslt32 x0, x1 +; xslt32 x0, x0, x1 ; ret ; ; Disassembled: @@ -264,7 +264,7 @@ block0(v0: i16, v1: i16): ; VCode: ; block0: -; x0 = xslt32 x0, x1 +; xslt32 x0, x0, x1 ; ret ; ; Disassembled: @@ -279,7 +279,7 @@ block0(v0: i32, v1: i32): ; VCode: ; block0: -; x0 = xslt32 x0, x1 +; xslt32 x0, x0, x1 ; ret ; ; Disassembled: @@ -294,7 +294,7 @@ block0(v0: i64, v1: i64): ; VCode: ; block0: -; x0 = xslt64 x0, x1 +; xslt64 x0, x0, x1 ; ret ; ; Disassembled: @@ -309,7 +309,7 @@ block0(v0: i8, v1: i8): ; VCode: ; block0: -; x0 = xslteq32 
x0, x1 +; xslteq32 x0, x0, x1 ; ret ; ; Disassembled: @@ -324,7 +324,7 @@ block0(v0: i16, v1: i16): ; VCode: ; block0: -; x0 = xslteq32 x0, x1 +; xslteq32 x0, x0, x1 ; ret ; ; Disassembled: @@ -339,7 +339,7 @@ block0(v0: i32, v1: i32): ; VCode: ; block0: -; x0 = xslteq32 x0, x1 +; xslteq32 x0, x0, x1 ; ret ; ; Disassembled: @@ -354,7 +354,7 @@ block0(v0: i64, v1: i64): ; VCode: ; block0: -; x0 = xslteq64 x0, x1 +; xslteq64 x0, x0, x1 ; ret ; ; Disassembled: @@ -369,7 +369,7 @@ block0(v0: i8, v1: i8): ; VCode: ; block0: -; x0 = xult32 x1, x0 +; xult32 x0, x1, x0 ; ret ; ; Disassembled: @@ -384,7 +384,7 @@ block0(v0: i16, v1: i16): ; VCode: ; block0: -; x0 = xult32 x1, x0 +; xult32 x0, x1, x0 ; ret ; ; Disassembled: @@ -399,7 +399,7 @@ block0(v0: i32, v1: i32): ; VCode: ; block0: -; x0 = xult32 x1, x0 +; xult32 x0, x1, x0 ; ret ; ; Disassembled: @@ -414,7 +414,7 @@ block0(v0: i64, v1: i64): ; VCode: ; block0: -; x0 = xult64 x1, x0 +; xult64 x0, x1, x0 ; ret ; ; Disassembled: @@ -429,7 +429,7 @@ block0(v0: i8, v1: i8): ; VCode: ; block0: -; x0 = xslt32 x1, x0 +; xslt32 x0, x1, x0 ; ret ; ; Disassembled: @@ -444,7 +444,7 @@ block0(v0: i16, v1: i16): ; VCode: ; block0: -; x0 = xslt32 x1, x0 +; xslt32 x0, x1, x0 ; ret ; ; Disassembled: @@ -459,7 +459,7 @@ block0(v0: i32, v1: i32): ; VCode: ; block0: -; x0 = xslt32 x1, x0 +; xslt32 x0, x1, x0 ; ret ; ; Disassembled: @@ -474,7 +474,7 @@ block0(v0: i64, v1: i64): ; VCode: ; block0: -; x0 = xslt64 x1, x0 +; xslt64 x0, x1, x0 ; ret ; ; Disassembled: @@ -489,7 +489,7 @@ block0(v0: i8, v1: i8): ; VCode: ; block0: -; x0 = xulteq32 x1, x0 +; xulteq32 x0, x1, x0 ; ret ; ; Disassembled: @@ -504,7 +504,7 @@ block0(v0: i16, v1: i16): ; VCode: ; block0: -; x0 = xulteq32 x1, x0 +; xulteq32 x0, x1, x0 ; ret ; ; Disassembled: @@ -519,7 +519,7 @@ block0(v0: i32, v1: i32): ; VCode: ; block0: -; x0 = xulteq32 x1, x0 +; xulteq32 x0, x1, x0 ; ret ; ; Disassembled: @@ -534,7 +534,7 @@ block0(v0: i64, v1: i64): ; VCode: ; block0: -; x0 = xulteq64 x1, x0 +; xulteq64 x0, x1, x0 ; ret ; ; Disassembled: @@ -549,7 +549,7 @@ block0(v0: i8, v1: i8): ; VCode: ; block0: -; x0 = xslteq32 x1, x0 +; xslteq32 x0, x1, x0 ; ret ; ; Disassembled: @@ -564,7 +564,7 @@ block0(v0: i16, v1: i16): ; VCode: ; block0: -; x0 = xslteq32 x1, x0 +; xslteq32 x0, x1, x0 ; ret ; ; Disassembled: @@ -579,7 +579,7 @@ block0(v0: i32, v1: i32): ; VCode: ; block0: -; x0 = xslteq32 x1, x0 +; xslteq32 x0, x1, x0 ; ret ; ; Disassembled: @@ -594,7 +594,7 @@ block0(v0: i64, v1: i64): ; VCode: ; block0: -; x0 = xslteq64 x1, x0 +; xslteq64 x0, x1, x0 ; ret ; ; Disassembled: diff --git a/cranelift/filetests/filetests/isa/pulley32/iconst.clif b/cranelift/filetests/filetests/isa/pulley32/iconst.clif index 4ab8a5e48a7d..1aba8a5bf023 100644 --- a/cranelift/filetests/filetests/isa/pulley32/iconst.clif +++ b/cranelift/filetests/filetests/isa/pulley32/iconst.clif @@ -9,7 +9,7 @@ block0: ; VCode: ; block0: -; x0 = xconst16 255 +; xconst16 x0, 255 ; ret ; ; Disassembled: @@ -24,7 +24,7 @@ block0: ; VCode: ; block0: -; x0 = xconst32 65535 +; xconst32 x0, 65535 ; ret ; ; Disassembled: @@ -39,7 +39,7 @@ block0: ; VCode: ; block0: -; x0 = xconst32 -1 +; xconst32 x0, -1 ; ret ; ; Disassembled: @@ -54,7 +54,7 @@ block0: ; VCode: ; block0: -; x0 = xconst64 -1 +; xconst64 x0, -1 ; ret ; ; Disassembled: diff --git a/cranelift/filetests/filetests/isa/pulley32/jump.clif b/cranelift/filetests/filetests/isa/pulley32/jump.clif index 6475294e95d1..d1029117857b 100644 --- a/cranelift/filetests/filetests/isa/pulley32/jump.clif +++ 
b/cranelift/filetests/filetests/isa/pulley32/jump.clif @@ -19,20 +19,22 @@ block3(v3: i8): ; VCode: ; block0: -; br_if x0, label2; jump label1 +; zext8 x5, x0 +; br_if32 x5, label2; jump label1 ; block1: -; x0 = xconst8 0 +; xconst8 x0, 0 ; jump label3 ; block2: -; x0 = xconst8 1 +; xconst8 x0, 1 ; jump label3 ; block3: ; ret ; ; Disassembled: -; br_if x0, 0xe // target = 0xe +; zext8 x5, x0 +; br_if32 x5, 0xe // target = 0x11 ; xconst8 x0, 0 -; jump 0x8 // target = 0x11 +; jump 0x8 // target = 0x14 ; xconst8 x0, 1 ; ret diff --git a/cranelift/filetests/filetests/isa/pulley32/load.clif b/cranelift/filetests/filetests/isa/pulley32/load.clif index 3df82d3cbd03..82cc4c52aac8 100644 --- a/cranelift/filetests/filetests/isa/pulley32/load.clif +++ b/cranelift/filetests/filetests/isa/pulley32/load.clif @@ -9,11 +9,11 @@ block0(v0: i32): ; VCode: ; block0: -; x0 = load32_u x0+0 // flags = +; x0 = xload32 x0+0 // flags = ; ret ; ; Disassembled: -; load32_u x0, x0 +; xload32le_offset32 x0, x0, 0 ; ret function %load_i64(i32) -> i64 { @@ -24,11 +24,11 @@ block0(v0: i32): ; VCode: ; block0: -; x0 = load64_u x0+0 // flags = +; x0 = xload64 x0+0 // flags = ; ret ; ; Disassembled: -; load64 x0, x0 +; xload64le_offset32 x0, x0, 0 ; ret function %load_i32_with_offset(i32) -> i32 { @@ -39,11 +39,11 @@ block0(v0: i32): ; VCode: ; block0: -; x0 = load32_u x0+4 // flags = +; x0 = xload32 x0+4 // flags = ; ret ; ; Disassembled: -; load32_u_offset8 x0, x0, 4 +; xload32le_offset32 x0, x0, 4 ; ret function %load_i64_with_offset(i32) -> i64 { @@ -54,10 +54,10 @@ block0(v0: i32): ; VCode: ; block0: -; x0 = load64_u x0+8 // flags = +; x0 = xload64 x0+8 // flags = ; ret ; ; Disassembled: -; load64_offset8 x0, x0, 8 +; xload64le_offset32 x0, x0, 8 ; ret diff --git a/cranelift/filetests/filetests/isa/pulley32/stack_addr.clif b/cranelift/filetests/filetests/isa/pulley32/stack_addr.clif index ef6087e21c79..5c77917cf15c 100644 --- a/cranelift/filetests/filetests/isa/pulley32/stack_addr.clif +++ b/cranelift/filetests/filetests/isa/pulley32/stack_addr.clif @@ -10,10 +10,10 @@ block0(): ; VCode: ; push_frame -; stack_alloc32 0x10 +; stack_alloc32 16 ; block0: ; x0 = load_addr Slot(0) -; stack_free32 0x10 +; stack_free32 16 ; pop_frame ; ret ; diff --git a/cranelift/filetests/filetests/isa/pulley32/store.clif b/cranelift/filetests/filetests/isa/pulley32/store.clif index abefe343ca0e..5f87a2c2491d 100644 --- a/cranelift/filetests/filetests/isa/pulley32/store.clif +++ b/cranelift/filetests/filetests/isa/pulley32/store.clif @@ -9,11 +9,11 @@ block0(v0: i32, v1: i32): ; VCode: ; block0: -; store32 x1+0, x0 // flags = +; xstore32 x1+0, x0 // flags = ; ret ; ; Disassembled: -; store32 x1, x0 +; xstore32le_offset32 x1, 0, x0 ; ret function %store_i64(i64, i32) { @@ -24,11 +24,11 @@ block0(v0: i64, v1: i32): ; VCode: ; block0: -; store64 x1+0, x0 // flags = +; xstore64 x1+0, x0 // flags = ; ret ; ; Disassembled: -; store64 x1, x0 +; xstore64le_offset32 x1, 0, x0 ; ret function %store_i32_with_offset(i32, i32) { @@ -39,11 +39,11 @@ block0(v0: i32, v1: i32): ; VCode: ; block0: -; store32 x1+4, x0 // flags = +; xstore32 x1+4, x0 // flags = ; ret ; ; Disassembled: -; store32_offset8 x1, 4, x0 +; xstore32le_offset32 x1, 4, x0 ; ret function %store_i64_with_offset(i64, i32) { @@ -54,10 +54,10 @@ block0(v0: i64, v1: i32): ; VCode: ; block0: -; store64 x1+8, x0 // flags = +; xstore64 x1+8, x0 // flags = ; ret ; ; Disassembled: -; store64_offset8 x1, 8, x0 +; xstore64le_offset32 x1, 8, x0 ; ret diff --git 
a/cranelift/filetests/filetests/isa/pulley32/trap.clif b/cranelift/filetests/filetests/isa/pulley32/trap.clif index 03a23b970e5e..8d5da4749bf1 100644 --- a/cranelift/filetests/filetests/isa/pulley32/trap.clif +++ b/cranelift/filetests/filetests/isa/pulley32/trap.clif @@ -8,7 +8,7 @@ block0: ; VCode: ; block0: -; trap // code = TrapCode(1) +; trap // trap=TrapCode(1) ; ; Disassembled: ; trap @@ -23,7 +23,7 @@ block0(v0: i64): ; VCode: ; block0: -; x2 = xconst8 42 +; xconst8 x2, 42 ; trap_if eq, Size64, x0, x2 // code = TrapCode(1) ; ret ; @@ -43,7 +43,7 @@ block0(v0: i64): ; VCode: ; block0: -; x2 = xconst8 42 +; xconst8 x2, 42 ; trap_if ne, Size64, x0, x2 // code = TrapCode(1) ; ret ; @@ -63,7 +63,7 @@ block0(v0: i64): ; VCode: ; block0: -; x2 = xconst8 42 +; xconst8 x2, 42 ; trap_if eq, Size64, x0, x2 // code = TrapCode(1) ; ret ; @@ -83,7 +83,7 @@ block0(v0: i64): ; VCode: ; block0: -; x2 = xconst8 42 +; xconst8 x2, 42 ; trap_if ne, Size64, x0, x2 // code = TrapCode(1) ; ret ; @@ -110,21 +110,25 @@ block2: ; VCode: ; block0: -; br_if x0, label2; jump label1 +; xconst8 x4, 0 +; xneq64 x6, x0, x4 +; br_if32 x6, label2; jump label1 ; block1: ; ret ; block2: -; x5 = xconst8 42 -; x6 = xconst8 0 -; trap_if ne, Size64, x5, x6 // code = TrapCode(1) +; xconst8 x7, 42 +; xconst8 x8, 0 +; trap_if ne, Size64, x7, x8 // code = TrapCode(1) ; ret ; ; Disassembled: -; br_if x0, 0x7 // target = 0x7 +; xconst8 x4, 0 +; xneq64 x6, x0, x4 +; br_if32 x6, 0x7 // target = 0xd ; ret -; xconst8 x5, 42 -; xconst8 x6, 0 -; br_if_xneq64 x5, x6, 0x8 // target = 0x15 +; xconst8 x7, 42 +; xconst8 x8, 0 +; br_if_xneq64 x7, x8, 0x8 // target = 0x1b ; ret ; trap @@ -145,20 +149,24 @@ block2: ; VCode: ; block0: -; br_if x0, label2; jump label1 +; xconst8 x4, 0 +; xneq64 x6, x0, x4 +; br_if32 x6, label2; jump label1 ; block1: -; x4 = xconst8 0 -; x5 = xconst8 0 -; trap_if eq, Size64, x4, x5 // code = TrapCode(1) +; xconst8 x6, 0 +; xconst8 x7, 0 +; trap_if eq, Size64, x6, x7 // code = TrapCode(1) ; ret ; block2: ; ret ; ; Disassembled: -; br_if x0, 0x14 // target = 0x14 ; xconst8 x4, 0 -; xconst8 x5, 0 -; br_if_xeq64 x4, x5, 0x9 // target = 0x15 +; xneq64 x6, x0, x4 +; br_if32 x6, 0x14 // target = 0x1a +; xconst8 x6, 0 +; xconst8 x7, 0 +; br_if_xeq64 x6, x7, 0x9 // target = 0x1b ; ret ; ret ; trap diff --git a/cranelift/filetests/filetests/isa/pulley64/br_table.clif b/cranelift/filetests/filetests/isa/pulley64/br_table.clif index 8c334abc9be4..3adf03c6fedd 100644 --- a/cranelift/filetests/filetests/isa/pulley64/br_table.clif +++ b/cranelift/filetests/filetests/isa/pulley64/br_table.clif @@ -34,19 +34,19 @@ block5(v5: i32): ; block2: ; jump label4 ; block3: -; x5 = xconst8 3 +; xconst8 x5, 3 ; jump label7 ; block4: -; x5 = xconst8 2 +; xconst8 x5, 2 ; jump label7 ; block5: -; x5 = xconst8 1 +; xconst8 x5, 1 ; jump label7 ; block6: -; x5 = xconst8 4 +; xconst8 x5, 4 ; jump label7 ; block7: -; x0 = xadd32 x0, x5 +; xadd32 x0, x0, x5 ; ret ; ; Disassembled: diff --git a/cranelift/filetests/filetests/isa/pulley64/brif-icmp.clif b/cranelift/filetests/filetests/isa/pulley64/brif-icmp.clif index eeb01081a103..8a7ab52dd562 100644 --- a/cranelift/filetests/filetests/isa/pulley64/brif-icmp.clif +++ b/cranelift/filetests/filetests/isa/pulley64/brif-icmp.clif @@ -19,10 +19,10 @@ block2: ; block0: ; br_if_xeq32 x0, x1, label2; jump label1 ; block1: -; x0 = xconst8 1 +; xconst8 x0, 1 ; ret ; block2: -; x0 = xconst8 2 +; xconst8 x0, 2 ; ret ; ; Disassembled: @@ -50,10 +50,10 @@ block2: ; block0: ; br_if_xneq32 x0, x1, label2; jump label1 
; block1: -; x0 = xconst8 1 +; xconst8 x0, 1 ; ret ; block2: -; x0 = xconst8 2 +; xconst8 x0, 2 ; ret ; ; Disassembled: @@ -81,10 +81,10 @@ block2: ; block0: ; br_if_xult32 x0, x1, label2; jump label1 ; block1: -; x0 = xconst8 1 +; xconst8 x0, 1 ; ret ; block2: -; x0 = xconst8 2 +; xconst8 x0, 2 ; ret ; ; Disassembled: @@ -112,10 +112,10 @@ block2: ; block0: ; br_if_xulteq32 x0, x1, label2; jump label1 ; block1: -; x0 = xconst8 1 +; xconst8 x0, 1 ; ret ; block2: -; x0 = xconst8 2 +; xconst8 x0, 2 ; ret ; ; Disassembled: @@ -143,10 +143,10 @@ block2: ; block0: ; br_if_xslt32 x0, x1, label2; jump label1 ; block1: -; x0 = xconst8 1 +; xconst8 x0, 1 ; ret ; block2: -; x0 = xconst8 2 +; xconst8 x0, 2 ; ret ; ; Disassembled: @@ -174,10 +174,10 @@ block2: ; block0: ; br_if_xslteq32 x0, x1, label2; jump label1 ; block1: -; x0 = xconst8 1 +; xconst8 x0, 1 ; ret ; block2: -; x0 = xconst8 2 +; xconst8 x0, 2 ; ret ; ; Disassembled: @@ -205,10 +205,10 @@ block2: ; block0: ; br_if_xult32 x1, x0, label2; jump label1 ; block1: -; x0 = xconst8 1 +; xconst8 x0, 1 ; ret ; block2: -; x0 = xconst8 2 +; xconst8 x0, 2 ; ret ; ; Disassembled: @@ -236,10 +236,10 @@ block2: ; block0: ; br_if_xulteq32 x1, x0, label2; jump label1 ; block1: -; x0 = xconst8 1 +; xconst8 x0, 1 ; ret ; block2: -; x0 = xconst8 2 +; xconst8 x0, 2 ; ret ; ; Disassembled: @@ -267,10 +267,10 @@ block2: ; block0: ; br_if_xslt32 x1, x0, label2; jump label1 ; block1: -; x0 = xconst8 1 +; xconst8 x0, 1 ; ret ; block2: -; x0 = xconst8 2 +; xconst8 x0, 2 ; ret ; ; Disassembled: @@ -298,10 +298,10 @@ block2: ; block0: ; br_if_xslteq32 x1, x0, label2; jump label1 ; block1: -; x0 = xconst8 1 +; xconst8 x0, 1 ; ret ; block2: -; x0 = xconst8 2 +; xconst8 x0, 2 ; ret ; ; Disassembled: @@ -330,10 +330,10 @@ block2: ; block0: ; br_if_xeq32 x0, x1, label2; jump label1 ; block1: -; x0 = xconst8 1 +; xconst8 x0, 1 ; ret ; block2: -; x0 = xconst8 2 +; xconst8 x0, 2 ; ret ; ; Disassembled: diff --git a/cranelift/filetests/filetests/isa/pulley64/brif.clif b/cranelift/filetests/filetests/isa/pulley64/brif.clif index a1b47f037589..d8ae5981d49f 100644 --- a/cranelift/filetests/filetests/isa/pulley64/brif.clif +++ b/cranelift/filetests/filetests/isa/pulley64/brif.clif @@ -16,16 +16,18 @@ block2: ; VCode: ; block0: -; br_if x0, label2; jump label1 +; zext8 x4, x0 +; br_if32 x4, label2; jump label1 ; block1: -; x0 = xconst8 0 +; xconst8 x0, 0 ; ret ; block2: -; x0 = xconst8 1 +; xconst8 x0, 1 ; ret ; ; Disassembled: -; br_if x0, 0xa // target = 0xa +; zext8 x4, x0 +; br_if32 x4, 0xa // target = 0xd ; xconst8 x0, 0 ; ret ; xconst8 x0, 1 @@ -46,16 +48,18 @@ block2: ; VCode: ; block0: -; br_if x0, label2; jump label1 +; zext16 x4, x0 +; br_if32 x4, label2; jump label1 ; block1: -; x0 = xconst8 0 +; xconst8 x0, 0 ; ret ; block2: -; x0 = xconst8 1 +; xconst8 x0, 1 ; ret ; ; Disassembled: -; br_if x0, 0xa // target = 0xa +; zext16 x4, x0 +; br_if32 x4, 0xa // target = 0xd ; xconst8 x0, 0 ; ret ; xconst8 x0, 1 @@ -76,16 +80,16 @@ block2: ; VCode: ; block0: -; br_if x0, label2; jump label1 +; br_if32 x0, label2; jump label1 ; block1: -; x0 = xconst8 0 +; xconst8 x0, 0 ; ret ; block2: -; x0 = xconst8 1 +; xconst8 x0, 1 ; ret ; ; Disassembled: -; br_if x0, 0xa // target = 0xa +; br_if32 x0, 0xa // target = 0xa ; xconst8 x0, 0 ; ret ; xconst8 x0, 1 @@ -106,16 +110,20 @@ block2: ; VCode: ; block0: -; br_if x0, label2; jump label1 +; xconst8 x4, 0 +; xneq64 x6, x0, x4 +; br_if32 x6, label2; jump label1 ; block1: -; x0 = xconst8 0 +; xconst8 x0, 0 ; ret ; block2: -; x0 = xconst8 1 
+; xconst8 x0, 1 ; ret ; ; Disassembled: -; br_if x0, 0xa // target = 0xa +; xconst8 x4, 0 +; xneq64 x6, x0, x4 +; br_if32 x6, 0xa // target = 0x10 ; xconst8 x0, 0 ; ret ; xconst8 x0, 1 @@ -137,18 +145,20 @@ block2: ; VCode: ; block0: -; x5 = xeq32 x0, x1 -; br_if x5, label2; jump label1 +; xeq32 x6, x0, x1 +; zext8 x6, x6 +; br_if32 x6, label2; jump label1 ; block1: -; x0 = xconst8 0 +; xconst8 x0, 0 ; ret ; block2: -; x0 = xconst8 1 +; xconst8 x0, 1 ; ret ; ; Disassembled: -; xeq32 x5, x0, x1 -; br_if x5, 0xa // target = 0xd +; xeq32 x6, x0, x1 +; zext8 x6, x6 +; br_if32 x6, 0xa // target = 0x10 ; xconst8 x0, 0 ; ret ; xconst8 x0, 1 @@ -170,18 +180,20 @@ block2: ; VCode: ; block0: -; x5 = xneq32 x0, x1 -; br_if x5, label2; jump label1 +; xneq32 x6, x0, x1 +; zext8 x6, x6 +; br_if32 x6, label2; jump label1 ; block1: -; x0 = xconst8 0 +; xconst8 x0, 0 ; ret ; block2: -; x0 = xconst8 1 +; xconst8 x0, 1 ; ret ; ; Disassembled: -; xneq32 x5, x0, x1 -; br_if x5, 0xa // target = 0xd +; xneq32 x6, x0, x1 +; zext8 x6, x6 +; br_if32 x6, 0xa // target = 0x10 ; xconst8 x0, 0 ; ret ; xconst8 x0, 1 @@ -205,10 +217,10 @@ block2: ; block0: ; br_if_xslt32 x0, x1, label2; jump label1 ; block1: -; x0 = xconst8 0 +; xconst8 x0, 0 ; ret ; block2: -; x0 = xconst8 1 +; xconst8 x0, 1 ; ret ; ; Disassembled: @@ -234,18 +246,20 @@ block2: ; VCode: ; block0: -; x5 = xulteq64 x1, x0 -; br_if x5, label2; jump label1 +; xulteq64 x6, x1, x0 +; zext8 x6, x6 +; br_if32 x6, label2; jump label1 ; block1: -; x0 = xconst8 0 +; xconst8 x0, 0 ; ret ; block2: -; x0 = xconst8 1 +; xconst8 x0, 1 ; ret ; ; Disassembled: -; xulteq64 x5, x1, x0 -; br_if x5, 0xa // target = 0xd +; xulteq64 x6, x1, x0 +; zext8 x6, x6 +; br_if32 x6, 0xa // target = 0x10 ; xconst8 x0, 0 ; ret ; xconst8 x0, 1 diff --git a/cranelift/filetests/filetests/isa/pulley64/call.clif b/cranelift/filetests/filetests/isa/pulley64/call.clif index 9470d04a4a02..67b401aa3b5f 100644 --- a/cranelift/filetests/filetests/isa/pulley64/call.clif +++ b/cranelift/filetests/filetests/isa/pulley64/call.clif @@ -15,9 +15,9 @@ block0: ; VCode: ; push_frame ; block0: -; x0 = xconst8 0 +; xconst8 x0, 0 ; call CallInfo { dest: TestCase(%g), uses: [CallArgPair { vreg: p0i, preg: p0i }], defs: [CallRetPair { vreg: Writable { reg: p0i }, preg: p0i }], clobbers: PRegSet { bits: [65534, 65279, 4294967295, 0] }, callee_conv: Fast, caller_conv: Fast, callee_pop_size: 0 } -; x0 = xconst8 1 +; xconst8 x0, 1 ; pop_frame ; ret ; @@ -42,9 +42,9 @@ block0: ; VCode: ; push_frame ; block0: -; x0 = xconst8 0 +; xconst8 x0, 0 ; call CallInfo { dest: TestCase(%g), uses: [CallArgPair { vreg: p0i, preg: p0i }], defs: [CallRetPair { vreg: Writable { reg: p0i }, preg: p0i }], clobbers: PRegSet { bits: [65534, 65279, 4294967295, 0] }, callee_conv: Fast, caller_conv: Fast, callee_pop_size: 0 } -; x0 = xconst8 1 +; xconst8 x0, 1 ; pop_frame ; ret ; @@ -71,10 +71,10 @@ block0: ; VCode: ; push_frame ; block0: -; x0 = xconst8 0 -; x1 = xconst8 1 -; x2 = xconst8 2 -; x3 = xconst8 3 +; xconst8 x0, 0 +; xconst8 x1, 1 +; xconst8 x2, 2 +; xconst8 x3, 3 ; call CallInfo { dest: TestCase(%g), uses: [CallArgPair { vreg: p0i, preg: p0i }, CallArgPair { vreg: p1i, preg: p1i }, CallArgPair { vreg: p2i, preg: p2i }, CallArgPair { vreg: p3i, preg: p3i }], defs: [], clobbers: PRegSet { bits: [65535, 65279, 4294967295, 0] }, callee_conv: Fast, caller_conv: Fast, callee_pop_size: 0 } ; pop_frame ; ret @@ -104,9 +104,9 @@ block0: ; push_frame ; block0: ; call CallInfo { dest: TestCase(%g), uses: [], defs: [CallRetPair { vreg: 
Writable { reg: p0i }, preg: p0i }, CallRetPair { vreg: Writable { reg: p1i }, preg: p1i }, CallRetPair { vreg: Writable { reg: p2i }, preg: p2i }, CallRetPair { vreg: Writable { reg: p3i }, preg: p3i }], clobbers: PRegSet { bits: [65520, 65279, 4294967295, 0] }, callee_conv: Fast, caller_conv: Fast, callee_pop_size: 0 } -; x4 = xadd64 x0, x2 -; x3 = xadd64 x1, x3 -; x0 = xadd64 x4, x3 +; xadd64 x4, x0, x2 +; xadd64 x3, x1, x3 +; xadd64 x0, x4, x3 ; pop_frame ; ret ; @@ -130,32 +130,32 @@ block0: ; VCode: ; push_frame -; stack_alloc32 0x30 +; stack_alloc32 48 ; block0: -; x15 = xconst8 0 -; store64 OutgoingArg(0), x15 // flags = notrap aligned -; store64 OutgoingArg(8), x15 // flags = notrap aligned -; store64 OutgoingArg(16), x15 // flags = notrap aligned -; store64 OutgoingArg(24), x15 // flags = notrap aligned -; store64 OutgoingArg(32), x15 // flags = notrap aligned -; store64 OutgoingArg(40), x15 // flags = notrap aligned -; x0 = xmov x15 -; x1 = xmov x15 -; x2 = xmov x15 -; x3 = xmov x15 -; x4 = xmov x15 -; x5 = xmov x15 -; x6 = xmov x15 -; x7 = xmov x15 -; x8 = xmov x15 -; x9 = xmov x15 -; x10 = xmov x15 -; x11 = xmov x15 -; x12 = xmov x15 -; x13 = xmov x15 -; x14 = xmov x15 +; xconst8 x15, 0 +; xstore64 OutgoingArg(0), x15 // flags = notrap aligned +; xstore64 OutgoingArg(8), x15 // flags = notrap aligned +; xstore64 OutgoingArg(16), x15 // flags = notrap aligned +; xstore64 OutgoingArg(24), x15 // flags = notrap aligned +; xstore64 OutgoingArg(32), x15 // flags = notrap aligned +; xstore64 OutgoingArg(40), x15 // flags = notrap aligned +; xmov x0, x15 +; xmov x1, x15 +; xmov x2, x15 +; xmov x3, x15 +; xmov x4, x15 +; xmov x5, x15 +; xmov x6, x15 +; xmov x7, x15 +; xmov x8, x15 +; xmov x9, x15 +; xmov x10, x15 +; xmov x11, x15 +; xmov x12, x15 +; xmov x13, x15 +; xmov x14, x15 ; call CallInfo { dest: TestCase(%g), uses: [CallArgPair { vreg: p0i, preg: p0i }, CallArgPair { vreg: p1i, preg: p1i }, CallArgPair { vreg: p2i, preg: p2i }, CallArgPair { vreg: p3i, preg: p3i }, CallArgPair { vreg: p4i, preg: p4i }, CallArgPair { vreg: p5i, preg: p5i }, CallArgPair { vreg: p6i, preg: p6i }, CallArgPair { vreg: p7i, preg: p7i }, CallArgPair { vreg: p8i, preg: p8i }, CallArgPair { vreg: p9i, preg: p9i }, CallArgPair { vreg: p10i, preg: p10i }, CallArgPair { vreg: p11i, preg: p11i }, CallArgPair { vreg: p12i, preg: p12i }, CallArgPair { vreg: p13i, preg: p13i }, CallArgPair { vreg: p14i, preg: p14i }, CallArgPair { vreg: p15i, preg: p15i }], defs: [], clobbers: PRegSet { bits: [65535, 65279, 4294967295, 0] }, callee_conv: Fast, caller_conv: Fast, callee_pop_size: 0 } -; stack_free32 0x30 +; stack_free32 48 ; pop_frame ; ret ; @@ -163,12 +163,12 @@ block0: ; push_frame ; stack_alloc32 48 ; xconst8 x15, 0 -; store64 sp, x15 -; store64_offset8 sp, 8, x15 -; store64_offset8 sp, 16, x15 -; store64_offset8 sp, 24, x15 -; store64_offset8 sp, 32, x15 -; store64_offset8 sp, 40, x15 +; xstore64le_offset32 sp, 0, x15 +; xstore64le_offset32 sp, 8, x15 +; xstore64le_offset32 sp, 16, x15 +; xstore64le_offset32 sp, 24, x15 +; xstore64le_offset32 sp, 32, x15 +; xstore64le_offset32 sp, 40, x15 ; xmov x0, x15 ; xmov x1, x15 ; xmov x2, x15 @@ -184,7 +184,7 @@ block0: ; xmov x12, x15 ; xmov x13, x15 ; xmov x14, x15 -; call 0x0 // target = 0x4d +; call 0x0 // target = 0x60 ; stack_free32 48 ; pop_frame ; ret @@ -227,64 +227,79 @@ block0: ; VCode: ; push_frame -; stack_alloc32 0x40 -; store64 sp+56, x18 // flags = notrap aligned -; store64 sp+48, x20 // flags = notrap aligned +; stack_alloc32 112 +; xstore64 
sp+104, x18 // flags = notrap aligned +; xstore64 sp+96, x19 // flags = notrap aligned +; xstore64 sp+88, x20 // flags = notrap aligned +; xstore64 sp+80, x21 // flags = notrap aligned +; xstore64 sp+72, x23 // flags = notrap aligned +; xstore64 sp+64, x24 // flags = notrap aligned +; xstore64 sp+56, x25 // flags = notrap aligned ; block0: ; x0 = load_addr OutgoingArg(0) ; call CallInfo { dest: TestCase(%g), uses: [CallArgPair { vreg: p0i, preg: p0i }], defs: [CallRetPair { vreg: Writable { reg: p0i }, preg: p0i }, CallRetPair { vreg: Writable { reg: p1i }, preg: p1i }, CallRetPair { vreg: Writable { reg: p2i }, preg: p2i }, CallRetPair { vreg: Writable { reg: p3i }, preg: p3i }, CallRetPair { vreg: Writable { reg: p4i }, preg: p4i }, CallRetPair { vreg: Writable { reg: p5i }, preg: p5i }, CallRetPair { vreg: Writable { reg: p6i }, preg: p6i }, CallRetPair { vreg: Writable { reg: p7i }, preg: p7i }, CallRetPair { vreg: Writable { reg: p8i }, preg: p8i }, CallRetPair { vreg: Writable { reg: p9i }, preg: p9i }, CallRetPair { vreg: Writable { reg: p10i }, preg: p10i }, CallRetPair { vreg: Writable { reg: p11i }, preg: p11i }, CallRetPair { vreg: Writable { reg: p12i }, preg: p12i }, CallRetPair { vreg: Writable { reg: p13i }, preg: p13i }, CallRetPair { vreg: Writable { reg: p14i }, preg: p14i }, CallRetPair { vreg: Writable { reg: p15i }, preg: p15i }], clobbers: PRegSet { bits: [0, 65279, 4294967295, 0] }, callee_conv: Fast, caller_conv: Fast, callee_pop_size: 0 } -; x18 = xmov x13 -; x20 = xmov x11 -; x24 = load64_u OutgoingArg(0) // flags = notrap aligned -; x11 = load64_u OutgoingArg(8) // flags = notrap aligned -; x13 = load64_u OutgoingArg(16) // flags = notrap aligned -; x19 = load64_u OutgoingArg(24) // flags = notrap aligned -; x21 = load64_u OutgoingArg(32) // flags = notrap aligned -; x25 = xadd64 x0, x1 -; x23 = xadd64 x2, x3 -; x5 = xadd64 x4, x5 -; x6 = xadd64 x6, x7 -; x7 = xadd64 x8, x9 -; x0 = xmov x20 -; x4 = xadd64 x10, x0 -; x10 = xmov x18 -; x8 = xadd64 x12, x10 -; x14 = xadd64 x14, x15 -; x15 = xadd64 x24, x11 -; x13 = xadd64 x11, x13 -; x0 = xadd64 x19, x21 -; x1 = xadd64 x25, x23 -; x2 = xadd64 x5, x6 -; x3 = xadd64 x7, x4 -; x14 = xadd64 x8, x14 -; x13 = xadd64 x15, x13 -; x15 = xadd64 x0, x0 -; x0 = xadd64 x1, x2 -; x14 = xadd64 x3, x14 -; x13 = xadd64 x13, x15 -; x14 = xadd64 x0, x14 -; x13 = xadd64 x13, x13 -; x0 = xadd64 x14, x13 -; x18 = load64_u sp+56 // flags = notrap aligned -; x20 = load64_u sp+48 // flags = notrap aligned -; stack_free32 0x40 +; xmov x18, x13 +; xmov x20, x11 +; x24 = xload64 OutgoingArg(0) // flags = notrap aligned +; x11 = xload64 OutgoingArg(8) // flags = notrap aligned +; x13 = xload64 OutgoingArg(16) // flags = notrap aligned +; x19 = xload64 OutgoingArg(24) // flags = notrap aligned +; x21 = xload64 OutgoingArg(32) // flags = notrap aligned +; xadd64 x25, x0, x1 +; xadd64 x23, x2, x3 +; xadd64 x5, x4, x5 +; xadd64 x6, x6, x7 +; xadd64 x7, x8, x9 +; xmov x0, x20 +; xadd64 x4, x10, x0 +; xmov x10, x18 +; xadd64 x8, x12, x10 +; xadd64 x14, x14, x15 +; xadd64 x15, x24, x11 +; xadd64 x13, x11, x13 +; xadd64 x0, x19, x21 +; xadd64 x1, x25, x23 +; xadd64 x2, x5, x6 +; xadd64 x3, x7, x4 +; xadd64 x14, x8, x14 +; xadd64 x13, x15, x13 +; xadd64 x15, x0, x0 +; xadd64 x0, x1, x2 +; xadd64 x14, x3, x14 +; xadd64 x13, x13, x15 +; xadd64 x14, x0, x14 +; xadd64 x13, x13, x13 +; xadd64 x0, x14, x13 +; x18 = xload64 sp+104 // flags = notrap aligned +; x19 = xload64 sp+96 // flags = notrap aligned +; x20 = xload64 sp+88 // flags = notrap aligned +; x21 
= xload64 sp+80 // flags = notrap aligned +; x23 = xload64 sp+72 // flags = notrap aligned +; x24 = xload64 sp+64 // flags = notrap aligned +; x25 = xload64 sp+56 // flags = notrap aligned +; stack_free32 112 ; pop_frame ; ret ; ; Disassembled: ; push_frame -; stack_alloc32 64 -; store64_offset8 sp, 56, x18 -; store64_offset8 sp, 48, x20 +; stack_alloc32 112 +; xstore64le_offset32 sp, 104, x18 +; xstore64le_offset32 sp, 96, x19 +; xstore64le_offset32 sp, 88, x20 +; xstore64le_offset32 sp, 80, x21 +; xstore64le_offset32 sp, 72, x23 +; xstore64le_offset32 sp, 64, x24 +; xstore64le_offset32 sp, 56, x25 ; xmov x0, sp -; call 0x0 // target = 0x11 +; call 0x0 // target = 0x3a ; xmov x18, x13 ; xmov x20, x11 -; load64 x24, sp -; load64_offset8 x11, sp, 8 -; load64_offset8 x13, sp, 16 -; load64_offset8 x19, sp, 24 -; load64_offset8 x21, sp, 32 +; xload64le_offset32 x24, sp, 0 +; xload64le_offset32 x11, sp, 8 +; xload64le_offset32 x13, sp, 16 +; xload64le_offset32 x19, sp, 24 +; xload64le_offset32 x21, sp, 32 ; xadd64 x25, x0, x1 ; xadd64 x23, x2, x3 ; xadd64 x5, x4, x5 @@ -310,9 +325,14 @@ block0: ; xadd64 x14, x0, x14 ; xadd64 x13, x13, x13 ; xadd64 x0, x14, x13 -; load64_offset8 x18, sp, 56 -; load64_offset8 x20, sp, 48 -; stack_free32 64 +; xload64le_offset32 x18, sp, 104 +; xload64le_offset32 x19, sp, 96 +; xload64le_offset32 x20, sp, 88 +; xload64le_offset32 x21, sp, 80 +; xload64le_offset32 x23, sp, 72 +; xload64le_offset32 x24, sp, 64 +; xload64le_offset32 x25, sp, 56 +; stack_free32 112 ; pop_frame ; ret @@ -356,34 +376,34 @@ block0: ; VCode: ; push_frame -; stack_alloc32 0x40 +; stack_alloc32 64 ; block0: -; x15 = xconst8 0 -; store64 OutgoingArg(0), x15 // flags = notrap aligned -; store64 OutgoingArg(8), x15 // flags = notrap aligned -; store64 OutgoingArg(16), x15 // flags = notrap aligned -; store64 OutgoingArg(24), x15 // flags = notrap aligned -; store64 OutgoingArg(32), x15 // flags = notrap aligned -; store64 OutgoingArg(40), x15 // flags = notrap aligned -; store64 OutgoingArg(48), x15 // flags = notrap aligned -; store64 OutgoingArg(56), x15 // flags = notrap aligned -; x0 = xmov x15 -; x1 = xmov x15 -; x2 = xmov x15 -; x3 = xmov x15 -; x4 = xmov x15 -; x5 = xmov x15 -; x6 = xmov x15 -; x7 = xmov x15 -; x8 = xmov x15 -; x9 = xmov x15 -; x10 = xmov x15 -; x11 = xmov x15 -; x12 = xmov x15 -; x13 = xmov x15 -; x14 = xmov x15 +; xconst8 x15, 0 +; xstore64 OutgoingArg(0), x15 // flags = notrap aligned +; xstore64 OutgoingArg(8), x15 // flags = notrap aligned +; xstore64 OutgoingArg(16), x15 // flags = notrap aligned +; xstore64 OutgoingArg(24), x15 // flags = notrap aligned +; xstore64 OutgoingArg(32), x15 // flags = notrap aligned +; xstore64 OutgoingArg(40), x15 // flags = notrap aligned +; xstore64 OutgoingArg(48), x15 // flags = notrap aligned +; xstore64 OutgoingArg(56), x15 // flags = notrap aligned +; xmov x0, x15 +; xmov x1, x15 +; xmov x2, x15 +; xmov x3, x15 +; xmov x4, x15 +; xmov x5, x15 +; xmov x6, x15 +; xmov x7, x15 +; xmov x8, x15 +; xmov x9, x15 +; xmov x10, x15 +; xmov x11, x15 +; xmov x12, x15 +; xmov x13, x15 +; xmov x14, x15 ; call CallInfo { dest: TestCase(%g), uses: [CallArgPair { vreg: p0i, preg: p0i }, CallArgPair { vreg: p1i, preg: p1i }, CallArgPair { vreg: p2i, preg: p2i }, CallArgPair { vreg: p3i, preg: p3i }, CallArgPair { vreg: p4i, preg: p4i }, CallArgPair { vreg: p5i, preg: p5i }, CallArgPair { vreg: p6i, preg: p6i }, CallArgPair { vreg: p7i, preg: p7i }, CallArgPair { vreg: p8i, preg: p8i }, CallArgPair { vreg: p9i, preg: p9i }, CallArgPair { vreg: 
p10i, preg: p10i }, CallArgPair { vreg: p11i, preg: p11i }, CallArgPair { vreg: p12i, preg: p12i }, CallArgPair { vreg: p13i, preg: p13i }, CallArgPair { vreg: p14i, preg: p14i }, CallArgPair { vreg: p15i, preg: p15i }], defs: [], clobbers: PRegSet { bits: [65535, 65279, 4294967295, 0] }, callee_conv: Fast, caller_conv: Fast, callee_pop_size: 0 } -; stack_free32 0x40 +; stack_free32 64 ; pop_frame ; ret ; @@ -391,14 +411,14 @@ block0: ; push_frame ; stack_alloc32 64 ; xconst8 x15, 0 -; store64 sp, x15 -; store64_offset8 sp, 8, x15 -; store64_offset8 sp, 16, x15 -; store64_offset8 sp, 24, x15 -; store64_offset8 sp, 32, x15 -; store64_offset8 sp, 40, x15 -; store64_offset8 sp, 48, x15 -; store64_offset8 sp, 56, x15 +; xstore64le_offset32 sp, 0, x15 +; xstore64le_offset32 sp, 8, x15 +; xstore64le_offset32 sp, 16, x15 +; xstore64le_offset32 sp, 24, x15 +; xstore64le_offset32 sp, 32, x15 +; xstore64le_offset32 sp, 40, x15 +; xstore64le_offset32 sp, 48, x15 +; xstore64le_offset32 sp, 56, x15 ; xmov x0, x15 ; xmov x1, x15 ; xmov x2, x15 @@ -414,7 +434,7 @@ block0: ; xmov x12, x15 ; xmov x13, x15 ; xmov x14, x15 -; call 0x0 // target = 0x55 +; call 0x0 // target = 0x6e ; stack_free32 64 ; pop_frame ; ret diff --git a/cranelift/filetests/filetests/isa/pulley64/iadd.clif b/cranelift/filetests/filetests/isa/pulley64/iadd.clif index dd49ed0c3735..0ce8cc0122b4 100644 --- a/cranelift/filetests/filetests/isa/pulley64/iadd.clif +++ b/cranelift/filetests/filetests/isa/pulley64/iadd.clif @@ -9,7 +9,7 @@ block0(v0: i8, v1: i8): ; VCode: ; block0: -; x0 = xadd32 x0, x1 +; xadd32 x0, x0, x1 ; ret ; ; Disassembled: @@ -24,7 +24,7 @@ block0(v0: i16, v1: i16): ; VCode: ; block0: -; x0 = xadd32 x0, x1 +; xadd32 x0, x0, x1 ; ret ; ; Disassembled: @@ -39,7 +39,7 @@ block0(v0: i32, v1: i32): ; VCode: ; block0: -; x0 = xadd32 x0, x1 +; xadd32 x0, x0, x1 ; ret ; ; Disassembled: @@ -54,7 +54,7 @@ block0(v0: i64, v1: i64): ; VCode: ; block0: -; x0 = xadd64 x0, x1 +; xadd64 x0, x0, x1 ; ret ; ; Disassembled: diff --git a/cranelift/filetests/filetests/isa/pulley64/icmp.clif b/cranelift/filetests/filetests/isa/pulley64/icmp.clif index 3343d2f7d981..badfa73b3ceb 100644 --- a/cranelift/filetests/filetests/isa/pulley64/icmp.clif +++ b/cranelift/filetests/filetests/isa/pulley64/icmp.clif @@ -9,7 +9,7 @@ block0(v0: i8, v1: i8): ; VCode: ; block0: -; x0 = xeq32 x0, x1 +; xeq32 x0, x0, x1 ; ret ; ; Disassembled: @@ -24,7 +24,7 @@ block0(v0: i16, v1: i16): ; VCode: ; block0: -; x0 = xeq32 x0, x1 +; xeq32 x0, x0, x1 ; ret ; ; Disassembled: @@ -39,7 +39,7 @@ block0(v0: i32, v1: i32): ; VCode: ; block0: -; x0 = xeq32 x0, x1 +; xeq32 x0, x0, x1 ; ret ; ; Disassembled: @@ -54,7 +54,7 @@ block0(v0: i64, v1: i64): ; VCode: ; block0: -; x0 = xeq64 x0, x1 +; xeq64 x0, x0, x1 ; ret ; ; Disassembled: @@ -69,7 +69,7 @@ block0(v0: i8, v1: i8): ; VCode: ; block0: -; x0 = xneq32 x0, x1 +; xneq32 x0, x0, x1 ; ret ; ; Disassembled: @@ -84,7 +84,7 @@ block0(v0: i16, v1: i16): ; VCode: ; block0: -; x0 = xneq32 x0, x1 +; xneq32 x0, x0, x1 ; ret ; ; Disassembled: @@ -99,7 +99,7 @@ block0(v0: i32, v1: i32): ; VCode: ; block0: -; x0 = xneq32 x0, x1 +; xneq32 x0, x0, x1 ; ret ; ; Disassembled: @@ -114,7 +114,7 @@ block0(v0: i64, v1: i64): ; VCode: ; block0: -; x0 = xneq64 x0, x1 +; xneq64 x0, x0, x1 ; ret ; ; Disassembled: @@ -129,7 +129,7 @@ block0(v0: i8, v1: i8): ; VCode: ; block0: -; x0 = xult32 x0, x1 +; xult32 x0, x0, x1 ; ret ; ; Disassembled: @@ -144,7 +144,7 @@ block0(v0: i16, v1: i16): ; VCode: ; block0: -; x0 = xult32 x0, x1 +; xult32 x0, x0, 
x1 ; ret ; ; Disassembled: @@ -159,7 +159,7 @@ block0(v0: i32, v1: i32): ; VCode: ; block0: -; x0 = xult32 x0, x1 +; xult32 x0, x0, x1 ; ret ; ; Disassembled: @@ -174,7 +174,7 @@ block0(v0: i64, v1: i64): ; VCode: ; block0: -; x0 = xult64 x0, x1 +; xult64 x0, x0, x1 ; ret ; ; Disassembled: @@ -189,7 +189,7 @@ block0(v0: i8, v1: i8): ; VCode: ; block0: -; x0 = xulteq32 x0, x1 +; xulteq32 x0, x0, x1 ; ret ; ; Disassembled: @@ -204,7 +204,7 @@ block0(v0: i16, v1: i16): ; VCode: ; block0: -; x0 = xulteq32 x0, x1 +; xulteq32 x0, x0, x1 ; ret ; ; Disassembled: @@ -219,7 +219,7 @@ block0(v0: i32, v1: i32): ; VCode: ; block0: -; x0 = xulteq32 x0, x1 +; xulteq32 x0, x0, x1 ; ret ; ; Disassembled: @@ -234,7 +234,7 @@ block0(v0: i64, v1: i64): ; VCode: ; block0: -; x0 = xulteq64 x0, x1 +; xulteq64 x0, x0, x1 ; ret ; ; Disassembled: @@ -249,7 +249,7 @@ block0(v0: i8, v1: i8): ; VCode: ; block0: -; x0 = xslt32 x0, x1 +; xslt32 x0, x0, x1 ; ret ; ; Disassembled: @@ -264,7 +264,7 @@ block0(v0: i16, v1: i16): ; VCode: ; block0: -; x0 = xslt32 x0, x1 +; xslt32 x0, x0, x1 ; ret ; ; Disassembled: @@ -279,7 +279,7 @@ block0(v0: i32, v1: i32): ; VCode: ; block0: -; x0 = xslt32 x0, x1 +; xslt32 x0, x0, x1 ; ret ; ; Disassembled: @@ -294,7 +294,7 @@ block0(v0: i64, v1: i64): ; VCode: ; block0: -; x0 = xslt64 x0, x1 +; xslt64 x0, x0, x1 ; ret ; ; Disassembled: @@ -309,7 +309,7 @@ block0(v0: i8, v1: i8): ; VCode: ; block0: -; x0 = xslteq32 x0, x1 +; xslteq32 x0, x0, x1 ; ret ; ; Disassembled: @@ -324,7 +324,7 @@ block0(v0: i16, v1: i16): ; VCode: ; block0: -; x0 = xslteq32 x0, x1 +; xslteq32 x0, x0, x1 ; ret ; ; Disassembled: @@ -339,7 +339,7 @@ block0(v0: i32, v1: i32): ; VCode: ; block0: -; x0 = xslteq32 x0, x1 +; xslteq32 x0, x0, x1 ; ret ; ; Disassembled: @@ -354,7 +354,7 @@ block0(v0: i64, v1: i64): ; VCode: ; block0: -; x0 = xslteq64 x0, x1 +; xslteq64 x0, x0, x1 ; ret ; ; Disassembled: @@ -369,7 +369,7 @@ block0(v0: i8, v1: i8): ; VCode: ; block0: -; x0 = xult32 x1, x0 +; xult32 x0, x1, x0 ; ret ; ; Disassembled: @@ -384,7 +384,7 @@ block0(v0: i16, v1: i16): ; VCode: ; block0: -; x0 = xult32 x1, x0 +; xult32 x0, x1, x0 ; ret ; ; Disassembled: @@ -399,7 +399,7 @@ block0(v0: i32, v1: i32): ; VCode: ; block0: -; x0 = xult32 x1, x0 +; xult32 x0, x1, x0 ; ret ; ; Disassembled: @@ -414,7 +414,7 @@ block0(v0: i64, v1: i64): ; VCode: ; block0: -; x0 = xult64 x1, x0 +; xult64 x0, x1, x0 ; ret ; ; Disassembled: @@ -429,7 +429,7 @@ block0(v0: i8, v1: i8): ; VCode: ; block0: -; x0 = xslt32 x1, x0 +; xslt32 x0, x1, x0 ; ret ; ; Disassembled: @@ -444,7 +444,7 @@ block0(v0: i16, v1: i16): ; VCode: ; block0: -; x0 = xslt32 x1, x0 +; xslt32 x0, x1, x0 ; ret ; ; Disassembled: @@ -459,7 +459,7 @@ block0(v0: i32, v1: i32): ; VCode: ; block0: -; x0 = xslt32 x1, x0 +; xslt32 x0, x1, x0 ; ret ; ; Disassembled: @@ -474,7 +474,7 @@ block0(v0: i64, v1: i64): ; VCode: ; block0: -; x0 = xslt64 x1, x0 +; xslt64 x0, x1, x0 ; ret ; ; Disassembled: @@ -489,7 +489,7 @@ block0(v0: i8, v1: i8): ; VCode: ; block0: -; x0 = xulteq32 x1, x0 +; xulteq32 x0, x1, x0 ; ret ; ; Disassembled: @@ -504,7 +504,7 @@ block0(v0: i16, v1: i16): ; VCode: ; block0: -; x0 = xulteq32 x1, x0 +; xulteq32 x0, x1, x0 ; ret ; ; Disassembled: @@ -519,7 +519,7 @@ block0(v0: i32, v1: i32): ; VCode: ; block0: -; x0 = xulteq32 x1, x0 +; xulteq32 x0, x1, x0 ; ret ; ; Disassembled: @@ -534,7 +534,7 @@ block0(v0: i64, v1: i64): ; VCode: ; block0: -; x0 = xulteq64 x1, x0 +; xulteq64 x0, x1, x0 ; ret ; ; Disassembled: @@ -549,7 +549,7 @@ block0(v0: i8, v1: i8): ; VCode: ; 
block0: -; x0 = xslteq32 x1, x0 +; xslteq32 x0, x1, x0 ; ret ; ; Disassembled: @@ -564,7 +564,7 @@ block0(v0: i16, v1: i16): ; VCode: ; block0: -; x0 = xslteq32 x1, x0 +; xslteq32 x0, x1, x0 ; ret ; ; Disassembled: @@ -579,7 +579,7 @@ block0(v0: i32, v1: i32): ; VCode: ; block0: -; x0 = xslteq32 x1, x0 +; xslteq32 x0, x1, x0 ; ret ; ; Disassembled: @@ -594,7 +594,7 @@ block0(v0: i64, v1: i64): ; VCode: ; block0: -; x0 = xslteq64 x1, x0 +; xslteq64 x0, x1, x0 ; ret ; ; Disassembled: diff --git a/cranelift/filetests/filetests/isa/pulley64/iconst.clif b/cranelift/filetests/filetests/isa/pulley64/iconst.clif index 5a143c3db352..8583ebf4d6f3 100644 --- a/cranelift/filetests/filetests/isa/pulley64/iconst.clif +++ b/cranelift/filetests/filetests/isa/pulley64/iconst.clif @@ -9,7 +9,7 @@ block0: ; VCode: ; block0: -; x0 = xconst16 255 +; xconst16 x0, 255 ; ret ; ; Disassembled: @@ -24,7 +24,7 @@ block0: ; VCode: ; block0: -; x0 = xconst32 65535 +; xconst32 x0, 65535 ; ret ; ; Disassembled: @@ -39,7 +39,7 @@ block0: ; VCode: ; block0: -; x0 = xconst32 -1 +; xconst32 x0, -1 ; ret ; ; Disassembled: @@ -54,7 +54,7 @@ block0: ; VCode: ; block0: -; x0 = xconst64 -1 +; xconst64 x0, -1 ; ret ; ; Disassembled: diff --git a/cranelift/filetests/filetests/isa/pulley64/jump.clif b/cranelift/filetests/filetests/isa/pulley64/jump.clif index 4c22b5acc953..a4b187bcc47f 100644 --- a/cranelift/filetests/filetests/isa/pulley64/jump.clif +++ b/cranelift/filetests/filetests/isa/pulley64/jump.clif @@ -19,20 +19,22 @@ block3(v3: i8): ; VCode: ; block0: -; br_if x0, label2; jump label1 +; zext8 x5, x0 +; br_if32 x5, label2; jump label1 ; block1: -; x0 = xconst8 0 +; xconst8 x0, 0 ; jump label3 ; block2: -; x0 = xconst8 1 +; xconst8 x0, 1 ; jump label3 ; block3: ; ret ; ; Disassembled: -; br_if x0, 0xe // target = 0xe +; zext8 x5, x0 +; br_if32 x5, 0xe // target = 0x11 ; xconst8 x0, 0 -; jump 0x8 // target = 0x11 +; jump 0x8 // target = 0x14 ; xconst8 x0, 1 ; ret diff --git a/cranelift/filetests/filetests/isa/pulley64/load.clif b/cranelift/filetests/filetests/isa/pulley64/load.clif index e39daa3ababc..3482e2c7d280 100644 --- a/cranelift/filetests/filetests/isa/pulley64/load.clif +++ b/cranelift/filetests/filetests/isa/pulley64/load.clif @@ -9,11 +9,11 @@ block0(v0: i64): ; VCode: ; block0: -; x0 = load32_u x0+0 // flags = +; x0 = xload32 x0+0 // flags = ; ret ; ; Disassembled: -; load32_u x0, x0 +; xload32le_offset32 x0, x0, 0 ; ret function %load_i64(i64) -> i64 { @@ -24,11 +24,11 @@ block0(v0: i64): ; VCode: ; block0: -; x0 = load64_u x0+0 // flags = +; x0 = xload64 x0+0 // flags = ; ret ; ; Disassembled: -; load64 x0, x0 +; xload64le_offset32 x0, x0, 0 ; ret function %load_i32_with_offset(i64) -> i32 { @@ -39,11 +39,11 @@ block0(v0: i64): ; VCode: ; block0: -; x0 = load32_u x0+4 // flags = +; x0 = xload32 x0+4 // flags = ; ret ; ; Disassembled: -; load32_u_offset8 x0, x0, 4 +; xload32le_offset32 x0, x0, 4 ; ret function %load_i64_with_offset(i64) -> i64 { @@ -54,10 +54,10 @@ block0(v0: i64): ; VCode: ; block0: -; x0 = load64_u x0+8 // flags = +; x0 = xload64 x0+8 // flags = ; ret ; ; Disassembled: -; load64_offset8 x0, x0, 8 +; xload64le_offset32 x0, x0, 8 ; ret diff --git a/cranelift/filetests/filetests/isa/pulley64/loadbe.clif b/cranelift/filetests/filetests/isa/pulley64/loadbe.clif index ef8c6e0f2f4c..a0ad10694a85 100644 --- a/cranelift/filetests/filetests/isa/pulley64/loadbe.clif +++ b/cranelift/filetests/filetests/isa/pulley64/loadbe.clif @@ -9,13 +9,11 @@ block0(v0: i64): ; VCode: ; block0: -; x2 = load32_u x0+0 // 
flags = -; bswap32 x0, x2 +; x0 = xload32 x0+0 // flags = ; ret ; ; Disassembled: -; load32_u x2, x0 -; bswap32 x0, x2 +; xload32be_u64_offset32 x0, x0, 0 ; ret function %load_i64(i64) -> i64 { @@ -26,13 +24,11 @@ block0(v0: i64): ; VCode: ; block0: -; x2 = load64_u x0+0 // flags = -; bswap64 x0, x2 +; x0 = xload64 x0+0 // flags = ; ret ; ; Disassembled: -; load64 x2, x0 -; bswap64 x0, x2 +; xload64be_offset32 x0, x0, 0 ; ret function %load_i32_with_offset(i64) -> i32 { @@ -43,13 +39,11 @@ block0(v0: i64): ; VCode: ; block0: -; x2 = load32_u x0+4 // flags = -; bswap32 x0, x2 +; x0 = xload32 x0+4 // flags = ; ret ; ; Disassembled: -; load32_u_offset8 x2, x0, 4 -; bswap32 x0, x2 +; xload32be_u64_offset32 x0, x0, 4 ; ret function %load_i64_with_offset(i64) -> i64 { @@ -60,12 +54,10 @@ block0(v0: i64): ; VCode: ; block0: -; x2 = load64_u x0+8 // flags = -; bswap64 x0, x2 +; x0 = xload64 x0+8 // flags = ; ret ; ; Disassembled: -; load64_offset8 x2, x0, 8 -; bswap64 x0, x2 +; xload64be_offset32 x0, x0, 8 ; ret diff --git a/cranelift/filetests/filetests/isa/pulley64/stack_addr.clif b/cranelift/filetests/filetests/isa/pulley64/stack_addr.clif index b9190587fc21..2f658f4b4802 100644 --- a/cranelift/filetests/filetests/isa/pulley64/stack_addr.clif +++ b/cranelift/filetests/filetests/isa/pulley64/stack_addr.clif @@ -10,10 +10,10 @@ block0(): ; VCode: ; push_frame -; stack_alloc32 0x10 +; stack_alloc32 16 ; block0: ; x0 = load_addr Slot(0) -; stack_free32 0x10 +; stack_free32 16 ; pop_frame ; ret ; diff --git a/cranelift/filetests/filetests/isa/pulley64/store.clif b/cranelift/filetests/filetests/isa/pulley64/store.clif index 966ee460658e..67cdf9763aa4 100644 --- a/cranelift/filetests/filetests/isa/pulley64/store.clif +++ b/cranelift/filetests/filetests/isa/pulley64/store.clif @@ -9,11 +9,11 @@ block0(v0: i32, v1: i64): ; VCode: ; block0: -; store32 x1+0, x0 // flags = +; xstore32 x1+0, x0 // flags = ; ret ; ; Disassembled: -; store32 x1, x0 +; xstore32le_offset32 x1, 0, x0 ; ret function %store_i64(i64, i64) { @@ -24,11 +24,11 @@ block0(v0: i64, v1: i64): ; VCode: ; block0: -; store64 x1+0, x0 // flags = +; xstore64 x1+0, x0 // flags = ; ret ; ; Disassembled: -; store64 x1, x0 +; xstore64le_offset32 x1, 0, x0 ; ret function %store_i32_with_offset(i32, i64) { @@ -39,11 +39,11 @@ block0(v0: i32, v1: i64): ; VCode: ; block0: -; store32 x1+4, x0 // flags = +; xstore32 x1+4, x0 // flags = ; ret ; ; Disassembled: -; store32_offset8 x1, 4, x0 +; xstore32le_offset32 x1, 4, x0 ; ret function %store_i64_with_offset(i64, i64) { @@ -54,10 +54,10 @@ block0(v0: i64, v1: i64): ; VCode: ; block0: -; store64 x1+8, x0 // flags = +; xstore64 x1+8, x0 // flags = ; ret ; ; Disassembled: -; store64_offset8 x1, 8, x0 +; xstore64le_offset32 x1, 8, x0 ; ret diff --git a/cranelift/filetests/filetests/isa/pulley64/storebe.clif b/cranelift/filetests/filetests/isa/pulley64/storebe.clif index d9d59f5c5a94..5f8b1270c152 100644 --- a/cranelift/filetests/filetests/isa/pulley64/storebe.clif +++ b/cranelift/filetests/filetests/isa/pulley64/storebe.clif @@ -9,13 +9,11 @@ block0(v0: i32, v1: i64): ; VCode: ; block0: -; bswap32 x3, x0 -; store32 x1+0, x3 // flags = +; xstore32 x1+0, x0 // flags = ; ret ; ; Disassembled: -; bswap32 x3, x0 -; store32 x1, x3 +; xstore32be_offset32 x1, 0, x0 ; ret function %store_i64(i64, i64) { @@ -26,13 +24,11 @@ block0(v0: i64, v1: i64): ; VCode: ; block0: -; bswap64 x3, x0 -; store64 x1+0, x3 // flags = +; xstore64 x1+0, x0 // flags = ; ret ; ; Disassembled: -; bswap64 x3, x0 -; store64 x1, x3 +; 
xstore64be_offset32 x1, 0, x0 ; ret function %store_i32_with_offset(i32, i64) { @@ -43,13 +39,11 @@ block0(v0: i32, v1: i64): ; VCode: ; block0: -; bswap32 x3, x0 -; store32 x1+4, x3 // flags = +; xstore32 x1+4, x0 // flags = ; ret ; ; Disassembled: -; bswap32 x3, x0 -; store32_offset8 x1, 4, x3 +; xstore32be_offset32 x1, 4, x0 ; ret function %store_i64_with_offset(i64, i64) { @@ -60,12 +54,10 @@ block0(v0: i64, v1: i64): ; VCode: ; block0: -; bswap64 x3, x0 -; store64 x1+8, x3 // flags = +; xstore64 x1+8, x0 // flags = ; ret ; ; Disassembled: -; bswap64 x3, x0 -; store64_offset8 x1, 8, x3 +; xstore64be_offset32 x1, 8, x0 ; ret diff --git a/cranelift/filetests/filetests/isa/pulley64/trap.clif b/cranelift/filetests/filetests/isa/pulley64/trap.clif index 23e569a23a23..ed68dbdf1665 100644 --- a/cranelift/filetests/filetests/isa/pulley64/trap.clif +++ b/cranelift/filetests/filetests/isa/pulley64/trap.clif @@ -8,7 +8,7 @@ block0: ; VCode: ; block0: -; trap // code = TrapCode(1) +; trap // trap=TrapCode(1) ; ; Disassembled: ; trap @@ -23,7 +23,7 @@ block0(v0: i64): ; VCode: ; block0: -; x2 = xconst8 42 +; xconst8 x2, 42 ; trap_if eq, Size64, x0, x2 // code = TrapCode(1) ; ret ; @@ -43,7 +43,7 @@ block0(v0: i64): ; VCode: ; block0: -; x2 = xconst8 42 +; xconst8 x2, 42 ; trap_if ne, Size64, x0, x2 // code = TrapCode(1) ; ret ; @@ -63,7 +63,7 @@ block0(v0: i64): ; VCode: ; block0: -; x2 = xconst8 42 +; xconst8 x2, 42 ; trap_if eq, Size64, x0, x2 // code = TrapCode(1) ; ret ; @@ -83,7 +83,7 @@ block0(v0: i64): ; VCode: ; block0: -; x2 = xconst8 42 +; xconst8 x2, 42 ; trap_if ne, Size64, x0, x2 // code = TrapCode(1) ; ret ; @@ -110,21 +110,25 @@ block2: ; VCode: ; block0: -; br_if x0, label2; jump label1 +; xconst8 x4, 0 +; xneq64 x6, x0, x4 +; br_if32 x6, label2; jump label1 ; block1: ; ret ; block2: -; x5 = xconst8 42 -; x6 = xconst8 0 -; trap_if ne, Size64, x5, x6 // code = TrapCode(1) +; xconst8 x7, 42 +; xconst8 x8, 0 +; trap_if ne, Size64, x7, x8 // code = TrapCode(1) ; ret ; ; Disassembled: -; br_if x0, 0x7 // target = 0x7 +; xconst8 x4, 0 +; xneq64 x6, x0, x4 +; br_if32 x6, 0x7 // target = 0xd ; ret -; xconst8 x5, 42 -; xconst8 x6, 0 -; br_if_xneq64 x5, x6, 0x8 // target = 0x15 +; xconst8 x7, 42 +; xconst8 x8, 0 +; br_if_xneq64 x7, x8, 0x8 // target = 0x1b ; ret ; trap @@ -145,20 +149,24 @@ block2: ; VCode: ; block0: -; br_if x0, label2; jump label1 +; xconst8 x4, 0 +; xneq64 x6, x0, x4 +; br_if32 x6, label2; jump label1 ; block1: -; x4 = xconst8 0 -; x5 = xconst8 0 -; trap_if eq, Size64, x4, x5 // code = TrapCode(1) +; xconst8 x6, 0 +; xconst8 x7, 0 +; trap_if eq, Size64, x6, x7 // code = TrapCode(1) ; ret ; block2: ; ret ; ; Disassembled: -; br_if x0, 0x14 // target = 0x14 ; xconst8 x4, 0 -; xconst8 x5, 0 -; br_if_xeq64 x4, x5, 0x9 // target = 0x15 +; xneq64 x6, x0, x4 +; br_if32 x6, 0x14 // target = 0x1a +; xconst8 x6, 0 +; xconst8 x7, 0 +; br_if_xeq64 x6, x7, 0x9 // target = 0x1b ; ret ; ret ; trap diff --git a/cranelift/filetests/filetests/runtests/br.clif b/cranelift/filetests/filetests/runtests/br.clif index 689682ebd594..7308c78bf66e 100644 --- a/cranelift/filetests/filetests/runtests/br.clif +++ b/cranelift/filetests/filetests/runtests/br.clif @@ -5,6 +5,10 @@ target s390x target x86_64 target riscv64 target riscv64 has_c has_zcb +target pulley32 +target pulley32be +target pulley64 +target pulley64be function %jump() -> i8 { block0: diff --git a/cranelift/filetests/filetests/runtests/brif.clif b/cranelift/filetests/filetests/runtests/brif.clif index 60648d569cb2..54dea5ee6f26 100644 
--- a/cranelift/filetests/filetests/runtests/brif.clif +++ b/cranelift/filetests/filetests/runtests/brif.clif @@ -5,6 +5,10 @@ target s390x target x86_64 target riscv64 target riscv64 has_c has_zcb +target pulley32 +target pulley32be +target pulley64 +target pulley64be function %brif_value(i8) -> i64 { block0(v0: i8): diff --git a/cranelift/filetests/src/function_runner.rs b/cranelift/filetests/src/function_runner.rs index bc00b1be7dbc..4b9a43bf48a4 100644 --- a/cranelift/filetests/src/function_runner.rs +++ b/cranelift/filetests/src/function_runner.rs @@ -13,9 +13,12 @@ use cranelift_jit::{JITBuilder, JITModule}; use cranelift_module::{FuncId, Linkage, Module, ModuleError}; use cranelift_native::builder_with_options; use cranelift_reader::TestFile; +use pulley_interpreter::interp as pulley; use std::cmp::max; use std::collections::hash_map::Entry; use std::collections::HashMap; +use std::ptr::NonNull; +use target_lexicon::Architecture; use thiserror::Error; const TESTFILE_NAMESPACE: u32 = 0; @@ -370,12 +373,45 @@ impl<'a> Trampoline<'a> { let function_ptr = self.module.get_finalized_function(self.func_id); let trampoline_ptr = self.module.get_finalized_function(self.trampoline_id); - let callable_trampoline: fn(*const u8, *mut u128) -> () = - unsafe { mem::transmute(trampoline_ptr) }; - callable_trampoline(function_ptr, arguments_address); + unsafe { + self.call_raw(trampoline_ptr, function_ptr, arguments_address); + } values.collect_returns(&self.func_signature) } + + unsafe fn call_raw( + &self, + trampoline_ptr: *const u8, + function_ptr: *const u8, + arguments_address: *mut u128, + ) { + match self.module.isa().triple().architecture { + // For the pulley target this is pulley bytecode, not machine code, + // so run the interpreter. + Architecture::Pulley32 + | Architecture::Pulley64 + | Architecture::Pulley32be + | Architecture::Pulley64be => { + let mut state = pulley::Vm::new(); + state.call( + NonNull::new(trampoline_ptr.cast_mut()).unwrap(), + &[ + pulley::XRegVal::new_ptr(function_ptr.cast_mut()).into(), + pulley::XRegVal::new_ptr(arguments_address).into(), + ], + [], + ); + } + + // Other targets natively execute this machine code. + _ => { + let callable_trampoline: fn(*const u8, *mut u128) -> () = + unsafe { mem::transmute(trampoline_ptr) }; + callable_trampoline(function_ptr, arguments_address); + } + } + } } /// Compilation Error when compiling a function. diff --git a/cranelift/filetests/src/test_run.rs b/cranelift/filetests/src/test_run.rs index 1e7e977c2b24..aee78c164446 100644 --- a/cranelift/filetests/src/test_run.rs +++ b/cranelift/filetests/src/test_run.rs @@ -59,8 +59,23 @@ fn is_isa_compatible( let requested_arch = requested.triple().architecture; match (host_arch, requested_arch) { + // If the host matches the requested target, then that's all good. (host, requested) if host == requested => {} + + // Allow minor differences in risc-v targets. (Architecture::Riscv64(_), Architecture::Riscv64(_)) => {} + + // Any host can run pulley so long as the pointer width and endianness + // match. + ( + _, + Architecture::Pulley32 + | Architecture::Pulley64 + | Architecture::Pulley32be + | Architecture::Pulley64be, + ) if host.triple().pointer_width() == requested.triple().pointer_width() + && host.triple().endianness() == requested.triple().endianness() => {} + _ => { return Err(format!( "skipped {file_path}: host can't run {requested_arch:?} programs" @@ -72,6 +87,10 @@ fn is_isa_compatible( // we can't natively support on the host. 
let requested_flags = requested.isa_flags(); for req_value in requested_flags { + // pointer_width for pulley already validated above + if req_value.name == "pointer_width" { + continue; + } let requested = match req_value.as_bool() { Some(requested) => requested, None => unimplemented!("ISA flag {} of kind {:?}", req_value.name, req_value.kind()), @@ -116,11 +135,26 @@ fn compile_testfile( flags: &Flags, isa: &dyn TargetIsa, ) -> anyhow::Result { - // We can't use the requested ISA directly since it does not contain info - // about the operating system / calling convention / etc.. - // - // Copy the requested ISA flags into the host ISA and use that. - let isa = build_host_isa(false, flags.clone(), isa.isa_flags()); + let isa = match isa.triple().architecture { + // Convert `&dyn TargetIsa` to `OwnedTargetIsa` by re-making the ISA and + // applying pulley flags/etc. + Architecture::Pulley32 + | Architecture::Pulley64 + | Architecture::Pulley32be + | Architecture::Pulley64be => { + let mut builder = cranelift_codegen::isa::lookup(isa.triple().clone())?; + for value in isa.isa_flags() { + builder.set(value.name, &value.value_string()).unwrap(); + } + builder.finish(flags.clone())? + } + + // We can't use the requested ISA directly since it does not contain info + // about the operating system / calling convention / etc.. + // + // Copy the requested ISA flags into the host ISA and use that. + _ => build_host_isa(false, flags.clone(), isa.isa_flags()), + }; let mut tfc = TestFileCompiler::new(isa); tfc.add_testfile(testfile)?; diff --git a/crates/c-api/include/wasmtime/instance.h b/crates/c-api/include/wasmtime/instance.h index 0d63cfb7a421..8c57d6ab2153 100644 --- a/crates/c-api/include/wasmtime/instance.h +++ b/crates/c-api/include/wasmtime/instance.h @@ -150,7 +150,7 @@ wasmtime_instance_pre_delete(wasmtime_instance_pre_t *instance_pre); * values are owned by the caller. */ WASM_API_EXTERN wasmtime_error_t *wasmtime_instance_pre_instantiate( - const wasmtime_instance_pre_t *instance_pre, wasmtime_store_t *store, + const wasmtime_instance_pre_t *instance_pre, wasmtime_context_t *store, wasmtime_instance_t *instance, wasm_trap_t **trap_ptr); /** diff --git a/crates/cli-flags/src/lib.rs b/crates/cli-flags/src/lib.rs index 16912288d188..4f8bd8637e99 100644 --- a/crates/cli-flags/src/lib.rs +++ b/crates/cli-flags/src/lib.rs @@ -361,6 +361,8 @@ wasmtime_option_group! { pub custom_page_sizes: Option, /// Configure support for the wide-arithmetic proposal. pub wide_arithmetic: Option, + /// Configure support for the extended-const proposal. + pub extended_const: Option, } enum Wasm { @@ -909,6 +911,9 @@ impl CommonOptions { if let Some(enable) = self.wasm.wide_arithmetic.or(all) { config.wasm_wide_arithmetic(enable); } + if let Some(enable) = self.wasm.extended_const.or(all) { + config.wasm_extended_const(enable); + } macro_rules! 
handle_conditionally_compiled { ($(($feature:tt, $field:tt, $method:tt))*) => ($( diff --git a/crates/cranelift/src/gc/enabled/drc.rs b/crates/cranelift/src/gc/enabled/drc.rs index 47b066607238..412e644cd292 100644 --- a/crates/cranelift/src/gc/enabled/drc.rs +++ b/crates/cranelift/src/gc/enabled/drc.rs @@ -301,6 +301,7 @@ impl GcCompiler for DrcCompiler { init: super::ArrayInit<'_>, ) -> WasmResult { let interned_type_index = func_env.module.types[array_type_index]; + let ptr_ty = func_env.pointer_type(); let len_offset = gc_compiler(func_env)?.layouts().array_length_field_offset(); let array_layout = func_env.array_layout(interned_type_index).clone(); @@ -338,9 +339,7 @@ impl GcCompiler for DrcCompiler { .store(ir::MemFlags::trusted(), len, len_addr, 0); // Finally, initialize the elements. - let len_to_elems_delta = builder - .ins() - .iconst(ir::types::I64, i64::from(len_to_elems_delta)); + let len_to_elems_delta = builder.ins().iconst(ptr_ty, i64::from(len_to_elems_delta)); let elems_addr = builder.ins().iadd(len_addr, len_to_elems_delta); init.initialize( func_env, diff --git a/crates/cranelift/src/gc/enabled/null.rs b/crates/cranelift/src/gc/enabled/null.rs index a5367893a4ee..bf478f82f511 100644 --- a/crates/cranelift/src/gc/enabled/null.rs +++ b/crates/cranelift/src/gc/enabled/null.rs @@ -155,6 +155,7 @@ impl GcCompiler for NullCompiler { init: super::ArrayInit<'_>, ) -> WasmResult { let interned_type_index = func_env.module.types[array_type_index]; + let ptr_ty = func_env.pointer_type(); let len_offset = gc_compiler(func_env)?.layouts().array_length_field_offset(); let array_layout = func_env.array_layout(interned_type_index).clone(); @@ -190,9 +191,7 @@ impl GcCompiler for NullCompiler { .store(ir::MemFlags::trusted(), len, len_addr, 0); // Finally, initialize the elements. - let len_to_elems_delta = builder - .ins() - .iconst(ir::types::I64, i64::from(len_to_elems_delta)); + let len_to_elems_delta = builder.ins().iconst(ptr_ty, i64::from(len_to_elems_delta)); let elems_addr = builder.ins().iadd(len_addr, len_to_elems_delta); init.initialize( func_env, diff --git a/crates/cranelift/src/translate/code_translator/bounds_checks.rs b/crates/cranelift/src/translate/code_translator/bounds_checks.rs index b2379e335c1f..49d8ee40545d 100644 --- a/crates/cranelift/src/translate/code_translator/bounds_checks.rs +++ b/crates/cranelift/src/translate/code_translator/bounds_checks.rs @@ -155,6 +155,16 @@ pub fn bounds_check_and_compute_addr( return Ok(Unreachable); } + // Special case: if this is a 32-bit platform and the `offset_and_size` + // overflows the 32-bit address space then there's no hope of this ever + // being in-bounds. We can't represent `offset_and_size` in CLIF as the + // native pointer type anyway, so this is an unconditional trap. + if pointer_bit_width < 64 && offset_and_size >= (1 << pointer_bit_width) { + env.before_unconditionally_trapping_memory_access(builder)?; + env.trap(builder, ir::TrapCode::HEAP_OUT_OF_BOUNDS); + return Ok(Unreachable); + } + // Special case for when we can completely omit explicit // bounds checks for 32-bit memories. // @@ -481,11 +491,29 @@ fn cast_index_to_pointer_ty( if index_ty == pointer_ty { return index; } - // Note that using 64-bit heaps on a 32-bit host is not currently supported, - // would require at least a bounds check here to ensure that the truncation - // from 64-to-32 bits doesn't lose any upper bits. For now though we're - // mostly interested in the 32-bit-heaps-on-64-bit-hosts cast. 
- assert!(index_ty.bits() < pointer_ty.bits()); + + // If the index size is larger than the pointer, that means that this is a + // 32-bit host platform with a 64-bit wasm linear memory. If the index is + // larger than 2**32 then that's guranteed to be out-of-bounds, otherwise we + // `ireduce` the index. + // + // Also note that at this time this branch doesn't support pcc nor the + // value-label-ranges of the below path. + // + // Finally, note that the returned `low_bits` here are still subject to an + // explicit bounds check in wasm so in terms of Spectre speculation on + // either side of the `trapnz` should be ok. + if index_ty.bits() > pointer_ty.bits() { + assert_eq!(index_ty, ir::types::I64); + assert_eq!(pointer_ty, ir::types::I32); + let low_bits = pos.ins().ireduce(pointer_ty, index); + let c32 = pos.ins().iconst(pointer_ty, 32); + let high_bits = pos.ins().ushr(index, c32); + let high_bits = pos.ins().ireduce(pointer_ty, high_bits); + pos.ins() + .trapnz(high_bits, ir::TrapCode::HEAP_OUT_OF_BOUNDS); + return low_bits; + } // Convert `index` to `addr_ty`. let extended_index = pos.ins().uextend(pointer_ty, index); diff --git a/crates/environ/src/compile/module_artifacts.rs b/crates/environ/src/compile/module_artifacts.rs index 7d9d132eddd6..9d992e25405c 100644 --- a/crates/environ/src/compile/module_artifacts.rs +++ b/crates/environ/src/compile/module_artifacts.rs @@ -274,12 +274,16 @@ impl<'a> ObjectBuilder<'a> { /// A type which can be the result of serializing an object. pub trait FinishedObject: Sized { + /// State required for `finish_object`, if any. + type State; + /// Emit the object as `Self`. - fn finish_object(obj: ObjectBuilder<'_>) -> Result; + fn finish_object(obj: ObjectBuilder<'_>, state: &Self::State) -> Result; } impl FinishedObject for Vec { - fn finish_object(obj: ObjectBuilder<'_>) -> Result { + type State = (); + fn finish_object(obj: ObjectBuilder<'_>, _state: &Self::State) -> Result { let mut result = ObjectVec::default(); obj.finish(&mut result)?; return Ok(result.0); diff --git a/crates/fuzzing/build.rs b/crates/fuzzing/build.rs index 08b9bddfe3a0..f58919189f26 100644 --- a/crates/fuzzing/build.rs +++ b/crates/fuzzing/build.rs @@ -15,7 +15,8 @@ fn main() { root.pop(); // chop off 'fuzzing' root.pop(); // chop off 'crates' - let tests = wasmtime_wast_util::find_tests(&root).unwrap(); + let mut tests = wasmtime_wast_util::find_tests(&root).unwrap(); + tests.sort_by_key(|test| test.path.clone()); let mut code = format!("static FILES: &[fn() -> wasmtime_wast_util::WastTest] = &[\n"); diff --git a/crates/fuzzing/src/generators/config.rs b/crates/fuzzing/src/generators/config.rs index 36d07f498966..3452f961a762 100644 --- a/crates/fuzzing/src/generators/config.rs +++ b/crates/fuzzing/src/generators/config.rs @@ -687,10 +687,26 @@ impl WasmtimeConfig { /// the current state of the engine's development. fn make_internally_consistent(&mut self) { if !self.signals_based_traps { - // Spectre-based heap mitigations require signal handlers so this - // must always be disabled if signals-based traps are disabled. if let MemoryConfig::Normal(cfg) = &mut self.memory_config { + // Spectre-based heap mitigations require signal handlers so + // this must always be disabled if signals-based traps are + // disabled. cfg.cranelift_enable_heap_access_spectre_mitigations = None; + + // With configuration settings that match the use of malloc for + // linear memories cap the `memory_reservation_for_growth` value + // to something reasonable to avoid OOM in fuzzing. 
+ if !cfg.memory_init_cow + && cfg.memory_guard_size == Some(0) + && cfg.memory_reservation == Some(0) + { + let min = 10 << 20; // 10 MiB + if let Some(val) = &mut cfg.memory_reservation_for_growth { + *val = (*val).min(min); + } else { + cfg.memory_reservation_for_growth = Some(min); + } + } } } } diff --git a/crates/wasmtime/src/compile.rs b/crates/wasmtime/src/compile.rs index 1222fb9607c4..1598bbf05e51 100644 --- a/crates/wasmtime/src/compile.rs +++ b/crates/wasmtime/src/compile.rs @@ -64,6 +64,7 @@ pub(crate) fn build_artifacts( engine: &Engine, wasm: &[u8], dwarf_package: Option<&[u8]>, + obj_state: &T::State, ) -> Result<(T, Option<(CompiledModuleInfo, ModuleTypes)>)> { let tunables = engine.tunables(); @@ -111,7 +112,7 @@ pub(crate) fn build_artifacts( let info = compilation_artifacts.unwrap_as_module_info(); let types = types.finish(); object.serialize_info(&(&info, &types)); - let result = T::finish_object(object)?; + let result = T::finish_object(object, obj_state)?; Ok((result, Some((info, types)))) } @@ -128,6 +129,7 @@ pub(crate) fn build_component_artifacts( engine: &Engine, binary: &[u8], _dwarf_package: Option<&[u8]>, + obj_state: &T::State, ) -> Result<(T, Option)> { use wasmtime_environ::component::{ CompiledComponentInfo, ComponentArtifacts, ComponentTypesBuilder, @@ -186,7 +188,7 @@ pub(crate) fn build_component_artifacts( }; object.serialize_info(&artifacts); - let result = T::finish_object(object)?; + let result = T::finish_object(object, obj_state)?; Ok((result, Some(artifacts))) } diff --git a/crates/wasmtime/src/compile/code_builder.rs b/crates/wasmtime/src/compile/code_builder.rs index ec8b543292c7..d6b33fb25f1f 100644 --- a/crates/wasmtime/src/compile/code_builder.rs +++ b/crates/wasmtime/src/compile/code_builder.rs @@ -274,7 +274,7 @@ impl<'a> CodeBuilder<'a> { pub fn compile_module_serialized(&self) -> Result> { let wasm = self.get_wasm()?; let dwarf_package = self.get_dwarf_package(); - let (v, _) = super::build_artifacts(self.engine, &wasm, dwarf_package.as_deref())?; + let (v, _) = super::build_artifacts(self.engine, &wasm, dwarf_package.as_deref(), &())?; Ok(v) } @@ -284,7 +284,7 @@ impl<'a> CodeBuilder<'a> { #[cfg(feature = "component-model")] pub fn compile_component_serialized(&self) -> Result> { let bytes = self.get_wasm()?; - let (v, _) = super::build_component_artifacts(self.engine, &bytes, None)?; + let (v, _) = super::build_component_artifacts(self.engine, &bytes, None, &())?; Ok(v) } } diff --git a/crates/wasmtime/src/compile/runtime.rs b/crates/wasmtime/src/compile/runtime.rs index 781be44829e8..23a40eaaf433 100644 --- a/crates/wasmtime/src/compile/runtime.rs +++ b/crates/wasmtime/src/compile/runtime.rs @@ -9,9 +9,15 @@ use std::sync::Arc; use wasmtime_environ::{FinishedObject, ObjectBuilder, ObjectKind}; impl<'a> CodeBuilder<'a> { - fn compile_cached( + fn compile_cached( &self, - build_artifacts: fn(&Engine, &[u8], Option<&[u8]>) -> Result<(MmapVecWrapper, Option)>, + build_artifacts: fn( + &Engine, + &[u8], + Option<&[u8]>, + &S, + ) -> Result<(MmapVecWrapper, Option)>, + state: &S, ) -> Result<(Arc, Option)> { let wasm = self.get_wasm()?; let dwarf_package = self.get_dwarf_package(); @@ -28,24 +34,32 @@ impl<'a> CodeBuilder<'a> { &dwarf_package, // Don't hash this as it's just its own "pure" function pointer. NotHashed(build_artifacts), + // Don't hash the FinishedObject state: this contains + // things like required runtime alignment, and does + // not impact the compilation result itself. 
+ NotHashed(state), ); let (code, info_and_types) = wasmtime_cache::ModuleCacheEntry::new("wasmtime", self.engine.cache_config()) .get_data_raw( &state, // Cache miss, compute the actual artifacts - |(engine, wasm, dwarf_package, build_artifacts)| -> Result<_> { - let (mmap, info) = - (build_artifacts.0)(engine.0, wasm, dwarf_package.as_deref())?; - let code = publish_mmap(mmap.0)?; + |(engine, wasm, dwarf_package, build_artifacts, state)| -> Result<_> { + let (mmap, info) = (build_artifacts.0)( + engine.0, + wasm, + dwarf_package.as_deref(), + state.0, + )?; + let code = publish_mmap(engine.0, mmap.0)?; Ok((code, info)) }, // Implementation of how to serialize artifacts - |(_engine, _wasm, _, _), (code, _info_and_types)| { + |(_engine, _wasm, _, _, _), (code, _info_and_types)| { Some(code.mmap().to_vec()) }, // Cache hit, deserialize the provided artifacts - |(engine, wasm, _, _), serialized_bytes| { + |(engine, wasm, _, _, _), serialized_bytes| { let kind = if wasmparser::Parser::is_component(&wasm) { ObjectKind::Component } else { @@ -61,8 +75,8 @@ impl<'a> CodeBuilder<'a> { #[cfg(not(feature = "cache"))] { let (mmap, info_and_types) = - build_artifacts(self.engine, &wasm, dwarf_package.as_deref())?; - let code = publish_mmap(mmap.0)?; + build_artifacts(self.engine, &wasm, dwarf_package.as_deref(), state)?; + let code = publish_mmap(self.engine, mmap.0)?; return Ok((code, info_and_types)); } @@ -79,7 +93,9 @@ impl<'a> CodeBuilder<'a> { /// Note that this method will cache compilations if the `cache` feature is /// enabled and turned on in [`Config`](crate::Config). pub fn compile_module(&self) -> Result { - let (code, info_and_types) = self.compile_cached(super::build_artifacts)?; + let custom_alignment = self.custom_alignment(); + let (code, info_and_types) = + self.compile_cached(super::build_artifacts, &custom_alignment)?; Module::from_parts(self.engine, code, info_and_types) } @@ -87,22 +103,42 @@ impl<'a> CodeBuilder<'a> { /// [`Component`] instead of a module. #[cfg(feature = "component-model")] pub fn compile_component(&self) -> Result { - let (code, artifacts) = self.compile_cached(super::build_component_artifacts)?; + let custom_alignment = self.custom_alignment(); + let (code, artifacts) = + self.compile_cached(super::build_component_artifacts, &custom_alignment)?; Component::from_parts(self.engine, code, artifacts) } + + fn custom_alignment(&self) -> CustomAlignment { + CustomAlignment { + alignment: self + .engine + .custom_code_memory() + .map(|c| c.required_alignment()) + .unwrap_or(1), + } + } } -fn publish_mmap(mmap: MmapVec) -> Result> { - let mut code = CodeMemory::new(mmap)?; +fn publish_mmap(engine: &Engine, mmap: MmapVec) -> Result> { + let mut code = CodeMemory::new(engine, mmap)?; code.publish()?; Ok(Arc::new(code)) } pub(crate) struct MmapVecWrapper(pub MmapVec); +/// Custom alignment requirements from the Engine for +/// produced-at-runtime-in-memory code artifacts. 
+pub(crate) struct CustomAlignment { + alignment: usize, +} + impl FinishedObject for MmapVecWrapper { - fn finish_object(obj: ObjectBuilder<'_>) -> Result { + type State = CustomAlignment; + fn finish_object(obj: ObjectBuilder<'_>, align: &CustomAlignment) -> Result { let mut result = ObjectMmap::default(); + result.alignment = align.alignment; return match obj.finish(&mut result) { Ok(()) => { assert!(result.mmap.is_some(), "no reserve"); @@ -127,6 +163,7 @@ impl FinishedObject for MmapVecWrapper { struct ObjectMmap { mmap: Option, len: usize, + alignment: usize, err: Option, } @@ -137,7 +174,7 @@ impl FinishedObject for MmapVecWrapper { fn reserve(&mut self, additional: usize) -> Result<(), ()> { assert!(self.mmap.is_none(), "cannot reserve twice"); - self.mmap = match MmapVec::with_capacity(additional) { + self.mmap = match MmapVec::with_capacity_and_alignment(additional, self.alignment) { Ok(mmap) => Some(mmap), Err(e) => { self.err = Some(e); diff --git a/crates/wasmtime/src/config.rs b/crates/wasmtime/src/config.rs index 0caf0ddcbb07..22e60bcb03f1 100644 --- a/crates/wasmtime/src/config.rs +++ b/crates/wasmtime/src/config.rs @@ -30,6 +30,8 @@ use crate::stack::{StackCreator, StackCreatorProxy}; #[cfg(feature = "async")] use wasmtime_fiber::RuntimeFiberStackCreator; +#[cfg(feature = "runtime")] +pub use crate::runtime::code_memory::CustomCodeMemory; #[cfg(feature = "pooling-allocator")] pub use crate::runtime::vm::MpkEnabled; #[cfg(all(feature = "incremental-cache", feature = "cranelift"))] @@ -141,6 +143,8 @@ pub struct Config { pub(crate) cache_config: CacheConfig, #[cfg(feature = "runtime")] pub(crate) mem_creator: Option>, + #[cfg(feature = "runtime")] + pub(crate) custom_code_memory: Option>, pub(crate) allocation_strategy: InstanceAllocationStrategy, pub(crate) max_wasm_stack: usize, /// Explicitly enabled features via `Config::wasm_*` methods. This is a @@ -245,6 +249,8 @@ impl Config { profiling_strategy: ProfilingStrategy::None, #[cfg(feature = "runtime")] mem_creator: None, + #[cfg(feature = "runtime")] + custom_code_memory: None, allocation_strategy: InstanceAllocationStrategy::OnDemand, // 512k of stack -- note that this is chosen currently to not be too // big, not be too small, and be a good default for most platforms. @@ -1387,6 +1393,33 @@ impl Config { self } + /// Sets a custom executable-memory publisher. + /// + /// Custom executable-memory publishers are hooks that allow + /// Wasmtime to make certain regions of memory executable when + /// loading precompiled modules or compiling new modules + /// in-process. In most modern operating systems, memory allocated + /// for heap usage is readable and writable by default but not + /// executable. To jump to machine code stored in that memory, we + /// need to make it executable. For security reasons, we usually + /// also make it read-only at the same time, so the executing code + /// can't be modified later. + /// + /// By default, Wasmtime will use the appropriate system calls on + /// the host platform for this work. However, it also allows + /// plugging in a custom implementation via this configuration + /// option. This may be useful on custom or `no_std` platforms, + /// for example, especially where virtual memory is not otherwise + /// used by Wasmtime (no `signals-and-traps` feature). 
+ #[cfg(feature = "runtime")] + pub fn with_custom_code_memory( + &mut self, + custom_code_memory: Option>, + ) -> &mut Self { + self.custom_code_memory = custom_code_memory; + self + } + /// Sets the instance allocation strategy to use. /// /// This is notably used in conjunction with @@ -1983,12 +2016,7 @@ impl Config { // errors are panics though due to unimplemented bits in ABI // code and those causes are listed here. if self.compiler_target().is_pulley() { - return WasmFeatures::SIMD - | WasmFeatures::RELAXED_SIMD - | WasmFeatures::TAIL_CALL - | WasmFeatures::FLOATS - | WasmFeatures::MEMORY64 - | WasmFeatures::GC_TYPES; + return WasmFeatures::TAIL_CALL; } // Other Cranelift backends are either 100% missing or complete diff --git a/crates/wasmtime/src/engine.rs b/crates/wasmtime/src/engine.rs index 627ffe7a5cb6..05b2ec485649 100644 --- a/crates/wasmtime/src/engine.rs +++ b/crates/wasmtime/src/engine.rs @@ -1,5 +1,7 @@ use crate::prelude::*; #[cfg(feature = "runtime")] +pub use crate::runtime::code_memory::CustomCodeMemory; +#[cfg(feature = "runtime")] use crate::runtime::type_registry::TypeRegistry; #[cfg(feature = "runtime")] use crate::runtime::vm::GcRuntime; @@ -669,6 +671,11 @@ impl Engine { &self.inner.signatures } + #[cfg(feature = "runtime")] + pub(crate) fn custom_code_memory(&self) -> Option<&Arc> { + self.config().custom_code_memory.as_ref() + } + pub(crate) fn epoch_counter(&self) -> &AtomicU64 { &self.inner.epoch } @@ -736,6 +743,15 @@ impl Engine { (f1(), f2()) } + /// Returns the required alignment for a code image, if we + /// allocate in a way that is not a system `mmap()` that naturally + /// aligns it. + fn required_code_alignment(&self) -> usize { + self.custom_code_memory() + .map(|c| c.required_alignment()) + .unwrap_or(1) + } + /// Loads a `CodeMemory` from the specified in-memory slice, copying it to a /// uniquely owned mmap. /// @@ -746,7 +762,13 @@ impl Engine { bytes: &[u8], expected: ObjectKind, ) -> Result> { - self.load_code(crate::runtime::vm::MmapVec::from_slice(bytes)?, expected) + self.load_code( + crate::runtime::vm::MmapVec::from_slice_with_alignment( + bytes, + self.required_code_alignment(), + )?, + expected, + ) } /// Like `load_code_bytes`, but creates a mmap from a file on disk. @@ -769,7 +791,7 @@ impl Engine { expected: ObjectKind, ) -> Result> { serialization::check_compatible(self, &mmap, expected)?; - let mut code = crate::CodeMemory::new(mmap)?; + let mut code = crate::CodeMemory::new(self, mmap)?; code.publish()?; Ok(Arc::new(code)) } diff --git a/crates/wasmtime/src/runtime/code_memory.rs b/crates/wasmtime/src/runtime/code_memory.rs index 8ad04f789b71..390e851cac26 100644 --- a/crates/wasmtime/src/runtime/code_memory.rs +++ b/crates/wasmtime/src/runtime/code_memory.rs @@ -2,6 +2,8 @@ use crate::prelude::*; use crate::runtime::vm::{libcalls, MmapVec, UnwindRegistration}; +use crate::Engine; +use alloc::sync::Arc; use core::ops::Range; use object::endian::Endianness; use object::read::{elf::ElfFile64, Object, ObjectSection}; @@ -22,6 +24,7 @@ pub struct CodeMemory { needs_executable: bool, #[cfg(feature = "debug-builtins")] has_native_debug_info: bool, + custom_code_memory: Option>, relocations: Vec<(usize, obj::LibCall)>, @@ -38,6 +41,14 @@ pub struct CodeMemory { impl Drop for CodeMemory { fn drop(&mut self) { + // If there is a custom code memory handler, restore the + // original (non-executable) state of the memory. 
+ if let Some(mem) = self.custom_code_memory.as_ref() { + let text = self.text(); + mem.unpublish_executable(text.as_ptr(), text.len()) + .expect("Executable memory unpublish failed"); + } + // Drop the registrations before `self.mmap` since they (implicitly) refer to it. let _ = self.unwind_registration.take(); #[cfg(feature = "debug-builtins")] @@ -50,13 +61,50 @@ fn _assert() { _assert_send_sync::(); } +/// Interface implemented by an embedder to provide custom +/// implementations of code-memory protection and execute permissions. +pub trait CustomCodeMemory: Send + Sync { + /// The minimal alignment granularity for an address region that + /// can be made executable. + /// + /// Wasmtime does not assume the system page size for this because + /// custom code-memory protection can be used when all other uses + /// of virtual memory are disabled. + fn required_alignment(&self) -> usize; + + /// Publish a region of memory as executable. + /// + /// This should update permissions from the default RW + /// (readable/writable but not executable) to RX + /// (readable/executable but not writable), enforcing W^X + /// discipline. + /// + /// If the platform requires any data/instruction coherence + /// action, that should be performed as part of this hook as well. + /// + /// `ptr` and `ptr.offset(len)` are guaranteed to be aligned as + /// per `required_alignment()`. + fn publish_executable(&self, ptr: *const u8, len: usize) -> anyhow::Result<()>; + + /// Unpublish a region of memory. + /// + /// This should perform the opposite effect of `make_executable`, + /// switching a range of memory back from RX (readable/executable) + /// to RW (readable/writable). It is guaranteed that no code is + /// running anymore from this region. + /// + /// `ptr` and `ptr.offset(len)` are guaranteed to be aligned as + /// per `required_alignment()`. + fn unpublish_executable(&self, ptr: *const u8, len: usize) -> anyhow::Result<()>; +} + impl CodeMemory { /// Creates a new `CodeMemory` by taking ownership of the provided /// `MmapVec`. /// /// The returned `CodeMemory` manages the internal `MmapVec` and the /// `publish` method is used to actually make the memory executable. - pub fn new(mmap: MmapVec) -> Result { + pub fn new(engine: &Engine, mmap: MmapVec) -> Result { let obj = ElfFile64::::parse(&mmap[..]) .map_err(obj::ObjectCrateErrorWrapper) .with_context(|| "failed to parse internal compilation artifact")?; @@ -140,6 +188,7 @@ impl CodeMemory { _ => log::debug!("ignoring section {name}"), } } + Ok(Self { mmap, unwind_registration: None, @@ -151,6 +200,7 @@ impl CodeMemory { needs_executable, #[cfg(feature = "debug-builtins")] has_native_debug_info, + custom_code_memory: engine.custom_code_memory().cloned(), text, unwind, trap_data, @@ -270,28 +320,30 @@ impl CodeMemory { // Switch the executable portion from readonly to read/execute. if self.needs_executable { - #[cfg(feature = "signals-based-traps")] - { - let text = self.text(); - - use wasmtime_jit_icache_coherence as icache_coherence; - - // Clear the newly allocated code from cache if the processor requires it - // - // Do this before marking the memory as R+X, technically we should be able to do it after - // but there are some CPU's that have had errata about doing this with read only memory. 
- icache_coherence::clear_cache(text.as_ptr().cast(), text.len()) - .expect("Failed cache clear"); - - self.mmap - .make_executable(self.text.clone(), self.enable_branch_protection) - .context("unable to make memory executable")?; - - // Flush any in-flight instructions from the pipeline - icache_coherence::pipeline_flush_mt().expect("Failed pipeline flush"); + if !self.custom_publish()? { + #[cfg(feature = "signals-based-traps")] + { + let text = self.text(); + + use wasmtime_jit_icache_coherence as icache_coherence; + + // Clear the newly allocated code from cache if the processor requires it + // + // Do this before marking the memory as R+X, technically we should be able to do it after + // but there are some CPU's that have had errata about doing this with read only memory. + icache_coherence::clear_cache(text.as_ptr().cast(), text.len()) + .expect("Failed cache clear"); + + self.mmap + .make_executable(self.text.clone(), self.enable_branch_protection) + .context("unable to make memory executable")?; + + // Flush any in-flight instructions from the pipeline + icache_coherence::pipeline_flush_mt().expect("Failed pipeline flush"); + } + #[cfg(not(feature = "signals-based-traps"))] + bail!("this target requires virtual memory to be enabled"); } - #[cfg(not(feature = "signals-based-traps"))] - bail!("this target requires virtual memory to be enabled"); } // With all our memory set up use the platform-specific @@ -307,6 +359,29 @@ impl CodeMemory { Ok(()) } + fn custom_publish(&mut self) -> Result { + if let Some(mem) = self.custom_code_memory.as_ref() { + let text = self.text(); + // The text section should be aligned to + // `custom_code_memory.required_alignment()` due to a + // combination of two invariants: + // + // - MmapVec aligns its start address, even in owned-Vec mode; and + // - The text segment inside the ELF image will be aligned according + // to the platform's requirements. + let text_addr = text.as_ptr() as usize; + assert_eq!(text_addr & (mem.required_alignment() - 1), 0); + + // The custom code memory handler will ensure the + // memory is executable and also handle icache + // coherence. + mem.publish_executable(text.as_ptr(), text.len())?; + Ok(true) + } else { + Ok(false) + } + } + unsafe fn apply_relocations(&mut self) -> Result<()> { if self.relocations.is_empty() { return Ok(()); diff --git a/crates/wasmtime/src/runtime/externals/table.rs b/crates/wasmtime/src/runtime/externals/table.rs index 1131921c1f7e..6a38611032c8 100644 --- a/crates/wasmtime/src/runtime/externals/table.rs +++ b/crates/wasmtime/src/runtime/externals/table.rs @@ -443,7 +443,6 @@ impl Table { mod tests { use super::*; use crate::{Instance, Module, Store}; - use wasmtime_environ::TripleExt; #[test] fn hash_key_is_stable_across_duplicate_store_data_entries() -> Result<()> { @@ -455,20 +454,7 @@ mod tests { (table (export "t") 1 1 externref) ) "#, - ); - // Expect this test to fail on pulley at this time. When pulley supports - // externref this should switch back to using `?` on the constructor - // above for all platforms. 
- let module = match module { - Ok(module) => { - assert!(!store.engine().target().is_pulley()); - module - } - Err(e) => { - assert!(store.engine().target().is_pulley(), "bad error {e:?}"); - return Ok(()); - } - }; + )?; let instance = Instance::new(&mut store, &module, &[])?; // Each time we `get_table`, we call `Table::from_wasmtime` which adds diff --git a/crates/wasmtime/src/runtime/vm/interpreter.rs b/crates/wasmtime/src/runtime/vm/interpreter.rs index c12c21e74efa..6d0349b9d504 100644 --- a/crates/wasmtime/src/runtime/vm/interpreter.rs +++ b/crates/wasmtime/src/runtime/vm/interpreter.rs @@ -96,9 +96,17 @@ impl InterpreterRef<'_> { // If the VM wants to call out to the host then dispatch that // here based on `sig`. Once that returns we can resume // execution at `resume`. + // + // Note that the `raise` libcall is handled specially here since + // longjmp/setjmp is handled differently than on the host. DoneReason::CallIndirectHost { id, resume } => { - self.call_indirect_host(id); - bytecode = resume; + if u32::from(id) == HostCall::Builtin(BuiltinFunctionIndex::raise()).index() { + self.longjmp(setjmp); + break false; + } else { + self.call_indirect_host(id); + bytecode = resume; + } } // If the VM trapped then process that here and return `false`. DoneReason::Trap(pc) => { @@ -141,22 +149,25 @@ impl InterpreterRef<'_> { // Trap was handled, yay! We don't use `jmp_buf`. TrapTest::Trap { jmp_buf: _ } => {} } + self.longjmp(setjmp); + } - // Perform a "longjmp" by restoring the "setjmp" context saved when this - // started. - // - // FIXME: this is not restoring callee-save state. For example if - // there's more than one Pulley activation on the stack that means that - // the previous one is expecting the callee (the host) to preserve all - // callee-save registers. That's not restored here which means with - // multiple activations we're effectively corrupting callee-save - // registers. - // - // One fix for this is to possibly update the `SystemV` ABI on pulley to - // have no callee-saved registers and make everything caller-saved. That - // would force all trampolines to save all state which is basically - // what we want as they'll naturally restore state if we later return to - // them. + /// Perform a "longjmp" by restoring the "setjmp" context saved when this + /// started. + /// + /// FIXME: this is not restoring callee-save state. For example if + /// there's more than one Pulley activation on the stack that means that + /// the previous one is expecting the callee (the host) to preserve all + /// callee-save registers. That's not restored here which means with + /// multiple activations we're effectively corrupting callee-save + /// registers. + /// + /// One fix for this is to possibly update the `SystemV` ABI on pulley to + /// have no callee-saved registers and make everything caller-saved. That + /// would force all trampolines to save all state which is basically + /// what we want as they'll naturally restore state if we later return to + /// them. + fn longjmp(&mut self, setjmp: Setjmp) { let Setjmp { sp, fp, lr } = setjmp; self.0[XReg::sp].set_ptr(sp); self.0[XReg::fp].set_ptr(fp); diff --git a/crates/wasmtime/src/runtime/vm/memory.rs b/crates/wasmtime/src/runtime/vm/memory.rs index 116830f41ed0..7d5e23760abb 100644 --- a/crates/wasmtime/src/runtime/vm/memory.rs +++ b/crates/wasmtime/src/runtime/vm/memory.rs @@ -290,22 +290,6 @@ impl Memory { // overkill for this purpose. 
let absolute_max = 0usize.wrapping_sub(page_size); - // Sanity-check what should already be true from wasm module validation. - // Note that for 32-bit targets the absolute maximum is `1<<32` during - // compilation, not one-page-less-than-u32::MAX, so need to handle that - // specially here. - let absolute_max64 = if cfg!(target_pointer_width = "32") { - 1 << 32 - } else { - u64::try_from(absolute_max).unwrap() - }; - if let Ok(size) = ty.minimum_byte_size() { - assert!(size <= absolute_max64); - } - if let Ok(max) = ty.maximum_byte_size() { - assert!(max <= absolute_max64); - } - // If the minimum memory size overflows the size of our own address // space, then we can't satisfy this request, but defer the error to // later so the `store` can be informed that an effective oom is diff --git a/crates/wasmtime/src/runtime/vm/mmap.rs b/crates/wasmtime/src/runtime/vm/mmap.rs index f06bb0a8d436..78e50939c26b 100644 --- a/crates/wasmtime/src/runtime/vm/mmap.rs +++ b/crates/wasmtime/src/runtime/vm/mmap.rs @@ -135,9 +135,9 @@ impl Mmap { /// Return a struct representing a page-aligned offset into the mmap. /// - /// Returns an error if `offset >= self.len_aligned()`. + /// Returns an error if `offset > self.len_aligned()`. pub fn offset(self: &Arc, offset: HostAlignedByteCount) -> Result { - if offset >= self.len_aligned() { + if offset > self.len_aligned() { bail!( "offset {} is not in bounds for mmap: {}", offset, @@ -359,11 +359,8 @@ pub struct MmapOffset { impl MmapOffset { #[inline] fn new(mmap: Arc>, offset: HostAlignedByteCount) -> Self { - // Note < rather than <=. This currently cannot represent the logical - // end of the mmap. We may need to change this if that becomes - // necessary. assert!( - offset < mmap.len_aligned(), + offset <= mmap.len_aligned(), "offset {} is in bounds (< {})", offset, mmap.len_aligned(), diff --git a/crates/wasmtime/src/runtime/vm/mmap_vec.rs b/crates/wasmtime/src/runtime/vm/mmap_vec.rs index f283ef4265bd..33040642384e 100644 --- a/crates/wasmtime/src/runtime/vm/mmap_vec.rs +++ b/crates/wasmtime/src/runtime/vm/mmap_vec.rs @@ -1,13 +1,19 @@ use crate::prelude::*; +#[cfg(not(feature = "signals-based-traps"))] +use crate::runtime::vm::send_sync_ptr::SendSyncPtr; #[cfg(feature = "signals-based-traps")] use crate::runtime::vm::{mmap::UnalignedLength, Mmap}; +#[cfg(not(feature = "signals-based-traps"))] +use alloc::alloc::Layout; use alloc::sync::Arc; use core::ops::{Deref, Range}; +#[cfg(not(feature = "signals-based-traps"))] +use core::ptr::NonNull; #[cfg(feature = "std")] use std::fs::File; /// A type which prefers to store backing memory in an OS-backed memory mapping -/// but can fall back to `Vec` as well. +/// but can fall back to the regular memory allocator as well. /// /// This type is used to store code in Wasmtime and manage read-only and /// executable permissions of compiled images. This is created from either an @@ -20,13 +26,19 @@ use std::fs::File; /// are typically not, then the remaining bytes in the final page for /// mmap-backed instances are unused. /// -/// Note that when `signals-based-traps` is disabled then this type is backed -/// by a normal `Vec`. In such a scenario this type does not support -/// read-only or executable bits and the methods are not available. +/// Note that when `signals-based-traps` is disabled then this type is +/// backed by the regular memory allocator via `alloc` APIs. In such a +/// scenario this type does not support read-only or executable bits +/// and the methods are not available. 
However, the `CustomCodeMemory` +/// mechanism may be used by the embedder to set up and tear down +/// executable permissions on parts of this storage. pub enum MmapVec { #[doc(hidden)] #[cfg(not(feature = "signals-based-traps"))] - Vec(Vec), + Alloc { + base: SendSyncPtr, + layout: Layout, + }, #[doc(hidden)] #[cfg(feature = "signals-based-traps")] Mmap { @@ -52,20 +64,32 @@ impl MmapVec { } #[cfg(not(feature = "signals-based-traps"))] - fn new_vec(vec: Vec) -> MmapVec { - MmapVec::Vec(vec) + fn new_alloc(len: usize, alignment: usize) -> MmapVec { + let layout = Layout::from_size_align(len, alignment) + .expect("Invalid size or alignment for MmapVec allocation"); + let base = SendSyncPtr::new( + NonNull::new(unsafe { alloc::alloc::alloc_zeroed(layout.clone()) }) + .expect("Allocation of MmapVec storage failed"), + ); + MmapVec::Alloc { base, layout } } - /// Creates a new zero-initialized `MmapVec` with the given `size`. + /// Creates a new zero-initialized `MmapVec` with the given `size` + /// and `alignment`. /// /// This commit will return a new `MmapVec` suitably sized to hold `size` /// bytes. All bytes will be initialized to zero since this is a fresh OS /// page allocation. - pub fn with_capacity(size: usize) -> Result { + pub fn with_capacity_and_alignment(size: usize, alignment: usize) -> Result { #[cfg(feature = "signals-based-traps")] - return Ok(MmapVec::new_mmap(Mmap::with_at_least(size)?, size)); + { + assert!(alignment <= crate::runtime::vm::host_page_size()); + return Ok(MmapVec::new_mmap(Mmap::with_at_least(size)?, size)); + } #[cfg(not(feature = "signals-based-traps"))] - return Ok(MmapVec::new_vec(vec![0; size])); + { + return Ok(MmapVec::new_alloc(size, alignment)); + } } /// Creates a new `MmapVec` from the contents of an existing `slice`. @@ -74,7 +98,21 @@ impl MmapVec { /// `slice` is copied into the new mmap. It's recommended to avoid this /// method if possible to avoid the need to copy data around. pub fn from_slice(slice: &[u8]) -> Result { - let mut result = MmapVec::with_capacity(slice.len())?; + MmapVec::from_slice_with_alignment(slice, 1) + } + + /// Creates a new `MmapVec` from the contents of an existing + /// `slice`, with a minimum alignment. + /// + /// `align` must be a power of two. This is useful when page + /// alignment is required when the system otherwise does not use + /// virtual memory but has a custom code publish handler. + /// + /// A new `MmapVec` is allocated to hold the contents of `slice` and then + /// `slice` is copied into the new mmap. It's recommended to avoid this + /// method if possible to avoid the need to copy data around. pub + pub fn from_slice_with_alignment(slice: &[u8], align: usize) -> Result { + let mut result = MmapVec::with_capacity_and_alignment(slice.len(), align)?; // SAFETY: The mmap hasn't been made readonly yet so this should be // safe to call. unsafe { @@ -132,7 +170,7 @@ impl MmapVec { pub fn original_file(&self) -> Option<&Arc> { match self { #[cfg(not(feature = "signals-based-traps"))] - MmapVec::Vec(_) => None, + MmapVec::Alloc { .. } => None, #[cfg(feature = "signals-based-traps")] MmapVec::Mmap { mmap, .. 
} => mmap.original_file(), } @@ -155,7 +193,9 @@ impl MmapVec { pub unsafe fn as_mut_slice(&mut self) -> &mut [u8] { match self { #[cfg(not(feature = "signals-based-traps"))] - MmapVec::Vec(v) => v, + MmapVec::Alloc { base, layout } => { + core::slice::from_raw_parts_mut(base.as_mut(), layout.size()) + } #[cfg(feature = "signals-based-traps")] MmapVec::Mmap { mmap, len } => mmap.slice_mut(0..*len), } @@ -169,7 +209,9 @@ impl Deref for MmapVec { fn deref(&self) -> &[u8] { match self { #[cfg(not(feature = "signals-based-traps"))] - MmapVec::Vec(v) => v, + MmapVec::Alloc { base, layout } => unsafe { + core::slice::from_raw_parts(base.as_ptr(), layout.size()) + }, #[cfg(feature = "signals-based-traps")] MmapVec::Mmap { mmap, len } => { // SAFETY: all bytes for this mmap, which is owned by @@ -180,13 +222,28 @@ impl Deref for MmapVec { } } +impl Drop for MmapVec { + fn drop(&mut self) { + match self { + #[cfg(not(feature = "signals-based-traps"))] + MmapVec::Alloc { base, layout, .. } => unsafe { + alloc::alloc::dealloc(base.as_mut(), layout.clone()); + }, + #[cfg(feature = "signals-based-traps")] + MmapVec::Mmap { .. } => { + // Drop impl on the `mmap` takes care of this case. + } + } + } +} + #[cfg(test)] mod tests { use super::MmapVec; #[test] fn smoke() { - let mut mmap = MmapVec::with_capacity(10).unwrap(); + let mut mmap = MmapVec::with_capacity_and_alignment(10, 1).unwrap(); assert_eq!(mmap.len(), 10); assert_eq!(&mmap[..], &[0; 10]); @@ -198,4 +255,11 @@ mod tests { assert_eq!(mmap[0], 1); assert_eq!(mmap[2], 3); } + + #[test] + fn alignment() { + let mmap = MmapVec::with_capacity_and_alignment(10, 4096).unwrap(); + let raw_ptr = &mmap[0] as *const _ as usize; + assert_eq!(raw_ptr & (4096 - 1), 0); + } } diff --git a/crates/wasmtime/src/runtime/vm/traphandlers.rs b/crates/wasmtime/src/runtime/vm/traphandlers.rs index 4759b45c8944..1c2a3cf30060 100644 --- a/crates/wasmtime/src/runtime/vm/traphandlers.rs +++ b/crates/wasmtime/src/runtime/vm/traphandlers.rs @@ -640,6 +640,7 @@ impl CallThreadState { /// destructors on the stack, if there are any. unsafe fn unwind(&self) -> ! { debug_assert!(!self.jmp_buf.get().is_null()); + debug_assert!(self.jmp_buf.get() != CallThreadState::JMP_BUF_INTERPRETER_SENTINEL); traphandlers::wasmtime_longjmp(self.jmp_buf.get()); } diff --git a/crates/wasmtime/src/runtime/vm/vmcontext.rs b/crates/wasmtime/src/runtime/vm/vmcontext.rs index 18265eabe5b3..1f7215091a39 100644 --- a/crates/wasmtime/src/runtime/vm/vmcontext.rs +++ b/crates/wasmtime/src/runtime/vm/vmcontext.rs @@ -823,6 +823,12 @@ impl VMFuncRef { caller: *mut VMOpaqueContext, args_and_results: *mut [ValRaw], ) -> bool { + // If `caller` is actually a `VMArrayCallHostFuncContext` then skip the + // interpreter, even though it's available, as `array_call` will be + // native code. + if (*self.vmctx).magic == wasmtime_environ::VM_ARRAY_CALL_HOST_FUNC_MAGIC { + return self.array_call_native(caller, args_and_results); + } vm.call(self.array_call.cast(), self.vmctx, caller, args_and_results) } diff --git a/crates/wast-util/src/lib.rs b/crates/wast-util/src/lib.rs index c18e03d15556..fc06d27925b6 100644 --- a/crates/wast-util/src/lib.rs +++ b/crates/wast-util/src/lib.rs @@ -301,23 +301,13 @@ impl Compiler { } } - // Pulley is just getting started, it implements almost no proposals - // yet. 
Compiler::CraneliftPulley => { - // Unsupported proposals - if config.memory64() - || config.custom_page_sizes() - || config.multi_memory() - || config.threads() - || config.gc() - || config.function_references() - || config.relaxed_simd() - || config.reference_types() - || config.tail_call() - || config.extended_const() + // Unsupported proposals. Note that other proposals have partial + // support at this time (pulley is a work-in-progress) and so + // individual tests are listed below as "should fail" even if + // they're not covered in this list. + if config.tail_call() || config.wide_arithmetic() - || config.simd() - || config.gc_types() || config.exceptions() || config.stack_switching() { @@ -421,62 +411,208 @@ impl WastTest { } } - // Pulley is in a bit of a special state at this time where it supports - // only a subset of the initial MVP of WebAssembly. That means that no - // test technically passes by default but a few do happen to use just - // the right subset of wasm that we can pass it. For now maintain an - // allow-list of tests that are known to pass in Pulley. As tests are - // fixed they should get added to this list. Over time this list will - // instead get inverted to "these tests are known to fail" once Pulley - // implements more proposals. + if config.compiler.should_fail(&self.config) { + return true; + } + + // Pulley supports a mishmash of proposals at this time as it's in an + // interim state. It doesn't support all of the MVP but it supports + // enough to pass some GC tests for example. This means that + // `Compiler::should_fail` is pretty liberal (the check above). To + // handle this there's an extra check here for an exhaustive list of + // unsupported tests on Pulley. This list will get burned down as + // features in Pulley are implemented. 
if config.compiler == Compiler::CraneliftPulley { - let supported = [ - "custom-page-sizes/custom-page-sizes-invalid.wast", - "exception-handling/exports.wast", - "extended-const/data.wast", - "misc_testsuite/component-model/adapter.wast", - "misc_testsuite/component-model/aliasing.wast", - "misc_testsuite/component-model/import.wast", - "misc_testsuite/component-model/instance.wast", - "misc_testsuite/component-model/linking.wast", - "misc_testsuite/component-model/nested.wast", - "misc_testsuite/component-model/types.wast", - "misc_testsuite/elem-ref-null.wast", - "misc_testsuite/elem_drop.wast", - "misc_testsuite/empty.wast", - "misc_testsuite/fib.wast", - "misc_testsuite/func-400-params.wast", - "misc_testsuite/gc/more-rec-groups-than-types.wast", - "misc_testsuite/gc/rec-group-funcs.wast", - "misc_testsuite/rs2wasm-add-func.wast", - "misc_testsuite/stack_overflow.wast", - "misc_testsuite/winch/misc.wast", - "threads/exports.wast", + let unsupported = [ + "misc_testsuite/call_indirect.wast", + "misc_testsuite/component-model/fused.wast", + "misc_testsuite/component-model/strings.wast", + "misc_testsuite/embenchen_fannkuch.wast", + "misc_testsuite/embenchen_fasta.wast", + "misc_testsuite/embenchen_ifs.wast", + "misc_testsuite/embenchen_primes.wast", + "misc_testsuite/float-round-doesnt-load-too-much.wast", + "misc_testsuite/function-references/call_indirect.wast", + "misc_testsuite/function-references/instance.wast", + "misc_testsuite/function-references/table_fill.wast", + "misc_testsuite/function-references/table_get.wast", + "misc_testsuite/function-references/table_grow.wast", + "misc_testsuite/function-references/table_set.wast", + "misc_testsuite/gc/anyref_that_is_i31_barriers.wast", + "misc_testsuite/gc/i31ref-of-global-initializers.wast", + "misc_testsuite/gc/i31ref-tables.wast", + "misc_testsuite/int-to-float-splat.wast", + "misc_testsuite/issue1809.wast", + "misc_testsuite/issue4840.wast", + "misc_testsuite/issue4890.wast", + "misc_testsuite/issue6562.wast", + "misc_testsuite/many_table_gets_lead_to_gc.wast", + "misc_testsuite/memory-combos.wast", + "misc_testsuite/memory64/simd.wast", + "misc_testsuite/memory64/threads.wast", + "misc_testsuite/misc_traps.wast", + "misc_testsuite/no-panic.wast", + "misc_testsuite/partial-init-table-segment.wast", + "misc_testsuite/rust_fannkuch.wast", + "misc_testsuite/simd/almost-extmul.wast", + "misc_testsuite/simd/canonicalize-nan.wast", + "misc_testsuite/simd/cvt-from-uint.wast", + "misc_testsuite/simd/interesting-float-splat.wast", + "misc_testsuite/simd/issue4807.wast", + "misc_testsuite/simd/issue6725-no-egraph-panic.wast", + "misc_testsuite/simd/issue_3173_select_v128.wast", + "misc_testsuite/simd/issue_3327_bnot_lowering.wast", + "misc_testsuite/simd/load_splat_out_of_bounds.wast", + "misc_testsuite/simd/replace-lane-preserve.wast", + "misc_testsuite/simd/spillslot-size-fuzzbug.wast", + "misc_testsuite/simd/unaligned-load.wast", + "misc_testsuite/simd/v128-select.wast", + "misc_testsuite/table_copy.wast", + "misc_testsuite/table_copy_on_imported_tables.wast", + "misc_testsuite/threads/LB_atomic.wast", + "misc_testsuite/threads/MP_atomic.wast", + "misc_testsuite/threads/MP_wait.wast", + "misc_testsuite/threads/SB_atomic.wast", + "misc_testsuite/threads/load-store-alignment.wast", + "misc_testsuite/winch/_simd_address.wast", + "misc_testsuite/winch/_simd_const.wast", + "misc_testsuite/winch/_simd_load.wast", + "misc_testsuite/winch/_simd_multivalue.wast", + "misc_testsuite/winch/_simd_store.wast", + "misc_testsuite/winch/global.wast", + 
"misc_testsuite/winch/select.wast", + "misc_testsuite/winch/table_fill.wast", + "misc_testsuite/winch/table_get.wast", + "misc_testsuite/winch/table_set.wast", + "spec_testsuite/bulk.wast", + "spec_testsuite/call.wast", + "spec_testsuite/call_indirect.wast", + "spec_testsuite/conversions.wast", + "spec_testsuite/elem.wast", + "spec_testsuite/endianness.wast", + "spec_testsuite/f32.wast", + "spec_testsuite/f32_bitwise.wast", + "spec_testsuite/f32_cmp.wast", + "spec_testsuite/f64.wast", + "spec_testsuite/f64_bitwise.wast", + "spec_testsuite/f64_cmp.wast", + "spec_testsuite/fac.wast", + "spec_testsuite/float_exprs.wast", + "spec_testsuite/float_literals.wast", + "spec_testsuite/float_misc.wast", + "spec_testsuite/func_ptrs.wast", + "spec_testsuite/global.wast", + "spec_testsuite/i32.wast", + "spec_testsuite/i64.wast", + "spec_testsuite/if.wast", + "spec_testsuite/imports.wast", + "spec_testsuite/int_exprs.wast", + "spec_testsuite/labels.wast", + "spec_testsuite/left-to-right.wast", + "spec_testsuite/linking.wast", + "spec_testsuite/load.wast", + "spec_testsuite/local_get.wast", + "spec_testsuite/local_set.wast", + "spec_testsuite/local_tee.wast", + "spec_testsuite/loop.wast", + "spec_testsuite/memory.wast", + "spec_testsuite/memory_grow.wast", + "spec_testsuite/proposals/annotations/simd_lane.wast", + "spec_testsuite/proposals/extended-const/elem.wast", + "spec_testsuite/proposals/extended-const/global.wast", + "spec_testsuite/proposals/multi-memory/float_exprs0.wast", + "spec_testsuite/proposals/multi-memory/float_exprs1.wast", + "spec_testsuite/proposals/multi-memory/imports.wast", + "spec_testsuite/proposals/multi-memory/linking0.wast", + "spec_testsuite/proposals/multi-memory/linking3.wast", + "spec_testsuite/proposals/multi-memory/load.wast", + "spec_testsuite/proposals/multi-memory/load2.wast", + "spec_testsuite/proposals/multi-memory/memory.wast", + "spec_testsuite/proposals/multi-memory/memory_grow.wast", + "spec_testsuite/proposals/multi-memory/simd_memory-multi.wast", + "spec_testsuite/proposals/relaxed-simd/i16x8_relaxed_q15mulr_s.wast", + "spec_testsuite/proposals/relaxed-simd/i32x4_relaxed_trunc.wast", + "spec_testsuite/proposals/relaxed-simd/i8x16_relaxed_swizzle.wast", + "spec_testsuite/proposals/relaxed-simd/relaxed_dot_product.wast", + "spec_testsuite/proposals/relaxed-simd/relaxed_laneselect.wast", + "spec_testsuite/proposals/relaxed-simd/relaxed_madd_nmadd.wast", + "spec_testsuite/proposals/relaxed-simd/relaxed_min_max.wast", + "spec_testsuite/proposals/threads/atomic.wast", + "spec_testsuite/proposals/threads/imports.wast", + "spec_testsuite/proposals/threads/memory.wast", + "spec_testsuite/ref_func.wast", + "spec_testsuite/ref_is_null.wast", + "spec_testsuite/select.wast", + "spec_testsuite/simd_address.wast", + "spec_testsuite/simd_align.wast", + "spec_testsuite/simd_bit_shift.wast", + "spec_testsuite/simd_bitwise.wast", + "spec_testsuite/simd_boolean.wast", + "spec_testsuite/simd_const.wast", + "spec_testsuite/simd_conversions.wast", + "spec_testsuite/simd_f32x4.wast", + "spec_testsuite/simd_f32x4_arith.wast", + "spec_testsuite/simd_f32x4_cmp.wast", + "spec_testsuite/simd_f32x4_pmin_pmax.wast", + "spec_testsuite/simd_f32x4_rounding.wast", + "spec_testsuite/simd_f64x2.wast", + "spec_testsuite/simd_f64x2_arith.wast", + "spec_testsuite/simd_f64x2_cmp.wast", + "spec_testsuite/simd_f64x2_pmin_pmax.wast", + "spec_testsuite/simd_f64x2_rounding.wast", + "spec_testsuite/simd_i16x8_arith.wast", + "spec_testsuite/simd_i16x8_arith2.wast", + "spec_testsuite/simd_i16x8_cmp.wast", + 
"spec_testsuite/simd_i16x8_extadd_pairwise_i8x16.wast", + "spec_testsuite/simd_i16x8_extmul_i8x16.wast", + "spec_testsuite/simd_i16x8_q15mulr_sat_s.wast", + "spec_testsuite/simd_i16x8_sat_arith.wast", + "spec_testsuite/simd_i32x4_arith.wast", + "spec_testsuite/simd_i32x4_arith2.wast", + "spec_testsuite/simd_i32x4_cmp.wast", + "spec_testsuite/simd_i32x4_dot_i16x8.wast", + "spec_testsuite/simd_i32x4_extadd_pairwise_i16x8.wast", + "spec_testsuite/simd_i32x4_extmul_i16x8.wast", + "spec_testsuite/simd_i32x4_trunc_sat_f32x4.wast", + "spec_testsuite/simd_i32x4_trunc_sat_f64x2.wast", + "spec_testsuite/simd_i64x2_arith.wast", + "spec_testsuite/simd_i64x2_arith2.wast", + "spec_testsuite/simd_i64x2_cmp.wast", + "spec_testsuite/simd_i64x2_extmul_i32x4.wast", + "spec_testsuite/simd_i8x16_arith.wast", + "spec_testsuite/simd_i8x16_arith2.wast", + "spec_testsuite/simd_i8x16_cmp.wast", + "spec_testsuite/simd_i8x16_sat_arith.wast", + "spec_testsuite/simd_int_to_int_extend.wast", + "spec_testsuite/simd_lane.wast", + "spec_testsuite/simd_load.wast", + "spec_testsuite/simd_load16_lane.wast", + "spec_testsuite/simd_load32_lane.wast", + "spec_testsuite/simd_load64_lane.wast", + "spec_testsuite/simd_load8_lane.wast", + "spec_testsuite/simd_load_extend.wast", + "spec_testsuite/simd_load_splat.wast", + "spec_testsuite/simd_load_zero.wast", + "spec_testsuite/simd_splat.wast", + "spec_testsuite/simd_store.wast", + "spec_testsuite/simd_store16_lane.wast", + "spec_testsuite/simd_store32_lane.wast", + "spec_testsuite/simd_store64_lane.wast", + "spec_testsuite/simd_store8_lane.wast", + "spec_testsuite/stack.wast", + "spec_testsuite/switch.wast", + "spec_testsuite/table_copy.wast", + "spec_testsuite/table_fill.wast", + "spec_testsuite/table_get.wast", + "spec_testsuite/table_grow.wast", + "spec_testsuite/table_init.wast", + "spec_testsuite/table_set.wast", + "spec_testsuite/traps.wast", ]; - if supported.iter().any(|part| self.path.ends_with(part)) { - return false; - } - - // FIXME: once the backend has enough instruction support move these - // into the above tests since they should pass on 64-bit platforms - // as well. - let supported32bit = [ - "misc_testsuite/winch/table_grow.wast", - "misc_testsuite/table_grow_with_funcref.wast", - // ... - ]; - if cfg!(target_pointer_width = "32") { - if supported32bit.iter().any(|part| self.path.ends_with(part)) { - return false; - } + if unsupported.iter().any(|part| self.path.ends_with(part)) { + return true; } - - return true; - } - - if config.compiler.should_fail(&self.config) { - return true; } // Disable spec tests for proposals that Winch does not implement yet. diff --git a/crates/wast/src/wast.rs b/crates/wast/src/wast.rs index d3a37dd7ccab..9106ebff521f 100644 --- a/crates/wast/src/wast.rs +++ b/crates/wast/src/wast.rs @@ -648,7 +648,7 @@ fn is_matching_assert_invalid_error_message(test: &str, expected: &str, actual: // and another asserts a different error message). Overall we didn't benefit // a whole lot from trying to match errors so just assume the error is // roughly the same and otherwise don't try to match it. 
- if Path::new(test).starts_with("./tests/spec_testsuite") { + if test.contains("spec_testsuite") { return true; } diff --git a/fuzz/fuzz_targets/pulley.rs b/fuzz/fuzz_targets/pulley.rs index d3bebbff4ee3..35b03494802c 100644 --- a/fuzz/fuzz_targets/pulley.rs +++ b/fuzz/fuzz_targets/pulley.rs @@ -1,7 +1,7 @@ #![no_main] use libfuzzer_sys::{arbitrary::*, fuzz_target}; -use pulley_interpreter_fuzz::{interp, roundtrip}; +use pulley_interpreter_fuzz::roundtrip; fuzz_target!(|data| { let _ = fuzz(data); @@ -11,9 +11,8 @@ fn fuzz(data: &[u8]) -> Result<()> { let _ = env_logger::try_init(); let mut u = Unstructured::new(data); - match u.int_in_range(0..=1)? { + match u.int_in_range(0..=0)? { 0 => roundtrip(Arbitrary::arbitrary_take_rest(u)?), - 1 => interp(Arbitrary::arbitrary_take_rest(u)?), _ => unreachable!(), } diff --git a/pulley/CONTRIBUTING.md b/pulley/CONTRIBUTING.md new file mode 100644 index 000000000000..69a1402125e8 --- /dev/null +++ b/pulley/CONTRIBUTING.md @@ -0,0 +1,278 @@ +# Contributing + +For general contribution to Wasmtime, see Wasmtime's [contributing docs][docs]. + +[docs]: https://docs.wasmtime.dev/contributing.html + +## Adding an instruction to Pulley + +So you want to add an instruction to Pulley. If you're reading this in the +not-so-distant future Pulley probably doesn't support all of WebAssembly yet and +you're interested in helping to improve the situation. This is intended to be a +small guide about how to add an instruction to Pulley through an early example +of doing so. + +#### Choose a test to get passing + +First off find a test in this repository, probably a `*.wast` test, which isn't +currently passing. Check out the `WastTest::should_fail` method in +`crates/wast-util/src/lib.rs` which has a list of `unsupported` tests for +Pulley. Here we're going to select `./tests/misc_testsuite/control-flow.wast` +as it's a reasonably small test. + +#### See the test failure + +Run this command: + +``` +$ cargo run --features pulley -- wast --target pulley64 ./tests/misc_testsuite/control-flow.wast +``` + +This builds the `wasmtime` CLI with Pulley support enabled (`--features +pulley`), runs the `wast` subcommand, executes with the pulley target +(`--target pulley64`), and then runs our test. As of now this shows: + +``` +$ cargo run --features pulley -- wast --target pulley64 ./tests/misc_testsuite/control-flow.wast + Finished `dev` profile [unoptimized + debuginfo] target(s) in 0.08s + Running `target/debug/wasmtime wast --target pulley64 ./tests/misc_testsuite/control-flow.wast` +Error: failed to run script file './tests/misc_testsuite/control-flow.wast' + +Caused by: + 0: failed directive on ./tests/misc_testsuite/control-flow.wast:77:1 + 1: Compilation error: Unsupported feature: should be implemented in ISLE: inst = `v5 = sdiv.i32 v2, v3`, type = `Some(types::I32)` +``` + +Note that if you run `cargo test --test wast control-flow.wast` it'll also run +the same test under a variety of configurations, but the test is expected to +fail under Pulley. You can update the `WastTest::should_fail` method in +`crates/wast-util/src/lib.rs` to say the test is expected to pass, and then you +can see a similar failure. + +#### Adding an instruction: Pulley + +Here the failure is what's the most common failure in Pulley right now -- the +Pulley Cranelift backend is not yet complete and is missing a lowering. This +means that there is CLIF that cannot be lowered to Pulley bytecode just yet. 
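+
+For context, the `sdiv.i32` CLIF instruction in the error above is what
+Cranelift produces for wasm's `i32.div_s`. A minimal module exercising the same
+missing lowering might look roughly like this (a hypothetical reproducer, not
+the actual contents of `control-flow.wast`):
+
+```
+;; hypothetical reproducer for the missing `sdiv.i32` lowering
+(module
+  (func (export "div") (param i32 i32) (result i32)
+    (i32.div_s (local.get 0) (local.get 1))))
+```
+
+Small reproducers like this can be handy for narrowing down exactly which CLIF
+instruction is missing a lowering while you iterate.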
+
+The first thing to do is to probably add a new opcode to Pulley itself as
+Pulley probably can't execute this operation just yet. Here we're interested in
+signed 32-bit division.
+
+Pull up `pulley/src/lib.rs` and you'll be editing the `for_each_op!` macro
+definition. If this is a "rare" opcode you can edit the `for_each_extended_op!`
+macro instead. The syntax is the same between the two macros.
+
+Here this is a simple instruction, so let's add it directly:
+
+```rust
+/// `dst = src1 / src2` (signed)
+xdiv32_s = XDiv32S { operands: BinaryOperands<XReg> };
+```
+
+This defines the snake-case name of the instruction (`xdiv32_s`) that is used
+by the disassembler and visitor trait, the upper-camel-case name of the
+instruction (`XDiv32S`) used for the Rust type and `enum` variant, and the
+immediates and operands of the instruction itself. In this case it's a binary
+operation using integer ("x") registers.
+
+> Note: By convention, we tend to include the class ("x") and width ("32") of
+> registers operated upon by an instruction in its name. This distinguishes
+> between, for example, 32-bit integer addition (`xadd32`) and 64-bit floating
+> point addition (`fadd64`).
+
+Rerun our test command and we see:
+
+```
+$ cargo run --features pulley -- wast --target pulley64 ./tests/misc_testsuite/control-flow.wast
+   Compiling pulley-interpreter v29.0.0 (/home/alex/code/wasmtime/pulley)
+error[E0046]: not all trait items implemented, missing: `xdiv32_s`
+   --> pulley/src/interp.rs:807:1
+    |
+807 | impl OpVisitor for Interpreter<'_> {
+    | ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ missing `xdiv32_s` in implementation
+    |
+   ::: pulley/src/decode.rs:574:17
+    |
+574 |         fn $snake_name(&mut self $( $( , $field : $field_ty )* )? ) -> Self::Return;
+    |         ---------------------------------------------------------------------------- `xdiv32_s` from trait
+
+   Compiling cranelift-codegen-meta v0.116.0 (/home/alex/code/wasmtime/cranelift/codegen/meta)
+For more information about this error, try `rustc --explain E0046`.
+```
+
+This indicates that we need to actually implement the new opcode in the
+interpreter. Open up `pulley/src/interp.rs` and append to `impl OpVisitor for
+Interpreter` or `impl ExtendedOpVisitor for Interpreter` as appropriate. Here
+we'll add:
+
+```rust
+fn xdiv32_s(&mut self, operands: BinaryOperands<XReg>) -> ControlFlow<Done> {
+    let a = self.state[operands.src1].get_i32();
+    let b = self.state[operands.src2].get_i32();
+    match a.checked_div(b) {
+        Some(result) => {
+            self.state[operands.dst].set_i32(result);
+            ControlFlow::Continue(())
+        }
+        None => self.done_trap::<crate::XDiv32S>(),
+    }
+}
+```
+
+Note that division needs to handle the case where the divisor is 0 or the
+division overflows, hence the use of `checked_div` here. If that happens then a
+trap is returned, otherwise interpretation continues. Also note that the
+`get_i32` method is used to specifically match the width and signedness of the
+instruction itself, signed 32-bit division. Look around at other instructions
+in `interp.rs` for inspiration on how to do various operations.
+
+Running our test again we get the same error as before!
+
+```
+$ cargo run --features pulley -- wast --target pulley64 ./tests/misc_testsuite/control-flow.wast
+    Finished `dev` profile [unoptimized + debuginfo] target(s) in 0.08s
+     Running `target/debug/wasmtime wast --target pulley64 ./tests/misc_testsuite/control-flow.wast`
+Error: failed to run script file './tests/misc_testsuite/control-flow.wast'
+
+Caused by:
+    0: failed directive on ./tests/misc_testsuite/control-flow.wast:77:1
+    1: Compilation error: Unsupported feature: should be implemented in ISLE: inst = `v5 = sdiv.i32 v2, v3`, type = `Some(types::I32)`
+```
+
+That leads us to the next part...
+
+#### Adding a Cranelift Lowering
+
+Next up we need to actually fix the error at hand: a new lowering rule needs to
+be added to Cranelift. Here we'll be working in
+`cranelift/codegen/src/isa/pulley_shared/lower.isle`.
+
+Our ISLE lowering rules are generally written like this:
+
+```lisp
+(rule (lower <pattern>)
+      <replacement>)
+```
+
+This means "when lowering Cranelift's mid-level IR down to Pulley bytecode, and
+we match the snippet `<pattern>`, replace it with `<replacement>`". (For more
+details, see the [ISLE Language
+Reference](https://github.com/bytecodealliance/wasmtime/blob/main/cranelift/isle/docs/language-reference.md)
+and [How ISLE is Integrated with
+Cranelift](https://github.com/bytecodealliance/wasmtime/blob/main/cranelift/docs/isle-integration.md).)
+
+In our case, we need to match an `sdiv` CLIF instruction that is operating on
+32-bit values (this is the `(has_type $I32 ...)` bit) and then replace it with
+our new Pulley `xdiv32_s` instruction:
+
+```
+;;;; Rules for `idiv` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+
+(rule (lower (has_type $I32 (sdiv a b)))
+      (pulley_xdiv32_s a b))
+```
+
+Note that ISLE constructors for Pulley instructions, including the
+`pulley_xdiv32_s` constructor for our new `xdiv32_s` Pulley instruction, are
+automatically generated from the `for_each_op!` macro.
+
+Running our test again yields:
+
+```
+Error: failed to run script file './tests/misc_testsuite/control-flow.wast'
+
+Caused by:
+    0: failed directive on ./tests/misc_testsuite/control-flow.wast:83:1
+    1: Compilation error: Unsupported feature: should be implemented in ISLE: inst = `v26 = band.i32 v2, v13  ; v13 = 3`, type = `Some(types::I32)`
+```
+
+Progress! This is a different error than before. Now it's time to rinse and
+repeat these steps. Be sure to skim the rest of `lower.isle` for inspiration on
+how to implement lowering rules. You can also look at the `lower.isle` files of
+other architecture backends for inspiration.
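+
+For instance, when we get to the `band.i32` error above, the follow-up lowering
+might look something like the following sketch, assuming an `xand32` Pulley
+instruction has been added using the same steps as before (this shows the shape
+of the rule, not necessarily the exact rule in-tree):
+
+```lisp
+;;;; Rules for `band` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+
+;; Sketch: assumes an `xand32` opcode and its generated `pulley_xand32`
+;; constructor exist, added the same way as `xdiv32_s` above.
+(rule (lower (has_type $I32 (band a b)))
+      (pulley_xand32 a b))
+```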
+ +#### Flagging a test as passing + +After implementing a lowering for `band.i32` our test case is now passing: + +``` +$ cargo run --features pulley -- wast --target pulley64 ./tests/misc_testsuite/control-flow.wast + Finished `dev` profile [unoptimized + debuginfo] target(s) in 13.50s + Running `target/debug/wasmtime wast --target pulley64 ./tests/misc_testsuite/control-flow.wast` +``` + +If we run the test suite though we'll see: + +``` +$ cargo test --test wast control-flow.wast + Finished `test` profile [unoptimized + debuginfo] target(s) in 29.14s + Running tests/wast.rs (target/debug/deps/wast-f83a3ee5e5dbacde) + +running 6 tests +...F.F +failures: + +---- CraneliftPulley/./tests/misc_testsuite/control-flow.wast ---- +this test is flagged as should-fail but it succeeded + +---- CraneliftPulley/pooling/./tests/misc_testsuite/control-flow.wast ---- +this test is flagged as should-fail but it succeeded + + +failures: + CraneliftPulley/./tests/misc_testsuite/control-flow.wast + CraneliftPulley/pooling/./tests/misc_testsuite/control-flow.wast + +test result: FAILED. 4 passed; 2 failed; 0 ignored; 0 measured; 4086 filtered out; finished in 0.05s + +error: test failed, to rerun pass `--test wast` +``` + +This indicates that the test was previously flagged as "should fail", but that +assertion is no longer true! Update the `WastTest::should_fail` method in +`crates/wast-util/src/lib.rs` so that it expects the test to pass by deleting +the tests from the `unsupported` list. Then we'll see: + +``` +$ cargo test --test wast control-flow.wast + Finished `test` profile [unoptimized + debuginfo] target(s) in 0.74s + Running tests/wast.rs (target/debug/deps/wast-f83a3ee5e5dbacde) + +running 6 tests +...... +test result: ok. 6 passed; 0 failed; 0 ignored; 0 measured; 4086 filtered out; finished in 0.05s +``` + +Success! + +But we aren't quite done yet: the new lowerings we added might have made +additional tests that previously failed start passing as well. If so, then we +also want to mark those tests as expected to pass now. 
We can double check by +running the full `wast` test suite: + +``` +$ cargo test --test wast Pulley + Finished `test` profile [unoptimized + debuginfo] target(s) in 0.74s + Running tests/wast.rs (target/debug/deps/wast-f83a3ee5e5dbacde) + + +running 1364 tests +.................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................... +test result: ok. 1364 passed; 0 failed; 0 ignored; 0 measured; 2728 filtered out; finished in 0.93s +``` + +Alas, maybe next time! + +#### Clean up and make a PR + +All that's left now is to clean things up, document anything necessary, and make +a pull request. + +To view the complete pull request that implemented `xdiv32_s` and `xand32` +Pulley instructions and got `./tests/misc_testsuite/control-flow.wast` passing +(and also introduced this documentation) check out +[#9765](https://github.com/bytecodealliance/wasmtime/pull/9765). + +Thanks for helping out! diff --git a/pulley/fuzz/src/interp.rs b/pulley/fuzz/src/interp.rs deleted file mode 100644 index be968d4ceeca..000000000000 --- a/pulley/fuzz/src/interp.rs +++ /dev/null @@ -1,142 +0,0 @@ -use pulley_interpreter::{ - interp::{DoneReason, Vm}, - op::{self, ExtendedOp, Op}, - *, -}; -use std::ptr::NonNull; - -pub fn interp(ops: Vec) { - let _ = env_logger::try_init(); - - log::trace!("input: {ops:#?}"); - - let mut ops = ops; - ops.retain(|op| op_is_safe_for_fuzzing(op)); - // Make sure that we end with a `ret` so that the interpreter returns - // control to us instead of continuing off the end of the ops and into - // undefined memory. 
- ops.push(Op::Ret(op::Ret {})); - - log::trace!("filtered to only safe ops: {ops:#?}"); - - let mut encoded = vec![]; - for op in &ops { - op.encode(&mut encoded); - } - log::trace!("encoded: {encoded:?}"); - - let mut vm = Vm::new(); - unsafe { - let args = &[]; - let rets = &[]; - match vm.call(NonNull::from(&encoded[0]), args, rets.into_iter().copied()) { - DoneReason::ReturnToHost(rets) => assert_eq!(rets.count(), 0), - DoneReason::Trap(pc) => { - let pc = pc.as_ptr() as usize; - - let start = &encoded[0] as *const u8 as usize; - let end = encoded.last().unwrap() as *const u8 as usize; - assert!( - start <= pc && pc < end, - "pc should be in range {start:#018x}..{end:#018x}, got {pc:#018x}" - ); - - let index = pc - start; - assert_eq!(encoded[index], Opcode::ExtendedOp as u8); - let [a, b] = (ExtendedOpcode::Trap as u16).to_le_bytes(); - assert_eq!(encoded[index + 1], a); - assert_eq!(encoded[index + 2], b); - } - DoneReason::CallIndirectHost { .. } => unreachable!(), - }; - } -} - -fn op_is_safe_for_fuzzing(op: &Op) -> bool { - match op { - Op::Ret(_) => true, - Op::Jump(_) => false, - Op::BrIf(_) => false, - Op::BrIfNot(_) => false, - Op::BrIfXeq32(_) => false, - Op::BrIfXneq32(_) => false, - Op::BrIfXult32(_) => false, - Op::BrIfXulteq32(_) => false, - Op::BrIfXslt32(_) => false, - Op::BrIfXslteq32(_) => false, - Op::BrIfXeq64(_) => false, - Op::BrIfXneq64(_) => false, - Op::BrIfXult64(_) => false, - Op::BrIfXulteq64(_) => false, - Op::BrIfXslt64(_) => false, - Op::BrIfXslteq64(_) => false, - Op::Xmov(op::Xmov { dst, .. }) => !dst.is_special(), - Op::Fmov(_) => true, - Op::Vmov(_) => true, - Op::Xconst8(op::Xconst8 { dst, .. }) => !dst.is_special(), - Op::Xconst16(op::Xconst16 { dst, .. }) => !dst.is_special(), - Op::Xconst32(op::Xconst32 { dst, .. }) => !dst.is_special(), - Op::Xconst64(op::Xconst64 { dst, .. }) => !dst.is_special(), - Op::Load32U(_) => false, - Op::Load32S(_) => false, - Op::Load64(_) => false, - Op::Load32UOffset8(_) => false, - Op::Load32SOffset8(_) => false, - Op::Load32UOffset64(_) => false, - Op::Load32SOffset64(_) => false, - Op::Load64Offset8(_) => false, - Op::Load64Offset64(_) => false, - Op::Store32(_) => false, - Op::Store64(_) => false, - Op::Store32SOffset8(_) => false, - Op::Store32SOffset64(_) => false, - Op::Store64Offset8(_) => false, - Op::Store64Offset64(_) => false, - Op::BitcastIntFromFloat32(op::BitcastIntFromFloat32 { dst, .. }) => !dst.is_special(), - Op::BitcastIntFromFloat64(op::BitcastIntFromFloat64 { dst, .. }) => !dst.is_special(), - Op::BitcastFloatFromInt32(_) => true, - Op::BitcastFloatFromInt64(_) => true, - Op::ExtendedOp(op) => extended_op_is_safe_for_fuzzing(op), - Op::Call(_) => false, - Op::CallIndirect(_) => false, - Op::Xadd32(Xadd32 { operands, .. }) - | Op::Xadd64(Xadd64 { operands, .. }) - | Op::Xeq64(Xeq64 { operands, .. }) - | Op::Xneq64(Xneq64 { operands, .. }) - | Op::Xslt64(Xslt64 { operands, .. }) - | Op::Xslteq64(Xslteq64 { operands, .. }) - | Op::Xult64(Xult64 { operands, .. }) - | Op::Xulteq64(Xulteq64 { operands, .. }) - | Op::Xeq32(Xeq32 { operands, .. }) - | Op::Xneq32(Xneq32 { operands, .. }) - | Op::Xslt32(Xslt32 { operands, .. }) - | Op::Xslteq32(Xslteq32 { operands, .. }) - | Op::Xult32(Xult32 { operands, .. }) - | Op::Xulteq32(Xulteq32 { operands, .. 
}) => !operands.dst.is_special(), - Op::PushFrame(_) | Op::PopFrame(_) => false, - Op::XPush32(_) | Op::XPush64(_) => false, - Op::XPop32(_) | Op::XPop64(_) => false, - Op::XPush32Many(_) | Op::XPush64Many(_) => false, - Op::XPop32Many(_) | Op::XPop64Many(_) => false, - Op::BrTable32(_) => false, - Op::StackAlloc32(_) => false, - Op::StackFree32(_) => false, - Op::Zext8(Zext8 { dst, .. }) - | Op::Zext16(Zext16 { dst, .. }) - | Op::Zext32(Zext32 { dst, .. }) - | Op::Sext8(Sext8 { dst, .. }) - | Op::Sext32(Sext32 { dst, .. }) - | Op::Sext16(Sext16 { dst, .. }) => !dst.is_special(), - } -} - -fn extended_op_is_safe_for_fuzzing(op: &ExtendedOp) -> bool { - match op { - ExtendedOp::Trap(_) => true, - ExtendedOp::Nop(_) => true, - ExtendedOp::CallIndirectHost(_) => false, - ExtendedOp::Bswap32(Bswap32 { dst, .. }) | ExtendedOp::Bswap64(Bswap64 { dst, .. }) => { - !dst.is_special() - } - } -} diff --git a/pulley/fuzz/src/lib.rs b/pulley/fuzz/src/lib.rs index b041d1676312..d42468227ad8 100644 --- a/pulley/fuzz/src/lib.rs +++ b/pulley/fuzz/src/lib.rs @@ -1,5 +1,2 @@ mod roundtrip; pub use roundtrip::*; - -mod interp; -pub use interp::*; diff --git a/pulley/src/interp.rs b/pulley/src/interp.rs index 1783ec78b102..c880eb1d40ff 100644 --- a/pulley/src/interp.rs +++ b/pulley/src/interp.rs @@ -10,12 +10,13 @@ use core::fmt; use core::mem; use core::ops::ControlFlow; use core::ops::{Index, IndexMut}; -use core::ptr::{self, NonNull}; +use core::ptr::NonNull; use sptr::Strict; -#[cfg(not(pulley_tail_calls))] +mod debug; +#[cfg(all(not(pulley_tail_calls), not(pulley_assume_llvm_makes_tail_calls)))] mod match_loop; -#[cfg(pulley_tail_calls)] +#[cfg(any(pulley_tail_calls, pulley_assume_llvm_makes_tail_calls))] mod tail_loop; const DEFAULT_STACK_SIZE: usize = 1 << 20; // 1 MiB @@ -534,6 +535,24 @@ impl Default for VRegVal { } } +#[allow(missing_docs)] +impl VRegVal { + pub fn new_u128(i: u128) -> Self { + let mut val = Self::default(); + val.set_u128(i); + val + } + + pub fn get_u128(&self) -> u128 { + let val = unsafe { self.0.u128 }; + u128::from_le(val) + } + + pub fn set_u128(&mut self, val: u128) { + self.0.u128 = val.to_le(); + } +} + /// The machine state for a Pulley virtual machine: the various registers and /// stack. pub struct MachineState { @@ -650,7 +669,8 @@ impl MachineState { /// Inner private module to prevent creation of the `Done` structure outside of /// this module. mod done { - use super::{Interpreter, MachineState}; + use super::{Encode, Interpreter, MachineState}; + use core::ops::ControlFlow; use core::ptr::NonNull; /// Zero-sized sentinel indicating that pulley execution has halted. @@ -688,24 +708,31 @@ mod done { impl Interpreter<'_> { /// Finishes execution by recording `DoneReason::Trap`. - pub fn done_trap(&mut self, pc: NonNull) -> Done { + /// + /// This method takes an `I` generic parameter indicating which + /// instruction is executing this function and generating a trap. That's + /// used to go backwards from the current `pc` which is just beyond the + /// instruction to point to the instruction itself in the trap metadata + /// returned from the interpreter. + pub fn done_trap(&mut self) -> ControlFlow { + let pc = self.current_pc::(); self.state.done_reason = Some(DoneReason::Trap(pc)); - Done { _priv: () } + ControlFlow::Break(Done { _priv: () }) } /// Finishes execution by recording `DoneReason::CallIndirectHost`. 
- pub fn done_call_indirect_host(&mut self, id: u8) -> Done { + pub fn done_call_indirect_host(&mut self, id: u8) -> ControlFlow { self.state.done_reason = Some(DoneReason::CallIndirectHost { id, resume: self.pc.as_ptr(), }); - Done { _priv: () } + ControlFlow::Break(Done { _priv: () }) } /// Finishes execution by recording `DoneReason::ReturnToHost`. - pub fn done_return_to_host(&mut self) -> Done { + pub fn done_return_to_host(&mut self) -> ControlFlow { self.state.done_reason = Some(DoneReason::ReturnToHost(())); - Done { _priv: () } + ControlFlow::Break(Done { _priv: () }) } } } @@ -719,10 +746,17 @@ struct Interpreter<'a> { } impl Interpreter<'_> { + /// Performs a relative jump of `offset` bytes from the current instruction. + /// + /// This will jump from the start of the current instruction, identified by + /// `I`, `offset` bytes away. Note that the `self.pc` at the start of this + /// function actually points to the instruction after this one so `I` is + /// necessary to go back to ourselves after which we then go `offset` away. #[inline] - fn pc_rel_jump(&mut self, offset: PcRelOffset, inst_size: isize) -> ControlFlow { + fn pc_rel_jump(&mut self, offset: PcRelOffset) -> ControlFlow { let offset = isize::try_from(i32::from(offset)).unwrap(); - self.pc = unsafe { self.pc.offset(offset - inst_size) }; + let my_pc = self.current_pc::(); + self.pc = unsafe { UnsafeBytecodeStream::new(my_pc.offset(offset)) }; ControlFlow::Continue(()) } @@ -733,10 +767,13 @@ impl Interpreter<'_> { } /// `sp -= size_of::(); *sp = val;` + /// + /// Note that `I` is the instruction which is pushing data to use if a trap + /// is generated. #[must_use] - fn push(&mut self, val: T, pc: NonNull) -> ControlFlow { + fn push(&mut self, val: T) -> ControlFlow { let new_sp = self.state[XReg::sp].get_ptr::().wrapping_sub(1); - self.set_sp(new_sp, pc)?; + self.set_sp::(new_sp.cast())?; unsafe { new_sp.write_unaligned(val); } @@ -755,12 +792,16 @@ impl Interpreter<'_> { /// /// Returns a trap if this would result in stack overflow, or if `sp` is /// beneath the base pointer of `self.state.stack`. + /// + /// The `I` parameter here is the instruction that is setting the stack + /// pointer and is used to calculate this instruction's own `pc` if this + /// instruction traps. 
#[must_use] - fn set_sp(&mut self, sp: *mut T, pc: NonNull) -> ControlFlow { + fn set_sp(&mut self, sp: *mut u8) -> ControlFlow { let sp_raw = sp as usize; let base_raw = self.state.stack.as_ptr() as usize; if sp_raw < base_raw { - return ControlFlow::Break(self.done_trap(pc)); + return self.done_trap::(); } self.set_sp_unchecked(sp); ControlFlow::Continue(()) @@ -777,26 +818,42 @@ impl Interpreter<'_> { } self.state[XReg::sp].set_ptr(sp); } + + unsafe fn load(&self, ptr: XReg, offset: i32) -> T { + unsafe { + self.state[ptr] + .get_ptr::() + .byte_offset(offset as isize) + .read_unaligned() + } + } + + unsafe fn store(&self, ptr: XReg, offset: i32, val: T) { + self.state[ptr] + .get_ptr::() + .byte_offset(offset as isize) + .write_unaligned(val) + } } #[test] fn simple_push_pop() { let mut state = MachineState::with_stack(vec![0; 16]); unsafe { + let mut bytecode = [0; 10]; let mut i = Interpreter { state: &mut state, // this isn't actually read so just manufacture a dummy one - pc: UnsafeBytecodeStream::new((&mut 0).into()), + pc: UnsafeBytecodeStream::new(NonNull::new(bytecode.as_mut_ptr().offset(4)).unwrap()), }; - let pc = NonNull::from(&0); - assert!(i.push(0_i32, pc).is_continue()); + assert!(i.push::(0_i32).is_continue()); assert_eq!(i.pop::(), 0_i32); - assert!(i.push(1_i32, pc).is_continue()); - assert!(i.push(2_i32, pc).is_continue()); - assert!(i.push(3_i32, pc).is_continue()); - assert!(i.push(4_i32, pc).is_continue()); - assert!(i.push(5_i32, pc).is_break()); - assert!(i.push(6_i32, pc).is_break()); + assert!(i.push::(1_i32).is_continue()); + assert!(i.push::(2_i32).is_continue()); + assert!(i.push::(3_i32).is_continue()); + assert!(i.push::(4_i32).is_continue()); + assert!(i.push::(5_i32).is_break()); + assert!(i.push::(6_i32).is_break()); assert_eq!(i.pop::(), 4_i32); assert_eq!(i.pop::(), 3_i32); assert_eq!(i.pop::(), 2_i32); @@ -815,7 +872,7 @@ impl OpVisitor for Interpreter<'_> { fn ret(&mut self) -> ControlFlow { let lr = self.state[XReg::lr]; if lr == XRegVal::HOST_RETURN_ADDR { - ControlFlow::Break(self.done_return_to_host()) + self.done_return_to_host() } else { let return_addr = lr.get_ptr(); self.pc = unsafe { UnsafeBytecodeStream::new(NonNull::new_unchecked(return_addr)) }; @@ -826,7 +883,7 @@ impl OpVisitor for Interpreter<'_> { fn call(&mut self, offset: PcRelOffset) -> ControlFlow { let return_addr = self.pc.as_ptr(); self.state[XReg::lr].set_ptr(return_addr.as_ptr()); - self.pc_rel_jump(offset, 5); + self.pc_rel_jump::(offset); ControlFlow::Continue(()) } @@ -843,23 +900,23 @@ impl OpVisitor for Interpreter<'_> { } fn jump(&mut self, offset: PcRelOffset) -> ControlFlow { - self.pc_rel_jump(offset, 5); + self.pc_rel_jump::(offset); ControlFlow::Continue(()) } - fn br_if(&mut self, cond: XReg, offset: PcRelOffset) -> ControlFlow { - let cond = self.state[cond].get_u64(); + fn br_if32(&mut self, cond: XReg, offset: PcRelOffset) -> ControlFlow { + let cond = self.state[cond].get_u32(); if cond != 0 { - self.pc_rel_jump(offset, 6) + self.pc_rel_jump::(offset) } else { ControlFlow::Continue(()) } } - fn br_if_not(&mut self, cond: XReg, offset: PcRelOffset) -> ControlFlow { - let cond = self.state[cond].get_u64(); + fn br_if_not32(&mut self, cond: XReg, offset: PcRelOffset) -> ControlFlow { + let cond = self.state[cond].get_u32(); if cond == 0 { - self.pc_rel_jump(offset, 6) + self.pc_rel_jump::(offset) } else { ControlFlow::Continue(()) } @@ -869,7 +926,7 @@ impl OpVisitor for Interpreter<'_> { let a = self.state[a].get_u32(); let b = self.state[b].get_u32(); if a 
== b { - self.pc_rel_jump(offset, 7) + self.pc_rel_jump::(offset) } else { ControlFlow::Continue(()) } @@ -879,7 +936,7 @@ impl OpVisitor for Interpreter<'_> { let a = self.state[a].get_u32(); let b = self.state[b].get_u32(); if a != b { - self.pc_rel_jump(offset, 7) + self.pc_rel_jump::(offset) } else { ControlFlow::Continue(()) } @@ -889,7 +946,7 @@ impl OpVisitor for Interpreter<'_> { let a = self.state[a].get_i32(); let b = self.state[b].get_i32(); if a < b { - self.pc_rel_jump(offset, 7) + self.pc_rel_jump::(offset) } else { ControlFlow::Continue(()) } @@ -899,7 +956,7 @@ impl OpVisitor for Interpreter<'_> { let a = self.state[a].get_i32(); let b = self.state[b].get_i32(); if a <= b { - self.pc_rel_jump(offset, 7) + self.pc_rel_jump::(offset) } else { ControlFlow::Continue(()) } @@ -909,7 +966,7 @@ impl OpVisitor for Interpreter<'_> { let a = self.state[a].get_u32(); let b = self.state[b].get_u32(); if a < b { - self.pc_rel_jump(offset, 7) + self.pc_rel_jump::(offset) } else { ControlFlow::Continue(()) } @@ -919,7 +976,7 @@ impl OpVisitor for Interpreter<'_> { let a = self.state[a].get_u32(); let b = self.state[b].get_u32(); if a <= b { - self.pc_rel_jump(offset, 7) + self.pc_rel_jump::(offset) } else { ControlFlow::Continue(()) } @@ -929,7 +986,7 @@ impl OpVisitor for Interpreter<'_> { let a = self.state[a].get_u64(); let b = self.state[b].get_u64(); if a == b { - self.pc_rel_jump(offset, 7) + self.pc_rel_jump::(offset) } else { ControlFlow::Continue(()) } @@ -939,7 +996,7 @@ impl OpVisitor for Interpreter<'_> { let a = self.state[a].get_u64(); let b = self.state[b].get_u64(); if a != b { - self.pc_rel_jump(offset, 7) + self.pc_rel_jump::(offset) } else { ControlFlow::Continue(()) } @@ -949,7 +1006,7 @@ impl OpVisitor for Interpreter<'_> { let a = self.state[a].get_i64(); let b = self.state[b].get_i64(); if a < b { - self.pc_rel_jump(offset, 7) + self.pc_rel_jump::(offset) } else { ControlFlow::Continue(()) } @@ -959,7 +1016,7 @@ impl OpVisitor for Interpreter<'_> { let a = self.state[a].get_i64(); let b = self.state[b].get_i64(); if a <= b { - self.pc_rel_jump(offset, 7) + self.pc_rel_jump::(offset) } else { ControlFlow::Continue(()) } @@ -969,7 +1026,7 @@ impl OpVisitor for Interpreter<'_> { let a = self.state[a].get_u64(); let b = self.state[b].get_u64(); if a < b { - self.pc_rel_jump(offset, 7) + self.pc_rel_jump::(offset) } else { ControlFlow::Continue(()) } @@ -979,7 +1036,7 @@ impl OpVisitor for Interpreter<'_> { let a = self.state[a].get_u64(); let b = self.state[b].get_u64(); if a <= b { - self.pc_rel_jump(offset, 7) + self.pc_rel_jump::(offset) } else { ControlFlow::Continue(()) } @@ -1037,275 +1094,336 @@ impl OpVisitor for Interpreter<'_> { ControlFlow::Continue(()) } + fn xadd32_uoverflow_trap(&mut self, operands: BinaryOperands) -> ControlFlow { + let a = self.state[operands.src1].get_u32(); + let b = self.state[operands.src2].get_u32(); + match a.checked_add(b) { + Some(c) => { + self.state[operands.dst].set_u32(c); + ControlFlow::Continue(()) + } + None => self.done_trap::(), + } + } + + fn xadd64_uoverflow_trap(&mut self, operands: BinaryOperands) -> ControlFlow { + let a = self.state[operands.src1].get_u64(); + let b = self.state[operands.src2].get_u64(); + match a.checked_add(b) { + Some(c) => { + self.state[operands.dst].set_u64(c); + ControlFlow::Continue(()) + } + None => self.done_trap::(), + } + } + + fn xsub32(&mut self, operands: BinaryOperands) -> ControlFlow { + let a = self.state[operands.src1].get_u32(); + let b = self.state[operands.src2].get_u32(); + 
self.state[operands.dst].set_u32(a.wrapping_sub(b)); + ControlFlow::Continue(()) + } + + fn xsub64(&mut self, operands: BinaryOperands) -> ControlFlow { + let a = self.state[operands.src1].get_u64(); + let b = self.state[operands.src2].get_u64(); + self.state[operands.dst].set_u64(a.wrapping_sub(b)); + ControlFlow::Continue(()) + } + + fn xshl32(&mut self, operands: BinaryOperands) -> ControlFlow { + let a = self.state[operands.src1].get_u32(); + let b = self.state[operands.src2].get_u32(); + self.state[operands.dst].set_u32(a.wrapping_shl(b)); + ControlFlow::Continue(()) + } + + fn xshr32_u(&mut self, operands: BinaryOperands) -> ControlFlow { + let a = self.state[operands.src1].get_u32(); + let b = self.state[operands.src2].get_u32(); + self.state[operands.dst].set_u32(a.wrapping_shr(b)); + ControlFlow::Continue(()) + } + + fn xshr32_s(&mut self, operands: BinaryOperands) -> ControlFlow { + let a = self.state[operands.src1].get_i32(); + let b = self.state[operands.src2].get_u32(); + self.state[operands.dst].set_i32(a.wrapping_shr(b)); + ControlFlow::Continue(()) + } + + fn xshl64(&mut self, operands: BinaryOperands) -> ControlFlow { + let a = self.state[operands.src1].get_u64(); + let b = self.state[operands.src2].get_u32(); + self.state[operands.dst].set_u64(a.wrapping_shl(b)); + ControlFlow::Continue(()) + } + + fn xshr64_u(&mut self, operands: BinaryOperands) -> ControlFlow { + let a = self.state[operands.src1].get_u64(); + let b = self.state[operands.src2].get_u32(); + self.state[operands.dst].set_u64(a.wrapping_shr(b)); + ControlFlow::Continue(()) + } + + fn xshr64_s(&mut self, operands: BinaryOperands) -> ControlFlow { + let a = self.state[operands.src1].get_i64(); + let b = self.state[operands.src2].get_u32(); + self.state[operands.dst].set_i64(a.wrapping_shr(b)); + ControlFlow::Continue(()) + } + fn xeq64(&mut self, operands: BinaryOperands) -> ControlFlow { let a = self.state[operands.src1].get_u64(); let b = self.state[operands.src2].get_u64(); - self.state[operands.dst].set_u64(u64::from(a == b)); + self.state[operands.dst].set_u32(u32::from(a == b)); ControlFlow::Continue(()) } fn xneq64(&mut self, operands: BinaryOperands) -> ControlFlow { let a = self.state[operands.src1].get_u64(); let b = self.state[operands.src2].get_u64(); - self.state[operands.dst].set_u64(u64::from(a != b)); + self.state[operands.dst].set_u32(u32::from(a != b)); ControlFlow::Continue(()) } fn xslt64(&mut self, operands: BinaryOperands) -> ControlFlow { let a = self.state[operands.src1].get_i64(); let b = self.state[operands.src2].get_i64(); - self.state[operands.dst].set_u64(u64::from(a < b)); + self.state[operands.dst].set_u32(u32::from(a < b)); ControlFlow::Continue(()) } fn xslteq64(&mut self, operands: BinaryOperands) -> ControlFlow { let a = self.state[operands.src1].get_i64(); let b = self.state[operands.src2].get_i64(); - self.state[operands.dst].set_u64(u64::from(a <= b)); + self.state[operands.dst].set_u32(u32::from(a <= b)); ControlFlow::Continue(()) } fn xult64(&mut self, operands: BinaryOperands) -> ControlFlow { let a = self.state[operands.src1].get_u64(); let b = self.state[operands.src2].get_u64(); - self.state[operands.dst].set_u64(u64::from(a < b)); + self.state[operands.dst].set_u32(u32::from(a < b)); ControlFlow::Continue(()) } fn xulteq64(&mut self, operands: BinaryOperands) -> ControlFlow { let a = self.state[operands.src1].get_u64(); let b = self.state[operands.src2].get_u64(); - self.state[operands.dst].set_u64(u64::from(a <= b)); + self.state[operands.dst].set_u32(u32::from(a 
<= b)); ControlFlow::Continue(()) } fn xeq32(&mut self, operands: BinaryOperands) -> ControlFlow { let a = self.state[operands.src1].get_u32(); let b = self.state[operands.src2].get_u32(); - self.state[operands.dst].set_u64(u64::from(a == b)); + self.state[operands.dst].set_u32(u32::from(a == b)); ControlFlow::Continue(()) } fn xneq32(&mut self, operands: BinaryOperands) -> ControlFlow { let a = self.state[operands.src1].get_u32(); let b = self.state[operands.src2].get_u32(); - self.state[operands.dst].set_u64(u64::from(a != b)); + self.state[operands.dst].set_u32(u32::from(a != b)); ControlFlow::Continue(()) } fn xslt32(&mut self, operands: BinaryOperands) -> ControlFlow { let a = self.state[operands.src1].get_i32(); let b = self.state[operands.src2].get_i32(); - self.state[operands.dst].set_u64(u64::from(a < b)); + self.state[operands.dst].set_u32(u32::from(a < b)); ControlFlow::Continue(()) } fn xslteq32(&mut self, operands: BinaryOperands) -> ControlFlow { let a = self.state[operands.src1].get_i32(); let b = self.state[operands.src2].get_i32(); - self.state[operands.dst].set_u64(u64::from(a <= b)); + self.state[operands.dst].set_u32(u32::from(a <= b)); ControlFlow::Continue(()) } fn xult32(&mut self, operands: BinaryOperands) -> ControlFlow { let a = self.state[operands.src1].get_u32(); let b = self.state[operands.src2].get_u32(); - self.state[operands.dst].set_u64(u64::from(a < b)); + self.state[operands.dst].set_u32(u32::from(a < b)); ControlFlow::Continue(()) } fn xulteq32(&mut self, operands: BinaryOperands) -> ControlFlow { let a = self.state[operands.src1].get_u32(); let b = self.state[operands.src2].get_u32(); - self.state[operands.dst].set_u64(u64::from(a <= b)); + self.state[operands.dst].set_u32(u32::from(a <= b)); ControlFlow::Continue(()) } - fn load32_u(&mut self, dst: XReg, ptr: XReg) -> ControlFlow { - let ptr = self.state[ptr].get_ptr::(); - let val = unsafe { u32::from_le(ptr::read_unaligned(ptr)) }; - self.state[dst].set_u64(u64::from(val)); + fn xload8_u32_offset32(&mut self, dst: XReg, ptr: XReg, offset: i32) -> ControlFlow { + let val = unsafe { self.load::(ptr, offset) }; + self.state[dst].set_u32(val.into()); ControlFlow::Continue(()) } - fn load32_s(&mut self, dst: XReg, ptr: XReg) -> ControlFlow { - let ptr = self.state[ptr].get_ptr::(); - let val = unsafe { i32::from_le(ptr::read_unaligned(ptr)) }; - self.state[dst].set_i64(i64::from(val)); + fn xload8_s32_offset32(&mut self, dst: XReg, ptr: XReg, offset: i32) -> ControlFlow { + let val = unsafe { self.load::(ptr, offset) }; + self.state[dst].set_i32(val.into()); ControlFlow::Continue(()) } - fn load64(&mut self, dst: XReg, ptr: XReg) -> ControlFlow { - let ptr = self.state[ptr].get_ptr::(); - let val = unsafe { u64::from_le(ptr::read_unaligned(ptr)) }; - self.state[dst].set_u64(val); + fn xload16le_u32_offset32(&mut self, dst: XReg, ptr: XReg, offset: i32) -> ControlFlow { + let val = unsafe { self.load::(ptr, offset) }; + self.state[dst].set_u32(u16::from_le(val).into()); ControlFlow::Continue(()) } - fn load32_u_offset8(&mut self, dst: XReg, ptr: XReg, offset: i8) -> ControlFlow { - let val = unsafe { - u32::from_le( - self.state[ptr] - .get_ptr::() - .byte_offset(offset.into()) - .read_unaligned(), - ) - }; - self.state[dst].set_u64(u64::from(val)); + fn xload16le_s32_offset32(&mut self, dst: XReg, ptr: XReg, offset: i32) -> ControlFlow { + let val = unsafe { self.load::(ptr, offset) }; + self.state[dst].set_i32(i16::from_le(val).into()); ControlFlow::Continue(()) } - fn load32_s_offset8(&mut self, dst: 
XReg, ptr: XReg, offset: i8) -> ControlFlow { - let val = unsafe { - i32::from_le( - self.state[ptr] - .get_ptr::() - .byte_offset(offset.into()) - .read_unaligned(), - ) - }; - self.state[dst].set_i64(i64::from(val)); + fn xload32le_offset32(&mut self, dst: XReg, ptr: XReg, offset: i32) -> ControlFlow { + let val = unsafe { self.load::(ptr, offset) }; + self.state[dst].set_i32(i32::from_le(val)); ControlFlow::Continue(()) } - fn load32_u_offset64(&mut self, dst: XReg, ptr: XReg, offset: i64) -> ControlFlow { - let val = unsafe { - u32::from_le( - self.state[ptr] - .get_ptr::() - .byte_offset(offset as isize) - .read_unaligned(), - ) - }; - self.state[dst].set_u64(u64::from(val)); + fn xload8_u64_offset32(&mut self, dst: XReg, ptr: XReg, offset: i32) -> ControlFlow { + let val = unsafe { self.load::(ptr, offset) }; + self.state[dst].set_u64(val.into()); ControlFlow::Continue(()) } - fn load32_s_offset64(&mut self, dst: XReg, ptr: XReg, offset: i64) -> ControlFlow { - let val = unsafe { - i32::from_le( - self.state[ptr] - .get_ptr::() - .byte_offset(offset as isize) - .read_unaligned(), - ) - }; - self.state[dst].set_i64(i64::from(val)); + fn xload8_s64_offset32(&mut self, dst: XReg, ptr: XReg, offset: i32) -> ControlFlow { + let val = unsafe { self.load::(ptr, offset) }; + self.state[dst].set_i64(val.into()); ControlFlow::Continue(()) } - fn load64_offset8(&mut self, dst: XReg, ptr: XReg, offset: i8) -> ControlFlow { - let val = unsafe { - u64::from_le( - self.state[ptr] - .get_ptr::() - .byte_offset(offset.into()) - .read_unaligned(), - ) - }; - self.state[dst].set_u64(val); + fn xload16le_u64_offset32(&mut self, dst: XReg, ptr: XReg, offset: i32) -> ControlFlow { + let val = unsafe { self.load::(ptr, offset) }; + self.state[dst].set_u64(u16::from_le(val).into()); ControlFlow::Continue(()) } - fn load64_offset64(&mut self, dst: XReg, ptr: XReg, offset: i64) -> ControlFlow { - let val = unsafe { - u64::from_le( - self.state[ptr] - .get_ptr::() - .byte_offset(offset as isize) - .read_unaligned(), - ) - }; - self.state[dst].set_u64(val); + fn xload16le_s64_offset32(&mut self, dst: XReg, ptr: XReg, offset: i32) -> ControlFlow { + let val = unsafe { self.load::(ptr, offset) }; + self.state[dst].set_i64(i16::from_le(val).into()); ControlFlow::Continue(()) } - fn store32(&mut self, ptr: XReg, src: XReg) -> ControlFlow { - let ptr = self.state[ptr].get_ptr::(); - let val = self.state[src].get_u32(); + fn xload32le_u64_offset32(&mut self, dst: XReg, ptr: XReg, offset: i32) -> ControlFlow { + let val = unsafe { self.load::(ptr, offset) }; + self.state[dst].set_u64(u32::from_le(val).into()); + ControlFlow::Continue(()) + } + + fn xload32le_s64_offset32(&mut self, dst: XReg, ptr: XReg, offset: i32) -> ControlFlow { + let val = unsafe { self.load::(ptr, offset) }; + self.state[dst].set_i64(i32::from_le(val).into()); + ControlFlow::Continue(()) + } + + fn xload64le_offset32(&mut self, dst: XReg, ptr: XReg, offset: i32) -> ControlFlow { + let val = unsafe { self.load::(ptr, offset) }; + self.state[dst].set_i64(i64::from_le(val)); + ControlFlow::Continue(()) + } + + fn xstore8_offset32(&mut self, ptr: XReg, offset: i32, src: XReg) -> ControlFlow { + let val = self.state[src].get_u32() as u8; unsafe { - ptr::write_unaligned(ptr, val.to_le()); + self.store(ptr, offset, val); } ControlFlow::Continue(()) } - fn store64(&mut self, ptr: XReg, src: XReg) -> ControlFlow { - let ptr = self.state[ptr].get_ptr::(); - let val = self.state[src].get_u64(); + fn xstore16le_offset32(&mut self, ptr: XReg, offset: i32, 
src: XReg) -> ControlFlow { + let val = self.state[src].get_u32() as u16; unsafe { - ptr::write_unaligned(ptr, val.to_le()); + self.store(ptr, offset, val.to_le()); } ControlFlow::Continue(()) } - fn store32_offset8(&mut self, ptr: XReg, offset: i8, src: XReg) -> ControlFlow { + fn xstore32le_offset32(&mut self, ptr: XReg, offset: i32, src: XReg) -> ControlFlow { let val = self.state[src].get_u32(); unsafe { - self.state[ptr] - .get_ptr::() - .byte_offset(offset.into()) - .write_unaligned(val.to_le()); + self.store(ptr, offset, val.to_le()); } ControlFlow::Continue(()) } - fn store64_offset8(&mut self, ptr: XReg, offset: i8, src: XReg) -> ControlFlow { + fn xstore64le_offset32(&mut self, ptr: XReg, offset: i32, src: XReg) -> ControlFlow { let val = self.state[src].get_u64(); unsafe { - self.state[ptr] - .get_ptr::() - .byte_offset(offset.into()) - .write_unaligned(val.to_le()); + self.store(ptr, offset, val.to_le()); } ControlFlow::Continue(()) } - fn store32_offset64(&mut self, ptr: XReg, offset: i64, src: XReg) -> ControlFlow { - let val = self.state[src].get_u32(); + fn fload32le_offset32(&mut self, dst: FReg, ptr: XReg, offset: i32) -> ControlFlow { + let val = unsafe { self.load::(ptr, offset) }; + self.state[dst].set_f32(f32::from_bits(u32::from_le(val))); + ControlFlow::Continue(()) + } + + fn fload64le_offset32(&mut self, dst: FReg, ptr: XReg, offset: i32) -> ControlFlow { + let val = unsafe { self.load::(ptr, offset) }; + self.state[dst].set_f64(f64::from_bits(u64::from_le(val))); + ControlFlow::Continue(()) + } + + fn fstore32le_offset32(&mut self, ptr: XReg, offset: i32, src: FReg) -> ControlFlow { + let val = self.state[src].get_f32(); unsafe { - self.state[ptr] - .get_ptr::() - .byte_offset(offset as isize) - .write_unaligned(val.to_le()); + self.store(ptr, offset, val.to_bits().to_le()); } ControlFlow::Continue(()) } - fn store64_offset64(&mut self, ptr: XReg, offset: i64, src: XReg) -> ControlFlow { - let val = self.state[src].get_u64(); + fn fstore64le_offset32(&mut self, ptr: XReg, offset: i32, src: FReg) -> ControlFlow { + let val = self.state[src].get_f64(); unsafe { - self.state[ptr] - .get_ptr::() - .byte_offset(offset as isize) - .write_unaligned(val.to_le()); + self.store(ptr, offset, val.to_bits().to_le()); + } + ControlFlow::Continue(()) + } + + fn vload128le_offset32(&mut self, dst: VReg, ptr: XReg, offset: i32) -> ControlFlow { + let val = unsafe { self.load::(ptr, offset) }; + self.state[dst].set_u128(u128::from_le(val)); + ControlFlow::Continue(()) + } + + fn vstore128le_offset32(&mut self, ptr: XReg, offset: i32, src: VReg) -> ControlFlow { + let val = self.state[src].get_u128(); + unsafe { + self.store(ptr, offset, val.to_le()); } ControlFlow::Continue(()) } fn xpush32(&mut self, src: XReg) -> ControlFlow { - let me = self.current_pc::(); - self.push(self.state[src].get_u32(), me)?; + self.push::(self.state[src].get_u32())?; ControlFlow::Continue(()) } fn xpush32_many(&mut self, srcs: RegSet) -> ControlFlow { - let me = self.current_pc::(); for src in srcs { - self.push(self.state[src].get_u32(), me)?; + self.push::(self.state[src].get_u32())?; } ControlFlow::Continue(()) } fn xpush64(&mut self, src: XReg) -> ControlFlow { - let me = self.current_pc::(); - self.push(self.state[src].get_u64(), me)?; + self.push::(self.state[src].get_u64())?; ControlFlow::Continue(()) } fn xpush64_many(&mut self, srcs: RegSet) -> ControlFlow { - let me = self.current_pc::(); for src in srcs { - self.push(self.state[src].get_u64(), me)?; + 
self.push::(self.state[src].get_u64())?; } ControlFlow::Continue(()) } @@ -1339,9 +1457,8 @@ impl OpVisitor for Interpreter<'_> { } fn push_frame(&mut self) -> ControlFlow { - let me = self.current_pc::(); - self.push(self.state[XReg::lr].get_ptr::(), me)?; - self.push(self.state[XReg::fp].get_ptr::(), me)?; + self.push::(self.state[XReg::lr].get_ptr::())?; + self.push::(self.state[XReg::fp].get_ptr::())?; self.state[XReg::fp] = self.state[XReg::sp]; ControlFlow::Continue(()) } @@ -1384,16 +1501,20 @@ impl OpVisitor for Interpreter<'_> { // SAFETY: part of the contract of the interpreter is only dealing with // valid bytecode, so this offset should be safe. self.pc = unsafe { self.pc.offset(idx * 4) }; + + // Decode the `PcRelOffset` without tampering with `self.pc` as the + // jump is relative to `self.pc`. let mut tmp = self.pc; let rel = unwrap_uninhabited(PcRelOffset::decode(&mut tmp)); - self.pc_rel_jump(rel, 0) + let offset = isize::try_from(i32::from(rel)).unwrap(); + self.pc = unsafe { self.pc.offset(offset) }; + ControlFlow::Continue(()) } fn stack_alloc32(&mut self, amt: u32) -> ControlFlow { - let me = self.current_pc::(); let amt = usize::try_from(amt).unwrap(); let new_sp = self.state[XReg::sp].get_ptr::().wrapping_sub(amt); - self.set_sp(new_sp, me)?; + self.set_sp::(new_sp)?; ControlFlow::Continue(()) } @@ -1439,6 +1560,284 @@ impl OpVisitor for Interpreter<'_> { self.state[dst].set_i64(src.into()); ControlFlow::Continue(()) } + + fn xdiv32_s(&mut self, operands: BinaryOperands) -> ControlFlow { + let a = self.state[operands.src1].get_i32(); + let b = self.state[operands.src2].get_i32(); + match a.checked_div(b) { + Some(result) => { + self.state[operands.dst].set_i32(result); + ControlFlow::Continue(()) + } + None => self.done_trap::(), + } + } + + fn xdiv64_s(&mut self, operands: BinaryOperands) -> ControlFlow { + let a = self.state[operands.src1].get_i64(); + let b = self.state[operands.src2].get_i64(); + match a.checked_div(b) { + Some(result) => { + self.state[operands.dst].set_i64(result); + ControlFlow::Continue(()) + } + None => self.done_trap::(), + } + } + + fn xdiv32_u(&mut self, operands: BinaryOperands) -> ControlFlow { + let a = self.state[operands.src1].get_u32(); + let b = self.state[operands.src2].get_u32(); + match a.checked_div(b) { + Some(result) => { + self.state[operands.dst].set_u32(result); + ControlFlow::Continue(()) + } + None => self.done_trap::(), + } + } + + fn xdiv64_u(&mut self, operands: BinaryOperands) -> ControlFlow { + let a = self.state[operands.src1].get_u64(); + let b = self.state[operands.src2].get_u64(); + match a.checked_div(b) { + Some(result) => { + self.state[operands.dst].set_u64(result); + ControlFlow::Continue(()) + } + None => self.done_trap::(), + } + } + + fn xrem32_s(&mut self, operands: BinaryOperands) -> ControlFlow { + let a = self.state[operands.src1].get_i32(); + let b = self.state[operands.src2].get_i32(); + match a.checked_rem(b) { + Some(result) => { + self.state[operands.dst].set_i32(result); + ControlFlow::Continue(()) + } + None => self.done_trap::(), + } + } + + fn xrem64_s(&mut self, operands: BinaryOperands) -> ControlFlow { + let a = self.state[operands.src1].get_i64(); + let b = self.state[operands.src2].get_i64(); + match a.checked_rem(b) { + Some(result) => { + self.state[operands.dst].set_i64(result); + ControlFlow::Continue(()) + } + None => self.done_trap::(), + } + } + + fn xrem32_u(&mut self, operands: BinaryOperands) -> ControlFlow { + let a = self.state[operands.src1].get_u32(); + let b = 
self.state[operands.src2].get_u32(); + match a.checked_rem(b) { + Some(result) => { + self.state[operands.dst].set_u32(result); + ControlFlow::Continue(()) + } + None => self.done_trap::(), + } + } + + fn xrem64_u(&mut self, operands: BinaryOperands) -> ControlFlow { + let a = self.state[operands.src1].get_u64(); + let b = self.state[operands.src2].get_u64(); + match a.checked_rem(b) { + Some(result) => { + self.state[operands.dst].set_u64(result); + ControlFlow::Continue(()) + } + None => self.done_trap::(), + } + } + + fn xand32(&mut self, operands: BinaryOperands) -> ControlFlow { + let a = self.state[operands.src1].get_u32(); + let b = self.state[operands.src2].get_u32(); + self.state[operands.dst].set_u32(a & b); + ControlFlow::Continue(()) + } + + fn xand64(&mut self, operands: BinaryOperands) -> ControlFlow { + let a = self.state[operands.src1].get_u64(); + let b = self.state[operands.src2].get_u64(); + self.state[operands.dst].set_u64(a & b); + ControlFlow::Continue(()) + } + + fn xor32(&mut self, operands: BinaryOperands) -> ControlFlow { + let a = self.state[operands.src1].get_u32(); + let b = self.state[operands.src2].get_u32(); + self.state[operands.dst].set_u32(a | b); + ControlFlow::Continue(()) + } + + fn xor64(&mut self, operands: BinaryOperands) -> ControlFlow { + let a = self.state[operands.src1].get_u64(); + let b = self.state[operands.src2].get_u64(); + self.state[operands.dst].set_u64(a | b); + ControlFlow::Continue(()) + } + + fn fconst32(&mut self, dst: FReg, bits: u32) -> ControlFlow { + self.state[dst].set_f32(f32::from_bits(bits)); + ControlFlow::Continue(()) + } + + fn fconst64(&mut self, dst: FReg, bits: u64) -> ControlFlow { + self.state[dst].set_f64(f64::from_bits(bits)); + ControlFlow::Continue(()) + } + + fn feq32(&mut self, dst: XReg, src1: FReg, src2: FReg) -> ControlFlow { + let a = self.state[src1].get_f32(); + let b = self.state[src2].get_f32(); + self.state[dst].set_u32(u32::from(a == b)); + ControlFlow::Continue(()) + } + + fn fneq32(&mut self, dst: XReg, src1: FReg, src2: FReg) -> ControlFlow { + let a = self.state[src1].get_f32(); + let b = self.state[src2].get_f32(); + self.state[dst].set_u32(u32::from(a != b)); + ControlFlow::Continue(()) + } + + fn flt32(&mut self, dst: XReg, src1: FReg, src2: FReg) -> ControlFlow { + let a = self.state[src1].get_f32(); + let b = self.state[src2].get_f32(); + self.state[dst].set_u32(u32::from(a < b)); + ControlFlow::Continue(()) + } + + fn flteq32(&mut self, dst: XReg, src1: FReg, src2: FReg) -> ControlFlow { + let a = self.state[src1].get_f32(); + let b = self.state[src2].get_f32(); + self.state[dst].set_u32(u32::from(a <= b)); + ControlFlow::Continue(()) + } + + fn feq64(&mut self, dst: XReg, src1: FReg, src2: FReg) -> ControlFlow { + let a = self.state[src1].get_f64(); + let b = self.state[src2].get_f64(); + self.state[dst].set_u32(u32::from(a == b)); + ControlFlow::Continue(()) + } + + fn fneq64(&mut self, dst: XReg, src1: FReg, src2: FReg) -> ControlFlow { + let a = self.state[src1].get_f64(); + let b = self.state[src2].get_f64(); + self.state[dst].set_u32(u32::from(a != b)); + ControlFlow::Continue(()) + } + + fn flt64(&mut self, dst: XReg, src1: FReg, src2: FReg) -> ControlFlow { + let a = self.state[src1].get_f64(); + let b = self.state[src2].get_f64(); + self.state[dst].set_u32(u32::from(a < b)); + ControlFlow::Continue(()) + } + + fn flteq64(&mut self, dst: XReg, src1: FReg, src2: FReg) -> ControlFlow { + let a = self.state[src1].get_f64(); + let b = self.state[src2].get_f64(); + 
self.state[dst].set_u32(u32::from(a <= b)); + ControlFlow::Continue(()) + } + + fn xctz32(&mut self, dst: XReg, src: XReg) -> ControlFlow { + let a = self.state[src].get_u32(); + self.state[dst].set_u32(a.trailing_zeros()); + ControlFlow::Continue(()) + } + + fn xctz64(&mut self, dst: XReg, src: XReg) -> ControlFlow { + let a = self.state[src].get_u64(); + self.state[dst].set_u64(a.trailing_zeros().into()); + ControlFlow::Continue(()) + } + + fn xclz32(&mut self, dst: XReg, src: XReg) -> ControlFlow { + let a = self.state[src].get_u32(); + self.state[dst].set_u32(a.leading_zeros()); + ControlFlow::Continue(()) + } + + fn xclz64(&mut self, dst: XReg, src: XReg) -> ControlFlow { + let a = self.state[src].get_u64(); + self.state[dst].set_u64(a.leading_zeros().into()); + ControlFlow::Continue(()) + } + + fn xselect32( + &mut self, + dst: XReg, + cond: XReg, + if_nonzero: XReg, + if_zero: XReg, + ) -> ControlFlow { + let result = if self.state[cond].get_u32() != 0 { + self.state[if_nonzero].get_u32() + } else { + self.state[if_zero].get_u32() + }; + self.state[dst].set_u32(result); + ControlFlow::Continue(()) + } + + fn xselect64( + &mut self, + dst: XReg, + cond: XReg, + if_nonzero: XReg, + if_zero: XReg, + ) -> ControlFlow { + let result = if self.state[cond].get_u32() != 0 { + self.state[if_nonzero].get_u64() + } else { + self.state[if_zero].get_u64() + }; + self.state[dst].set_u64(result); + ControlFlow::Continue(()) + } + + fn fselect32( + &mut self, + dst: FReg, + cond: XReg, + if_nonzero: FReg, + if_zero: FReg, + ) -> ControlFlow { + let result = if self.state[cond].get_u32() != 0 { + self.state[if_nonzero].get_f32() + } else { + self.state[if_zero].get_f32() + }; + self.state[dst].set_f32(result); + ControlFlow::Continue(()) + } + + fn fselect64( + &mut self, + dst: FReg, + cond: XReg, + if_nonzero: FReg, + if_zero: FReg, + ) -> ControlFlow { + let result = if self.state[cond].get_u32() != 0 { + self.state[if_nonzero].get_f64() + } else { + self.state[if_zero].get_f64() + }; + self.state[dst].set_f64(result); + ControlFlow::Continue(()) + } } impl ExtendedOpVisitor for Interpreter<'_> { @@ -1447,12 +1846,11 @@ impl ExtendedOpVisitor for Interpreter<'_> { } fn trap(&mut self) -> ControlFlow { - let trap_pc = self.current_pc::(); - ControlFlow::Break(self.done_trap(trap_pc)) + self.done_trap::() } fn call_indirect_host(&mut self, id: u8) -> ControlFlow { - ControlFlow::Break(self.done_call_indirect_host(id)) + self.done_call_indirect_host(id) } fn bswap32(&mut self, dst: XReg, src: XReg) -> ControlFlow { @@ -1466,4 +1864,86 @@ impl ExtendedOpVisitor for Interpreter<'_> { self.state[dst].set_u64(src.swap_bytes()); ControlFlow::Continue(()) } + + fn xload16be_u64_offset32(&mut self, dst: XReg, ptr: XReg, offset: i32) -> ControlFlow { + let val = unsafe { self.load::(ptr, offset) }; + self.state[dst].set_u64(u16::from_be(val).into()); + ControlFlow::Continue(()) + } + + fn xload16be_s64_offset32(&mut self, dst: XReg, ptr: XReg, offset: i32) -> ControlFlow { + let val = unsafe { self.load::(ptr, offset) }; + self.state[dst].set_i64(i16::from_be(val).into()); + ControlFlow::Continue(()) + } + + fn xload32be_u64_offset32(&mut self, dst: XReg, ptr: XReg, offset: i32) -> ControlFlow { + let val = unsafe { self.load::(ptr, offset) }; + self.state[dst].set_u64(u32::from_be(val).into()); + ControlFlow::Continue(()) + } + + fn xload32be_s64_offset32(&mut self, dst: XReg, ptr: XReg, offset: i32) -> ControlFlow { + let val = unsafe { self.load::(ptr, offset) }; + 
self.state[dst].set_i64(i32::from_be(val).into()); + ControlFlow::Continue(()) + } + + fn xload64be_offset32(&mut self, dst: XReg, ptr: XReg, offset: i32) -> ControlFlow { + let val = unsafe { self.load::(ptr, offset) }; + self.state[dst].set_i64(i64::from_be(val)); + ControlFlow::Continue(()) + } + + fn xstore16be_offset32(&mut self, ptr: XReg, offset: i32, src: XReg) -> ControlFlow { + let val = self.state[src].get_u32() as u16; + unsafe { + self.store(ptr, offset, val.to_be()); + } + ControlFlow::Continue(()) + } + + fn xstore32be_offset32(&mut self, ptr: XReg, offset: i32, src: XReg) -> ControlFlow { + let val = self.state[src].get_u32(); + unsafe { + self.store(ptr, offset, val.to_be()); + } + ControlFlow::Continue(()) + } + + fn xstore64be_offset32(&mut self, ptr: XReg, offset: i32, src: XReg) -> ControlFlow { + let val = self.state[src].get_u64(); + unsafe { + self.store(ptr, offset, val.to_be()); + } + ControlFlow::Continue(()) + } + + fn fload32be_offset32(&mut self, dst: FReg, ptr: XReg, offset: i32) -> ControlFlow { + let val = unsafe { self.load::(ptr, offset) }; + self.state[dst].set_f32(f32::from_bits(u32::from_be(val))); + ControlFlow::Continue(()) + } + + fn fload64be_offset32(&mut self, dst: FReg, ptr: XReg, offset: i32) -> ControlFlow { + let val = unsafe { self.load::(ptr, offset) }; + self.state[dst].set_f64(f64::from_bits(u64::from_be(val))); + ControlFlow::Continue(()) + } + + fn fstore32be_offset32(&mut self, ptr: XReg, offset: i32, src: FReg) -> ControlFlow { + let val = self.state[src].get_f32(); + unsafe { + self.store(ptr, offset, val.to_bits().to_be()); + } + ControlFlow::Continue(()) + } + + fn fstore64be_offset32(&mut self, ptr: XReg, offset: i32, src: FReg) -> ControlFlow { + let val = self.state[src].get_f64(); + unsafe { + self.store(ptr, offset, val.to_bits().to_be()); + } + ControlFlow::Continue(()) + } } diff --git a/pulley/src/interp/debug.rs b/pulley/src/interp/debug.rs new file mode 100644 index 000000000000..239136210129 --- /dev/null +++ b/pulley/src/interp/debug.rs @@ -0,0 +1,128 @@ +//! Primitive support for debugging Pulley +//! +//! This `Debug` visitor defined in this module is what's actually used as part +//! of the interpreter loop in Pulley. Due to the code size impact of always +//! including this and the runtime overhead of always checking a flag this is +//! enabled/disabled via a `const DEBUG` below. This is currently only really +//! suitable for one-off debugging while developing locally. +//! +//! The hope is that this'll eventually evolve into something more useful, but +//! for now it's a quick-and-easy way to dump all the instructions that are +//! executed as well as the values in various registers. +//! +//! If debugging is disabled, or in `#[no_std]` mode, then this module should +//! compile away (e.g. a "zero cost abstraction"). + +use super::Interpreter; +use crate::decode::{ExtendedOpVisitor, OpVisitor}; +use crate::imms::*; +use crate::regs::*; +use alloc::string::ToString; + +// Whether or not debugging is enabled at all. +const DEBUG: bool = false; + +// Whether or not these registers are dumped between each instruction. +const DEBUG_X_REGS: bool = true; +const DEBUG_F_REGS: bool = false; + +#[cfg(not(feature = "std"))] +macro_rules! print { + ($($t:tt)*) => ({ let _ = format_args!($($t)*); }) +} +#[cfg(not(feature = "std"))] +macro_rules! println { + () => (); + ($($t:tt)*) => ({ let _ = format_args!($($t)*); }) +} + +#[repr(transparent)] +pub(super) struct Debug<'a>(pub Interpreter<'a>); + +macro_rules! 
debug_then_delegate { + ( + $( + $( #[$attr:meta] )* + $snake_name:ident = $name:ident $( { + $( + $( #[$field_attr:meta] )* + $field:ident : $field_ty:ty + ),* + } )? ; + )* + ) => { + $( + $( #[$attr] )* + fn $snake_name(&mut self $( $( , $field : $field_ty )* )? ) -> Self::Return { + if DEBUG { + println!( + concat!( + stringify!($snake_name), + $( + $( + " ", + stringify!($field), + "={:?}", + )* + )? + ), + $($($field),*)? + ); + } + self.0.$snake_name($( $($field),* )?) + } + )* + } +} + +impl<'a> OpVisitor for Debug<'a> { + type BytecodeStream = as OpVisitor>::BytecodeStream; + type Return = as OpVisitor>::Return; + + fn bytecode(&mut self) -> &mut Self::BytecodeStream { + self.0.bytecode() + } + + fn before_visit(&mut self) { + if !DEBUG { + return; + } + print!("\t{:?}\t", self.bytecode().as_ptr()); + } + + fn after_visit(&mut self) { + if !DEBUG { + return; + } + if DEBUG_X_REGS { + for (i, regs) in self.0.state.x_regs.chunks(4).enumerate() { + print!("\t\t"); + for (j, reg) in regs.iter().enumerate() { + let n = i * 4 + j; + let val = reg.get_u64(); + let reg = XReg::new(n as u8).unwrap().to_string(); + print!(" {reg:>3}={val:#018x}"); + } + println!(); + } + } + if DEBUG_F_REGS { + for (i, regs) in self.0.state.f_regs.chunks(4).enumerate() { + print!("\t\t"); + for (j, reg) in regs.iter().enumerate() { + let n = i * 4 + j; + let val = reg.get_f64().to_bits(); + let reg = FReg::new(n as u8).unwrap().to_string(); + print!(" {reg:>3}={val:#018x}"); + } + println!(); + } + } + } + + for_each_op!(debug_then_delegate); +} + +impl<'a> ExtendedOpVisitor for Debug<'a> { + for_each_extended_op!(debug_then_delegate); +} diff --git a/pulley/src/interp/match_loop.rs b/pulley/src/interp/match_loop.rs index f9f2d3bd0214..46d949632f75 100644 --- a/pulley/src/interp/match_loop.rs +++ b/pulley/src/interp/match_loop.rs @@ -19,8 +19,9 @@ use super::*; use crate::decode::unwrap_uninhabited; impl Interpreter<'_> { - pub fn run(mut self) -> Done { + pub fn run(self) -> Done { let mut decoder = Decoder::new(); + let mut visitor = debug::Debug(self); loop { // Here `decode_one` will call the appropriate `OpVisitor` method on // `self` via the trait implementation in the module above this. @@ -29,7 +30,7 @@ impl Interpreter<'_> { // // This will then continue indefinitely until the bytecode says it's // done. Note that only trusted bytecode is interpreted here. - match unwrap_uninhabited(decoder.decode_one(&mut self)) { + match unwrap_uninhabited(decoder.decode_one(&mut visitor)) { ControlFlow::Continue(()) => {} ControlFlow::Break(done) => break done, } diff --git a/pulley/src/interp/tail_loop.rs b/pulley/src/interp/tail_loop.rs index ddc714807820..d9f63ff6afb8 100644 --- a/pulley/src/interp/tail_loop.rs +++ b/pulley/src/interp/tail_loop.rs @@ -1,6 +1,35 @@ +//! Support executing the interpreter loop through tail-calls rather than a +//! source-level `loop`. +//! +//! This is an alternative means of executing the interpreter loop of Pulley. +//! The other method is in `match_loop.rs` which is a `loop` over a `match` +//! (more-or-less). This file instead transitions between opcodes with +//! tail-calls. +//! +//! At this time this module is more performant but disabled by default. Rust +//! does not have guaranteed tail call elimination at this time so this is not +//! a suitable means of writing an interpreter loop. That being said this is +//! included nonetheless for us to experiment and analyze with. +//! +//! There are two methods of using this module: +//! +//! 
* `RUSTFLAGS=--cfg=pulley_assume_llvm_makes_tail_calls` - this compilation
+//!   flag indicates that we should assume that LLVM will optimize to making
+//!   tail calls for things that look like tail calls. Practically this
+//!   probably only happens with `--release` and for popular native
+//!   architectures. It's up to the person compiling to manually
+//!   audit/verify/test that TCO is happening.
+//!
+//! * `RUSTFLAGS=--cfg=pulley_tail_calls` - this compilation flag indicates that
+//!   Rust's nightly-only support for guaranteed tail calls should be used. This
+//!   uses the `become` keyword, for example. At this time this feature of Rust
+//!   is highly experimental and not even complete. It only passes `cargo check`
+//!   at this time but doesn't actually run anywhere.
+
 use super::*;
-use crate::decode::unwrap_uninhabited;
+use crate::decode::{unwrap_uninhabited, ExtendedOpVisitor};
 use crate::opcode::Opcode;
+use crate::ExtendedOpcode;

 type Handler = fn(Interpreter<'_>) -> Done;

@@ -15,12 +44,20 @@ type Handler = fn(Interpreter<'_>) -> Done;
 /// Macro bodies are just bags of tokens; the body is not parsed until after
 /// they are expanded, and this macro is only expanded when `pulley_tail_calls`
 /// is enabled.
+#[cfg(pulley_tail_calls)]
 macro_rules! tail_call {
     ($e:expr) => {
         become $e
     };
 }

+#[cfg(pulley_assume_llvm_makes_tail_calls)]
+macro_rules! tail_call {
+    ($e:expr) => {
+        return $e
+    };
+}
+
 impl Interpreter<'_> {
     pub fn run(mut self) -> Done {
         // Perform a dynamic dispatch through a function pointer indexed by
@@ -101,8 +138,10 @@ macro_rules! define_opcode_handler {
                     crate::decode::operands::$snake_name(i.bytecode())
                 );
             )?
-            match OpVisitor::$snake_name(&mut i, $($($field),*)?) {
-                ControlFlow::Continue(()) => tail_call!(i.run()),
+            let _ = &mut i;
+            let mut debug = debug::Debug(i);
+            match debug.$snake_name($($($field),*)?) {
+                ControlFlow::Continue(()) => tail_call!(debug.0.run()),
                 ControlFlow::Break(done) => done,
             }
         }
@@ -110,28 +149,4 @@ macro_rules! define_opcode_handler {
 }

 for_each_op!(define_opcode_handler);
-
-macro_rules! define_extended_opcode_handler {
-    ($(
-        $( #[$attr:meta] )*
-        $snake_name:ident = $name:ident $( {
-            $(
-                $( #[$field_attr:meta] )*
-                $field:ident : $field_ty:ty
-            ),*
-        } )?;
-    )*) => {$(
-        fn $snake_name(mut i: Interpreter<'_>) -> Done {
-            $(
-                let ($($field,)*) = unwrap_uninhabited(
-                    crate::decode::operands::$snake_name(i.bytecode())
-                );
-            )?
-            match ExtendedOpVisitor::$snake_name(&mut i, $($($field),*)?) {
-                ControlFlow::Continue(()) => tail_call!(i.run()),
-                ControlFlow::Break(done) => done,
-            }
-        }
-    )*};
-}
-for_each_extended_op!(define_extended_opcode_handler);
+for_each_extended_op!(define_opcode_handler);
diff --git a/pulley/src/lib.rs b/pulley/src/lib.rs
index 5a63e7067234..a2a462ad5b25 100644
--- a/pulley/src/lib.rs
+++ b/pulley/src/lib.rs
@@ -15,6 +15,75 @@ extern crate std;
 extern crate alloc;

 /// Calls the given macro with each opcode.
+///
+/// # Instruction Guidelines
+///
+/// We're inventing an instruction set here which naturally brings a whole set
+/// of design questions. Note that this is explicitly intended to be only ever
+/// used for Pulley where there are a different set of design constraints than
+/// other instruction sets (e.g. general-purpose CPU ISAs). Some examples of
+/// constraints for Pulley are:
+///
+/// * Instructions must be portable to many architectures.
+/// * The Pulley ISA is mostly target-independent as the compilation target is
+///   currently only parameterized on pointer width and endianness.
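For readers unfamiliar with the dispatch style that the `tail_loop.rs` module doc above describes, here is a minimal, self-contained sketch. It is editorial illustration only, not code from this change: all names (`Vm`, `op_inc`, the byte "bytecode") are toy stand-ins. Handlers are plain `fn(Vm) -> Done` pointers in a table indexed by opcode, and each handler re-enters `run` through a `tail_call!` macro that is just `return` when guaranteed tail calls are unavailable.

```rust
// Toy interpreter whose handlers transfer control by tail-calling back into
// `run`, mirroring the structure described above.
type Done = i64;
type Handler = fn(Vm) -> Done;

struct Vm {
    pc: usize,
    code: &'static [u8],
    acc: i64,
}

// Under `pulley_tail_calls` the real macro expands to `become $e`; on stable
// Rust this falls back to `return $e` and relies on LLVM to perform TCO.
macro_rules! tail_call {
    ($e:expr) => {
        return $e
    };
}

const TABLE: [Handler; 3] = [op_halt, op_inc, op_dec];

fn run(mut vm: Vm) -> Done {
    // Dynamic dispatch through a function pointer indexed by opcode.
    let opcode = vm.code[vm.pc];
    vm.pc += 1;
    tail_call!(TABLE[usize::from(opcode)](vm));
}

fn op_halt(vm: Vm) -> Done {
    vm.acc
}

fn op_inc(mut vm: Vm) -> Done {
    vm.acc += 1;
    tail_call!(run(vm))
}

fn op_dec(mut vm: Vm) -> Done {
    vm.acc -= 1;
    tail_call!(run(vm))
}

fn main() {
    // Bytecode: inc, inc, dec, halt.
    let vm = Vm { pc: 0, code: &[1, 1, 2, 0], acc: 0 };
    assert_eq!(run(vm), 1);
}
```

With optimizations enabled, LLVM will usually turn these `return`-into-`run` calls into jumps, which is exactly the bet the `pulley_assume_llvm_makes_tail_calls` configuration makes.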
+/// * Pulley instructions should be balance of time-to-decode and code size. For +/// example super fancy bit-packing tricks might be tough to decode in +/// software but might be worthwhile if it's quite common and greatly reduces +/// the size of bytecode. There's not a hard-and-fast answer here, but a +/// balance to be made. +/// * Many "macro ops" are present to reduce the size of compiled bytecode so +/// there is a wide set of duplicate functionality between opcodes (and this +/// is expected). +/// +/// Given all this it's also useful to have a set of guidelines used to name and +/// develop Pulley instructions. As of the time of this writing it's still +/// pretty early days for Pulley so some of these guidelines may change over +/// time. Additionally instructions don't necessarily all follow these +/// conventions and that may also change over time. With that in mind, here's a +/// rough set of guidelines: +/// +/// * Most instructions are prefixed with `x`, `f`, or `v`, indicating which +/// type of register they're operating on. (e.g. `xadd32` operates on the `x` +/// integer registers and `fadd32` operates on the `f` float registers). +/// +/// * Most instructions are suffixed or otherwise contain the bit width they're +/// operating on. For example `xadd32` is a 32-bit addition. +/// +/// * If an instruction operates on signed or unsigned data (such as division +/// and remainder), then the instruction is suffixed with `_s` or `_u`. +/// +/// * Instructions operate on either 32 or 64-bit parts of a register. +/// Instructions modifying only 32-bits of a register always modify the "low" +/// part of a register and leave the upper part unmodified. This is intended +/// to help 32-bit platforms where if most operations are 32-bit there's no +/// need for extra instructions to sign or zero extend and modify the upper +/// half of the register. +/// +/// * Binops use `BinaryOperands` for the destination and argument registers. +/// +/// * Instructions operating on memory contain a few pieces of information: +/// +/// ```text +/// xload16le_u32_offset32 +/// │└─┬┘└┤└┤ └┬┘ └──┬───┘ +/// │ │ │ │ │ ▼ +/// │ │ │ │ │ addressing mode +/// │ │ │ │ ▼ +/// │ │ │ │ width of register modified + sign-extension (optional) +/// │ │ │ ▼ +/// │ │ │ endianness of the operation (le/be) +/// │ │ ▼ +/// │ │ bit-width of the operation +/// │ ▼ +/// │ what's happening (load/store) +/// ▼ +/// register being operated on (x/f/z) +/// ``` +/// +/// More guidelines might get added here over time, and if you have any +/// questions feel free to raise them and we can try to add them here as well! +#[macro_export] macro_rules! for_each_op { ( $macro:ident ) => { $macro! { @@ -32,13 +101,13 @@ macro_rules! for_each_op { /// Unconditionally transfer control to the PC at the given offset. jump = Jump { offset: PcRelOffset }; - /// Conditionally transfer control to the given PC offset if `cond` - /// contains a non-zero value. - br_if = BrIf { cond: XReg, offset: PcRelOffset }; + /// Conditionally transfer control to the given PC offset if + /// `low32(cond)` contains a non-zero value. + br_if32 = BrIf { cond: XReg, offset: PcRelOffset }; - /// Conditionally transfer control to the given PC offset if `cond` - /// contains a zero value. - br_if_not = BrIfNot { cond: XReg, offset: PcRelOffset }; + /// Conditionally transfer control to the given PC offset if + /// `low32(cond)` contains a zero value. + br_if_not32 = BrIfNot { cond: XReg, offset: PcRelOffset }; /// Branch if `a == b`. 
br_if_xeq32 = BrIfXeq32 { a: XReg, b: XReg, offset: PcRelOffset }; @@ -65,7 +134,7 @@ macro_rules! for_each_op { /// Branch if unsigned `a <= b`. br_if_xulteq64 = BrIfXulteq64 { a: XReg, b: XReg, offset: PcRelOffset }; - /// Branch to the label indicated by `idx`. + /// Branch to the label indicated by `low32(idx)`. /// /// After this instruction are `amt` instances of `PcRelOffset` /// and the `idx` selects which one will be branched to. The value @@ -97,66 +166,120 @@ macro_rules! for_each_op { /// 64-bit wrapping addition: `dst = src1 + src2`. xadd64 = Xadd64 { operands: BinaryOperands }; - /// 64-bit equality. + /// 32-bit checked unsigned addition: `low32(dst) = low32(src1) + + /// low32(src2)`. + /// + /// The upper 32-bits of `dst` are unmodified. Traps if the addition + /// overflows. + xadd32_uoverflow_trap = Xadd32UoverflowTrap { operands: BinaryOperands }; + + /// 64-bit checked unsigned addition: `dst = src1 + src2`. + xadd64_uoverflow_trap = Xadd64UoverflowTrap { operands: BinaryOperands }; + + /// 32-bit wrapping subtraction: `low32(dst) = low32(src1) - low32(src2)`. + /// + /// The upper 32-bits of `dst` are unmodified. + xsub32 = Xsub32 { operands: BinaryOperands }; + + /// 64-bit wrapping subtraction: `dst = src1 - src2`. + xsub64 = Xsub64 { operands: BinaryOperands }; + + /// `low32(dst) = trailing_zeros(low32(src))` + xctz32 = Xctz32 { dst: XReg, src: XReg }; + /// `dst = trailing_zeros(src)` + xctz64 = Xctz64 { dst: XReg, src: XReg }; + + /// `low32(dst) = leading_zeros(low32(src))` + xclz32 = Xclz32 { dst: XReg, src: XReg }; + /// `dst = leading_zeros(src)` + xclz64 = Xclz64 { dst: XReg, src: XReg }; + + /// `low32(dst) = low32(src1) << low5(src2)` + xshl32 = Xshl32 { operands: BinaryOperands }; + /// `low32(dst) = low32(src1) >> low5(src2)` + xshr32_s = Xshr32S { operands: BinaryOperands }; + /// `low32(dst) = low32(src1) >> low5(src2)` + xshr32_u = Xshr32U { operands: BinaryOperands }; + /// `dst = src1 << low5(src2)` + xshl64 = Xshl64 { operands: BinaryOperands }; + /// `dst = src1 >> low6(src2)` + xshr64_s = Xshr64S { operands: BinaryOperands }; + /// `dst = src1 >> low6(src2)` + xshr64_u = Xshr64U { operands: BinaryOperands }; + + /// `low32(dst) = src1 == src2` xeq64 = Xeq64 { operands: BinaryOperands }; - /// 64-bit inequality. + /// `low32(dst) = src1 != src2` xneq64 = Xneq64 { operands: BinaryOperands }; - /// 64-bit signed less-than. + /// `low32(dst) = src1 < src2` (signed) xslt64 = Xslt64 { operands: BinaryOperands }; - /// 64-bit signed less-than-equal. + /// `low32(dst) = src1 <= src2` (signed) xslteq64 = Xslteq64 { operands: BinaryOperands }; - /// 64-bit unsigned less-than. + /// `low32(dst) = src1 < src2` (unsigned) xult64 = Xult64 { operands: BinaryOperands }; - /// 64-bit unsigned less-than-equal. + /// `low32(dst) = src1 <= src2` (unsigned) xulteq64 = Xulteq64 { operands: BinaryOperands }; - /// 32-bit equality. + /// `low32(dst) = low32(src1) == low32(src2)` xeq32 = Xeq32 { operands: BinaryOperands }; - /// 32-bit inequality. + /// `low32(dst) = low32(src1) != low32(src2)` xneq32 = Xneq32 { operands: BinaryOperands }; - /// 32-bit signed less-than. + /// `low32(dst) = low32(src1) < low32(src2)` (signed) xslt32 = Xslt32 { operands: BinaryOperands }; - /// 32-bit signed less-than-equal. + /// `low32(dst) = low32(src1) <= low32(src2)` (signed) xslteq32 = Xslteq32 { operands: BinaryOperands }; - /// 32-bit unsigned less-than. 
+ /// `low32(dst) = low32(src1) < low32(src2)` (unsigned) xult32 = Xult32 { operands: BinaryOperands }; - /// 32-bit unsigned less-than-equal. + /// `low32(dst) = low32(src1) <= low32(src2)` (unsigned) xulteq32 = Xulteq32 { operands: BinaryOperands }; - /// `dst = zero_extend(load32_le(ptr))` - load32_u = Load32U { dst: XReg, ptr: XReg }; - /// `dst = sign_extend(load32_le(ptr))` - load32_s = Load32S { dst: XReg, ptr: XReg }; - /// `dst = load64_le(ptr)` - load64 = Load64 { dst: XReg, ptr: XReg }; - - /// `dst = zero_extend(load32_le(ptr + offset8))` - load32_u_offset8 = Load32UOffset8 { dst: XReg, ptr: XReg, offset: i8 }; - /// `dst = sign_extend(load32_le(ptr + offset8))` - load32_s_offset8 = Load32SOffset8 { dst: XReg, ptr: XReg, offset: i8 }; - /// `dst = load64_le(ptr + offset8)` - load64_offset8 = Load64Offset8 { dst: XReg, ptr: XReg, offset: i8 }; - - /// `dst = zero_extend(load32_le(ptr + offset64))` - load32_u_offset64 = Load32UOffset64 { dst: XReg, ptr: XReg, offset: i64 }; - /// `dst = sign_extend(load32_le(ptr + offset64))` - load32_s_offset64 = Load32SOffset64 { dst: XReg, ptr: XReg, offset: i64 }; - /// `dst = load64_le(ptr + offset64)` - load64_offset64 = Load64Offset64 { dst: XReg, ptr: XReg, offset: i64 }; - - /// `*ptr = low32(src.to_le())` - store32 = Store32 { ptr: XReg, src: XReg }; - /// `*ptr = src.to_le()` - store64 = Store64 { ptr: XReg, src: XReg }; - - /// `*(ptr + sign_extend(offset8)) = low32(src).to_le()` - store32_offset8 = Store32SOffset8 { ptr: XReg, offset: i8, src: XReg }; - /// `*(ptr + sign_extend(offset8)) = src.to_le()` - store64_offset8 = Store64Offset8 { ptr: XReg, offset: i8, src: XReg }; - - /// `*(ptr + sign_extend(offset64)) = low32(src).to_le()` - store32_offset64 = Store32SOffset64 { ptr: XReg, offset: i64, src: XReg }; - /// `*(ptr + sign_extend(offset64)) = src.to_le()` - store64_offset64 = Store64Offset64 { ptr: XReg, offset: i64, src: XReg }; + /// `low32(dst) = zext(*(ptr + offset))` + xload8_u32_offset32 = XLoad8U32Offset32 { dst: XReg, ptr: XReg, offset: i32 }; + /// `low32(dst) = sext(*(ptr + offset))` + xload8_s32_offset32 = XLoad8S32Offset32 { dst: XReg, ptr: XReg, offset: i32 }; + /// `low32(dst) = zext(*(ptr + offset))` + xload16le_u32_offset32 = XLoad16LeU32Offset32 { dst: XReg, ptr: XReg, offset: i32 }; + /// `low32(dst) = sext(*(ptr + offset))` + xload16le_s32_offset32 = XLoad16LeS32Offset32 { dst: XReg, ptr: XReg, offset: i32 }; + /// `low32(dst) = *(ptr + offset)` + xload32le_offset32 = XLoad32LeOffset32 { dst: XReg, ptr: XReg, offset: i32 }; + + /// `dst = zext(*(ptr + offset))` + xload8_u64_offset32 = XLoad8U64Offset32 { dst: XReg, ptr: XReg, offset: i32 }; + /// `dst = sext(*(ptr + offset))` + xload8_s64_offset32 = XLoad8S64Offset32 { dst: XReg, ptr: XReg, offset: i32 }; + /// `dst = zext(*(ptr + offset))` + xload16le_u64_offset32 = XLoad16LeU64Offset32 { dst: XReg, ptr: XReg, offset: i32 }; + /// `dst = sext(*(ptr + offset))` + xload16le_s64_offset32 = XLoad16LeS64Offset32 { dst: XReg, ptr: XReg, offset: i32 }; + /// `dst = zext(*(ptr + offset))` + xload32le_u64_offset32 = XLoad32LeU64Offset32 { dst: XReg, ptr: XReg, offset: i32 }; + /// `dst = sext(*(ptr + offset))` + xload32le_s64_offset32 = XLoad32LeS64Offset32 { dst: XReg, ptr: XReg, offset: i32 }; + /// `dst = *(ptr + offset)` + xload64le_offset32 = XLoad64LeOffset32 { dst: XReg, ptr: XReg, offset: i32 }; + + /// `*(ptr + offset) = low8(src)` + xstore8_offset32 = XStore8Offset32 { ptr: XReg, offset: i32, src: XReg }; + /// `*(ptr + offset) = low16(src)` + 
xstore16le_offset32 = XStore16LeOffset32 { ptr: XReg, offset: i32, src: XReg }; + /// `*(ptr + offset) = low32(src)` + xstore32le_offset32 = XStore32LeOffset32 { ptr: XReg, offset: i32, src: XReg }; + /// `*(ptr + offset) = low64(src)` + xstore64le_offset32 = XStore64LeOffset32 { ptr: XReg, offset: i32, src: XReg }; + + /// `low32(dst) = zext(*(ptr + offset))` + fload32le_offset32 = Fload32LeOffset32 { dst: FReg, ptr: XReg, offset: i32 }; + /// `dst = *(ptr + offset)` + fload64le_offset32 = Fload64LeOffset32 { dst: FReg, ptr: XReg, offset: i32 }; + /// `*(ptr + offset) = low32(src)` + fstore32le_offset32 = Fstore32LeOffset32 { ptr: XReg, offset: i32, src: FReg }; + /// `*(ptr + offset) = src` + fstore64le_offset32 = Fstore64LeOffset32 { ptr: XReg, offset: i32, src: FReg }; + + /// `dst = *(ptr + offset)` + vload128le_offset32 = VLoad128Offset32 { dst: VReg, ptr: XReg, offset: i32 }; + /// `*(ptr + offset) = src` + vstore128le_offset32 = Vstore128LeOffset32 { ptr: XReg, offset: i32, src: VReg }; /// `push lr; push fp; fp = sp` push_frame = PushFrame ; @@ -208,11 +331,76 @@ macro_rules! for_each_op { sext16 = Sext16 { dst: XReg, src: XReg }; /// `dst = sext(low32(src))` sext32 = Sext32 { dst: XReg, src: XReg }; + + /// `low32(dst) = low32(src1) / low32(src2)` (signed) + xdiv32_s = XDiv32S { operands: BinaryOperands }; + + /// `dst = src1 / src2` (signed) + xdiv64_s = XDiv64S { operands: BinaryOperands }; + + /// `low32(dst) = low32(src1) / low32(src2)` (unsigned) + xdiv32_u = XDiv32U { operands: BinaryOperands }; + + /// `dst = src1 / src2` (unsigned) + xdiv64_u = XDiv64U { operands: BinaryOperands }; + + /// `low32(dst) = low32(src1) % low32(src2)` (signed) + xrem32_s = XRem32S { operands: BinaryOperands }; + + /// `dst = src1 / src2` (signed) + xrem64_s = XRem64S { operands: BinaryOperands }; + + /// `low32(dst) = low32(src1) % low32(src2)` (unsigned) + xrem32_u = XRem32U { operands: BinaryOperands }; + + /// `dst = src1 / src2` (unsigned) + xrem64_u = XRem64U { operands: BinaryOperands }; + + /// `low32(dst) = low32(src1) & low32(src2)` + xand32 = XAnd32 { operands: BinaryOperands }; + /// `dst = src1 & src2` + xand64 = XAnd64 { operands: BinaryOperands }; + /// `low32(dst) = low32(src1) | low32(src2)` + xor32 = XOr32 { operands: BinaryOperands }; + /// `dst = src1 | src2` + xor64 = XOr64 { operands: BinaryOperands }; + + /// `low32(dst) = bits` + fconst32 = FConst32 { dst: FReg, bits: u32 }; + /// `dst = bits` + fconst64 = FConst64 { dst: FReg, bits: u64 }; + + /// `low32(dst) = zext(src1 == src2)` + feq32 = Feq32 { dst: XReg, src1: FReg, src2: FReg }; + /// `low32(dst) = zext(src1 != src2)` + fneq32 = Fneq32 { dst: XReg, src1: FReg, src2: FReg }; + /// `low32(dst) = zext(src1 < src2)` + flt32 = Flt32 { dst: XReg, src1: FReg, src2: FReg }; + /// `low32(dst) = zext(src1 <= src2)` + flteq32 = Flteq32 { dst: XReg, src1: FReg, src2: FReg }; + /// `low32(dst) = zext(src1 == src2)` + feq64 = Feq64 { dst: XReg, src1: FReg, src2: FReg }; + /// `low32(dst) = zext(src1 != src2)` + fneq64 = Fneq64 { dst: XReg, src1: FReg, src2: FReg }; + /// `low32(dst) = zext(src1 < src2)` + flt64 = Flt64 { dst: XReg, src1: FReg, src2: FReg }; + /// `low32(dst) = zext(src1 <= src2)` + flteq64 = Flteq64 { dst: XReg, src1: FReg, src2: FReg }; + + /// `low32(dst) = low32(cond) ? low32(if_nonzero) : low32(if_zero)` + xselect32 = XSelect32 { dst: XReg, cond: XReg, if_nonzero: XReg, if_zero: XReg }; + /// `dst = low32(cond) ? 
if_nonzero : if_zero` + xselect64 = XSelect64 { dst: XReg, cond: XReg, if_nonzero: XReg, if_zero: XReg }; + /// `low32(dst) = low32(cond) ? low32(if_nonzero) : low32(if_zero)` + fselect32 = FSelect32 { dst: FReg, cond: XReg, if_nonzero: FReg, if_zero: FReg }; + /// `dst = low32(cond) ? if_nonzero : if_zero` + fselect64 = FSelect64 { dst: FReg, cond: XReg, if_nonzero: FReg, if_zero: FReg }; } }; } /// Calls the given macro with each extended opcode. +#[macro_export] macro_rules! for_each_extended_op { ( $macro:ident ) => { $macro! { @@ -246,6 +434,34 @@ macro_rules! for_each_extended_op { bswap32 = Bswap32 { dst: XReg, src: XReg }; /// `dst = byteswap(src)` bswap64 = Bswap64 { dst: XReg, src: XReg }; + + + /// `dst = zext(*(ptr + offset))` + xload16be_u64_offset32 = XLoad16BeU64Offset32 { dst: XReg, ptr: XReg, offset: i32 }; + /// `dst = sext(*(ptr + offset))` + xload16be_s64_offset32 = XLoad16BeS64Offset32 { dst: XReg, ptr: XReg, offset: i32 }; + /// `dst = zext(*(ptr + offset))` + xload32be_u64_offset32 = XLoad32BeU64Offset32 { dst: XReg, ptr: XReg, offset: i32 }; + /// `dst = sext(*(ptr + offset))` + xload32be_s64_offset32 = XLoad32BeS64Offset32 { dst: XReg, ptr: XReg, offset: i32 }; + /// `dst = *(ptr + offset)` + xload64be_offset32 = XLoad64BeOffset32 { dst: XReg, ptr: XReg, offset: i32 }; + + /// `*(ptr + offset) = low16(src)` + xstore16be_offset32 = XStore16BeOffset32 { ptr: XReg, offset: i32, src: XReg }; + /// `*(ptr + offset) = low32(src)` + xstore32be_offset32 = XStore32BeOffset32 { ptr: XReg, offset: i32, src: XReg }; + /// `*(ptr + offset) = low64(src)` + xstore64be_offset32 = XStore64BeOffset32 { ptr: XReg, offset: i32, src: XReg }; + + /// `low32(dst) = zext(*(ptr + offset))` + fload32be_offset32 = Fload32BeOffset32 { dst: FReg, ptr: XReg, offset: i32 }; + /// `dst = *(ptr + offset)` + fload64be_offset32 = Fload64BeOffset32 { dst: FReg, ptr: XReg, offset: i32 }; + /// `*(ptr + offset) = low32(src)` + fstore32be_offset32 = Fstore32BeOffset32 { ptr: XReg, offset: i32, src: FReg }; + /// `*(ptr + offset) = src` + fstore64be_offset32 = Fstore64BeOffset32 { ptr: XReg, offset: i32, src: FReg }; } }; } diff --git a/pulley/tests/all/interp.rs b/pulley/tests/all/interp.rs index 2dc6deb50173..c93aaeea7456 100644 --- a/pulley/tests/all/interp.rs +++ b/pulley/tests/all/interp.rs @@ -194,7 +194,7 @@ fn xeq64() { }, }, x(0), - expected, + expected | 0x1234567800000000, ); } } @@ -219,7 +219,7 @@ fn xneq64() { }, }, x(0), - expected, + expected | 0x1234567800000000, ); } } @@ -251,7 +251,7 @@ fn xslt64() { }, }, x(0), - expected, + expected | 0x1234567800000000, ); } } @@ -283,7 +283,7 @@ fn xslteq64() { }, }, x(0), - expected, + expected | 0x1234567800000000, ); } } @@ -312,7 +312,7 @@ fn xult64() { }, }, x(0), - expected, + expected | 0x1234567800000000, ); } } @@ -341,7 +341,7 @@ fn xulteq64() { }, }, x(0), - expected, + expected | 0x1234567800000000, ); } } @@ -370,7 +370,7 @@ fn xeq32() { }, }, x(0), - expected, + expected | 0x1234567800000000, ); } } @@ -396,7 +396,7 @@ fn xneq32() { }, }, x(0), - expected, + expected | 0x1234567800000000, ); } } @@ -430,7 +430,7 @@ fn xslt32() { }, }, x(0), - expected, + expected | 0x1234567800000000, ); } } @@ -462,7 +462,7 @@ fn xslteq32() { }, }, x(0), - expected, + expected | 0x1234567800000000, ); } } @@ -490,7 +490,7 @@ fn xult32() { }, }, x(0), - expected, + expected | 0x1234567800000000, ); } } @@ -518,210 +518,14 @@ fn xulteq32() { }, }, x(0), - expected, - ); - } - } -} - -#[test] -fn load32_u() { - let a = UnsafeCell::new(11u32.to_le()); - 
let b = UnsafeCell::new(22u32.to_le()); - let c = UnsafeCell::new(33u32.to_le()); - let d = UnsafeCell::new((i32::MIN as u32).to_le()); - - for (expected, addr) in [ - (11, a.get()), - (22, b.get()), - (33, c.get()), - (i32::MIN as u32 as u64, d.get()), - ] { - unsafe { - assert_one( - [ - (x(0), Val::from(0x1234567812345678u64)), - (x(1), Val::from(addr.cast::())), - ], - Load32U { - dst: x(0), - ptr: x(1), - }, - x(0), - expected, - ); - } - } -} - -#[test] -fn load32_s() { - let a = UnsafeCell::new(11u32.to_le()); - let b = UnsafeCell::new(22u32.to_le()); - let c = UnsafeCell::new(33u32.to_le()); - let d = UnsafeCell::new((-1i32 as u32).to_le()); - - for (expected, addr) in [ - (11, a.get()), - (22, b.get()), - (33, c.get()), - (-1i64 as u64, d.get()), - ] { - unsafe { - assert_one( - [ - (x(0), Val::from(0x1234567812345678u64)), - (x(1), Val::from(addr.cast::())), - ], - Load32S { - dst: x(0), - ptr: x(1), - }, - x(0), - expected, - ); - } - } -} - -#[test] -fn load64() { - let a = UnsafeCell::new(11u64.to_le()); - let b = UnsafeCell::new(22u64.to_le()); - let c = UnsafeCell::new(33u64.to_le()); - let d = UnsafeCell::new((-1i64 as u64).to_le()); - - for (expected, addr) in [ - (11, a.get()), - (22, b.get()), - (33, c.get()), - (-1i64 as u64, d.get()), - ] { - unsafe { - assert_one( - [ - (x(0), Val::from(0x1234567812345678u64)), - (x(1), Val::from(addr)), - ], - Load64 { - dst: x(0), - ptr: x(1), - }, - x(0), - expected, - ); - } - } -} - -#[test] -fn load32_u_offset8() { - let a = UnsafeCell::new([11u32.to_le(), 22u32.to_le()]); - let b = UnsafeCell::new([33u32.to_le(), 44u32.to_le()]); - let c = UnsafeCell::new([55u32.to_le(), 66u32.to_le()]); - let d = UnsafeCell::new([(i32::MIN as u32).to_le(), (i32::MAX as u32).to_le()]); - - for (expected, addr, offset) in [ - (11, a.get(), 0), - (22, a.get(), 4), - (33, b.get(), 0), - (44, b.get(), 4), - (55, c.get(), 0), - (66, c.get(), 4), - (i32::MIN as u32 as u64, d.get(), 0), - (i32::MAX as u32 as u64, d.get(), 4), - ] { - unsafe { - assert_one( - [ - (x(0), Val::from(0x1234567812345678u64)), - (x(1), Val::from(addr.cast::())), - ], - Load32UOffset8 { - dst: x(0), - ptr: x(1), - offset, - }, - x(0), - expected, - ); - } - } -} - -#[test] -fn load32_s_offset8() { - let a = UnsafeCell::new([11u32.to_le(), 22u32.to_le()]); - let b = UnsafeCell::new([33u32.to_le(), 44u32.to_le()]); - let c = UnsafeCell::new([55u32.to_le(), 66u32.to_le()]); - let d = UnsafeCell::new([(-1i32 as u32).to_le(), (i32::MAX as u32).to_le()]); - - for (expected, addr, offset) in [ - (11, a.get(), 0), - (22, a.get(), 4), - (33, b.get(), 0), - (44, b.get(), 4), - (55, c.get(), 0), - (55, unsafe { c.get().byte_add(4) }, -4), - (66, c.get(), 4), - (-1i64 as u64, d.get(), 0), - (i32::MAX as u32 as u64, d.get(), 4), - ] { - unsafe { - assert_one( - [ - (x(0), Val::from(0x1234567812345678u64)), - (x(1), Val::from(addr.cast::())), - ], - Load32SOffset8 { - dst: x(0), - ptr: x(1), - offset, - }, - x(0), - expected, - ); - } - } -} - -#[test] -fn load64_offset8() { - let a = UnsafeCell::new([11u64.to_le(), 22u64.to_le()]); - let b = UnsafeCell::new([33u64.to_le(), 44u64.to_le()]); - let c = UnsafeCell::new([55u64.to_le(), 66u64.to_le()]); - let d = UnsafeCell::new([(-1i64 as u64).to_le(), (i64::MAX as u64).to_le()]); - - for (expected, addr, offset) in [ - (11, a.get(), 0), - (22, a.get(), 8), - (33, b.get(), 0), - (44, b.get(), 8), - (55, c.get(), 0), - (66, c.get(), 8), - (-1i64 as u64, d.get(), 0), - (i64::MAX as u64, d.get(), 8), - ] { - unsafe { - assert_one( - [ - (x(0), 
Val::from(0x1234567812345678u64)), - (x(1), Val::from(addr)), - ], - Load64Offset8 { - dst: x(0), - ptr: x(1), - offset, - }, - x(0), - expected, + expected | 0x1234567800000000, ); } } } #[test] -fn load32_u_offset64() { +fn xload32le_u64_offset32() { let a = UnsafeCell::new([11u32.to_le(), 22u32.to_le()]); let b = UnsafeCell::new([33u32.to_le(), 44u32.to_le()]); let c = UnsafeCell::new([55u32.to_le(), 66u32.to_le()]); @@ -743,7 +547,7 @@ fn load32_u_offset64() { (x(0), Val::from(0x1234567812345678u64)), (x(1), Val::from(addr.cast::())), ], - Load32UOffset64 { + XLoad32LeU64Offset32 { dst: x(0), ptr: x(1), offset, @@ -756,7 +560,7 @@ fn load32_u_offset64() { } #[test] -fn load32_s_offset64() { +fn xload32le_s64_offset32() { let a = UnsafeCell::new([11u32.to_le(), 22u32.to_le()]); let b = UnsafeCell::new([33u32.to_le(), 44u32.to_le()]); let c = UnsafeCell::new([55u32.to_le(), 66u32.to_le()]); @@ -779,7 +583,7 @@ fn load32_s_offset64() { (x(0), Val::from(0x1234567812345678u64)), (x(1), Val::from(addr.cast::())), ], - Load32SOffset64 { + XLoad32LeS64Offset32 { dst: x(0), ptr: x(1), offset, @@ -792,7 +596,7 @@ fn load32_s_offset64() { } #[test] -fn load64_offset64() { +fn xload64le_offset32() { let a = UnsafeCell::new([11u64.to_le(), 22u64.to_le()]); let b = UnsafeCell::new([33u64.to_le(), 44u64.to_le()]); let c = UnsafeCell::new([55u64.to_le(), 66u64.to_le()]); @@ -814,7 +618,7 @@ fn load64_offset64() { (x(0), Val::from(0x1234567812345678u64)), (x(1), Val::from(addr)), ], - Load64Offset64 { + XLoad64LeOffset32 { dst: x(0), ptr: x(1), offset, @@ -827,180 +631,7 @@ fn load64_offset64() { } #[test] -fn store32() { - let a = UnsafeCell::new([0x12u8, 0x34, 0x56, 0x78, 0x12, 0x34, 0x56, 0x78]); - let b = UnsafeCell::new([0x12u8, 0x34, 0x56, 0x78, 0x12, 0x34, 0x56, 0x78]); - let c = UnsafeCell::new([0x12u8, 0x34, 0x56, 0x78, 0x12, 0x34, 0x56, 0x78]); - - unsafe { - for (val, addr) in [ - (0x11111111u32, a.get()), - (0x22222222, b.get().byte_add(4)), - (0x33333333, c.get().byte_add(2)), - ] { - let val = val as u64; - assert_one( - [(x(0), Val::from(addr)), (x(1), Val::from(val))], - Store32 { - ptr: x(0), - src: x(1), - }, - x(1), - val, - ); - } - } - - let a = u64::from_be_bytes(a.into_inner()); - let expected = 0x1111111112345678u64; - eprintln!("expected(a) = {expected:#018x}"); - eprintln!("actual(a) = {a:#018x}"); - assert_eq!(a, expected); - - let b = u64::from_be_bytes(b.into_inner()); - let expected = 0x1234567822222222u64; - eprintln!("expected(b) = {expected:#018x}"); - eprintln!("actual(b) = {b:#018x}"); - assert_eq!(b, expected); - - let c = u64::from_be_bytes(c.into_inner()); - let expected = 0x1234333333335678u64; - eprintln!("expected(c) = {expected:#018x}"); - eprintln!("actual(c) = {c:#018x}"); - assert_eq!(c, expected); -} - -#[test] -fn store64() { - let a = UnsafeCell::new(0x1234567812345678); - let b = UnsafeCell::new(0x1234567812345678); - let c = UnsafeCell::new(0x1234567812345678); - - unsafe { - for (val, addr) in [ - (0x1111111111111111u64, a.get()), - (0x2222222222222222, b.get()), - (0x3333333333333333, c.get()), - ] { - assert_one( - [(x(0), Val::from(addr)), (x(1), Val::from(val))], - Store64 { - ptr: x(0), - src: x(1), - }, - x(1), - val, - ); - } - } - - let a = a.into_inner(); - let expected = 0x1111111111111111u64; - eprintln!("expected(a) = {expected:#018x}"); - eprintln!("actual(a) = {a:#018x}"); - assert_eq!(a, expected); - - let b = b.into_inner(); - let expected = 0x2222222222222222u64; - eprintln!("expected(b) = {expected:#018x}"); - eprintln!("actual(b) = 
{b:#018x}"); - assert_eq!(b, expected); - - let c = c.into_inner(); - let expected = 0x3333333333333333u64; - eprintln!("expected(c) = {expected:#018x}"); - eprintln!("actual(c) = {c:#018x}"); - assert_eq!(c, expected); -} - -#[test] -fn store32_offset8() { - let a = UnsafeCell::new([0x12u8, 0x34, 0x56, 0x78, 0x12, 0x34, 0x56, 0x78]); - let b = UnsafeCell::new([0x12u8, 0x34, 0x56, 0x78, 0x12, 0x34, 0x56, 0x78]); - let c = UnsafeCell::new([0x12u8, 0x34, 0x56, 0x78, 0x12, 0x34, 0x56, 0x78]); - - unsafe { - for (val, addr, offset) in [ - (0x11111111u32, a.get(), 0), - (0x22222222, b.get(), 4), - (0x33333333, c.get(), 2), - ] { - let val = val as u64; - assert_one( - [(x(0), Val::from(addr)), (x(1), Val::from(val))], - Store32SOffset8 { - ptr: x(0), - src: x(1), - offset, - }, - x(1), - val, - ); - } - } - - let a = u64::from_be_bytes(a.into_inner()); - let expected = 0x1111111112345678u64; - eprintln!("expected(a) = {expected:#018x}"); - eprintln!("actual(a) = {a:#018x}"); - assert_eq!(a, expected); - - let b = u64::from_be_bytes(b.into_inner()); - let expected = 0x1234567822222222u64; - eprintln!("expected(b) = {expected:#018x}"); - eprintln!("actual(b) = {b:#018x}"); - assert_eq!(b, expected); - - let c = u64::from_be_bytes(c.into_inner()); - let expected = 0x1234333333335678u64; - eprintln!("expected(c) = {expected:#018x}"); - eprintln!("actual(c) = {c:#018x}"); - assert_eq!(c, expected); -} - -#[test] -fn store64_offset8() { - let a = UnsafeCell::new([0x1234567812345678, 0x1234567812345678, 0x1234567812345678]); - - unsafe { - for (val, addr, offset) in [ - (0x1111111111111111u64, a.get(), 0), - (0x2222222222222222, a.get(), 8), - (0x3333333333333333, a.get(), 16), - ] { - assert_one( - [(x(0), Val::from(addr)), (x(1), Val::from(val))], - Store64Offset8 { - ptr: x(0), - src: x(1), - offset, - }, - x(1), - val, - ); - } - } - - let [a, b, c] = a.into_inner(); - - let expected = 0x1111111111111111u64; - eprintln!("expected(a) = {expected:#018x}"); - eprintln!("actual(a) = {a:#018x}"); - assert_eq!(a, expected); - - let expected = 0x2222222222222222u64; - eprintln!("expected(b) = {expected:#018x}"); - eprintln!("actual(b) = {b:#018x}"); - assert_eq!(b, expected); - - let expected = 0x3333333333333333u64; - eprintln!("expected(c) = {expected:#018x}"); - eprintln!("actual(c) = {c:#018x}"); - assert_eq!(c, expected); -} - -#[test] -fn store32_offset64() { +fn xstore32_le_offset32() { let a = UnsafeCell::new([0x12u8, 0x34, 0x56, 0x78, 0x12, 0x34, 0x56, 0x78]); let b = UnsafeCell::new([0x12u8, 0x34, 0x56, 0x78, 0x12, 0x34, 0x56, 0x78]); let c = UnsafeCell::new([0x12u8, 0x34, 0x56, 0x78, 0x12, 0x34, 0x56, 0x78]); @@ -1014,7 +645,7 @@ fn store32_offset64() { let val = val as u64; assert_one( [(x(0), Val::from(addr)), (x(1), Val::from(val))], - Store32SOffset64 { + XStore32LeOffset32 { ptr: x(0), src: x(1), offset, @@ -1045,7 +676,7 @@ fn store32_offset64() { } #[test] -fn store64_offset64() { +fn xstore64_le_offset32() { let a = UnsafeCell::new([0x1234567812345678, 0x1234567812345678, 0x1234567812345678]); unsafe { @@ -1056,7 +687,7 @@ fn store64_offset64() { ] { assert_one( [(x(0), Val::from(addr)), (x(1), Val::from(val))], - Store64Offset64 { + XStore64LeOffset32 { ptr: x(0), src: x(1), offset, diff --git a/supply-chain/audits.toml b/supply-chain/audits.toml index f945e03aa8a9..e6ce2a06b15b 100644 --- a/supply-chain/audits.toml +++ b/supply-chain/audits.toml @@ -992,6 +992,11 @@ criteria = "safe-to-deploy" version = "1.1.4" notes = "I am the author of this crate." 
+[[audits.arbitrary]] +who = "Nick Fitzgerald " +criteria = "safe-to-deploy" +version = "1.4.1" + [[audits.arrayref]] who = "Nick Fitzgerald " criteria = "safe-to-deploy" diff --git a/supply-chain/imports.lock b/supply-chain/imports.lock index a45a8f645c89..4261bd1ab592 100644 --- a/supply-chain/imports.lock +++ b/supply-chain/imports.lock @@ -437,6 +437,13 @@ user-id = 696 user-login = "fitzgen" user-name = "Nick Fitzgerald" +[[publisher.arbitrary]] +version = "1.4.0" +when = "2024-11-04" +user-id = 696 +user-login = "fitzgen" +user-name = "Nick Fitzgerald" + [[publisher.async-trait]] version = "0.1.71" when = "2023-07-05" @@ -686,6 +693,13 @@ user-id = 696 user-login = "fitzgen" user-name = "Nick Fitzgerald" +[[publisher.derive_arbitrary]] +version = "1.4.0" +when = "2024-11-04" +user-id = 696 +user-login = "fitzgen" +user-name = "Nick Fitzgerald" + [[publisher.dlmalloc]] version = "0.2.4" when = "2022-08-17" diff --git a/tests/all/custom_code_memory.rs b/tests/all/custom_code_memory.rs new file mode 100644 index 000000000000..61afacf61fa6 --- /dev/null +++ b/tests/all/custom_code_memory.rs @@ -0,0 +1,53 @@ +#[cfg(all(not(target_os = "windows"), not(miri)))] +mod not_for_windows { + use rustix::mm::{mprotect, MprotectFlags}; + use rustix::param::page_size; + use std::sync::Arc; + use wasmtime::*; + + struct CustomCodePublish; + impl CustomCodeMemory for CustomCodePublish { + fn required_alignment(&self) -> usize { + page_size() + } + + fn publish_executable(&self, ptr: *const u8, len: usize) -> anyhow::Result<()> { + unsafe { + mprotect( + ptr as *mut _, + len, + MprotectFlags::READ | MprotectFlags::EXEC, + )?; + } + Ok(()) + } + + fn unpublish_executable(&self, ptr: *const u8, len: usize) -> anyhow::Result<()> { + unsafe { + mprotect( + ptr as *mut _, + len, + MprotectFlags::READ | MprotectFlags::WRITE, + )?; + } + Ok(()) + } + } + + #[test] + fn custom_code_publish() { + let mut config = Config::default(); + config.with_custom_code_memory(Some(Arc::new(CustomCodePublish))); + let engine = Engine::new(&config).unwrap(); + let module = Module::new( + &engine, + "(module (func (export \"main\") (result i32) i32.const 42))", + ) + .unwrap(); + let mut store = Store::new(&engine, ()); + let instance = Instance::new(&mut store, &module, &[]).unwrap(); + let func: TypedFunc<(), i32> = instance.get_typed_func(&mut store, "main").unwrap(); + let result = func.call(&mut store, ()).unwrap(); + assert_eq!(result, 42); + } +} diff --git a/tests/all/main.rs b/tests/all/main.rs index 09de4068abbd..1bb89e11d4ef 100644 --- a/tests/all/main.rs +++ b/tests/all/main.rs @@ -8,6 +8,7 @@ mod cli_tests; mod code_too_large; mod component_model; mod coredump; +mod custom_code_memory; mod debug; mod defaults; mod epoch_interruption; diff --git a/tests/all/memory.rs b/tests/all/memory.rs index 6c8f1c651924..fb46d40d0d36 100644 --- a/tests/all/memory.rs +++ b/tests/all/memory.rs @@ -734,3 +734,19 @@ fn get_memory_type_with_custom_page_size_from_wasm(config: &mut Config) -> Resul Ok(()) } + +#[wasmtime_test] +fn configure_zero(config: &mut Config) -> Result<()> { + config.guard_before_linear_memory(false); + config.memory_guard_size(0); + config.memory_reservation(0); + config.memory_reservation_for_growth(0); + let engine = Engine::new(&config)?; + let mut store = Store::new(&engine, ()); + + let ty = MemoryType::new(0, None); + let memory = Memory::new(&mut store, ty)?; + assert_eq!(memory.data_size(&store), 0); + + Ok(()) +} diff --git a/tests/disas/pulley/call.wat b/tests/disas/pulley/call.wat index 
238e2b363d2a..57f6f28d4349 100644 --- a/tests/disas/pulley/call.wat +++ b/tests/disas/pulley/call.wat @@ -7,9 +7,9 @@ ) ;; wasm[0]::function[1]: ;; push_frame -;; load32_u_offset8 x3, x0, 44 +;; xload32le_offset32 x3, x0, 44 ;; xmov x6, x0 -;; load32_u_offset8 x0, x6, 52 +;; xload32le_offset32 x0, x6, 52 ;; xmov x1, x6 ;; call_indirect x3 ;; pop_frame diff --git a/tests/disas/pulley/epoch-simple.wat b/tests/disas/pulley/epoch-simple.wat index 1f503e82902e..687ada74d2f1 100644 --- a/tests/disas/pulley/epoch-simple.wat +++ b/tests/disas/pulley/epoch-simple.wat @@ -7,13 +7,14 @@ ) ;; wasm[0]::function[0]: ;; push_frame -;; load64_offset8 x7, x0, 8 -;; load64_offset8 x8, x0, 32 -;; load64 x8, x8 -;; load64_offset8 x7, x7, 8 -;; xulteq64 x7, x7, x8 -;; br_if x7, 0x8 // target = 0x1b -;; 19: pop_frame +;; xload64le_offset32 x8, x0, 8 +;; xload64le_offset32 x9, x0, 32 +;; xload64le_offset32 x9, x9, 0 +;; xload64le_offset32 x8, x8, 8 +;; xulteq64 x8, x8, x9 +;; zext8 x8, x8 +;; br_if32 x8, 0x8 // target = 0x2b +;; 29: pop_frame ;; ret -;; 1b: call 0x83 // target = 0x9e -;; 20: jump 0xfffffffffffffff9 // target = 0x19 +;; 2b: call 0xa2 // target = 0xcd +;; 30: jump 0xfffffffffffffff9 // target = 0x29 diff --git a/tests/disas/winch/aarch64/call/multi.wat b/tests/disas/winch/aarch64/call/multi.wat new file mode 100644 index 000000000000..8dfd5e5e4929 --- /dev/null +++ b/tests/disas/winch/aarch64/call/multi.wat @@ -0,0 +1,64 @@ +;;! target = "aarch64" +;;! test = "winch" +(module + (func $multi (result i32 i32) + i32.const 1 + i32.const 2) + + (func $start + call $multi + drop + drop) +) +;; wasm[0]::function[0]::multi: +;; stp x29, x30, [sp, #-0x10]! +;; mov x29, sp +;; mov x28, sp +;; mov x9, x1 +;; sub sp, sp, #0x18 +;; mov x28, sp +;; stur x1, [x28, #0x10] +;; stur x2, [x28, #8] +;; stur x0, [x28] +;; mov x16, #2 +;; mov w0, w16 +;; sub sp, sp, #4 +;; mov x28, sp +;; mov x16, #1 +;; stur w16, [x28] +;; ldur x1, [x28, #4] +;; ldur w16, [x28] +;; add sp, sp, #4 +;; mov x28, sp +;; stur w16, [x1] +;; add sp, sp, #0x18 +;; mov x28, sp +;; ldp x29, x30, [sp], #0x10 +;; ret +;; +;; wasm[0]::function[1]::start: +;; stp x29, x30, [sp, #-0x10]! +;; mov x29, sp +;; mov x28, sp +;; mov x9, x0 +;; sub sp, sp, #0x10 +;; mov x28, sp +;; stur x0, [x28, #8] +;; stur x1, [x28] +;; sub sp, sp, #4 +;; mov x28, sp +;; sub sp, sp, #0xc +;; mov x28, sp +;; mov x1, x9 +;; mov x2, x9 +;; ldur x0, [x28, #0xc] +;; bl #0 +;; a0: add sp, sp, #0xc +;; mov x28, sp +;; ldur x9, [x28, #0xc] +;; add sp, sp, #4 +;; mov x28, sp +;; add sp, sp, #0x10 +;; mov x28, sp +;; ldp x29, x30, [sp], #0x10 +;; ret diff --git a/tests/disas/winch/aarch64/call/params.wat b/tests/disas/winch/aarch64/call/params.wat new file mode 100644 index 000000000000..f7fb0484f90c --- /dev/null +++ b/tests/disas/winch/aarch64/call/params.wat @@ -0,0 +1,166 @@ +;;! target = "aarch64" +;;! 
test = "winch" + +(module + (func (export "main") (param i32) (param i32) (result i32) + (local.get 1) + (local.get 0) + (i32.add) + + (call $add (i32.const 1) (i32.const 2) (i32.const 3) (i32.const 4) (i32.const 5) (i32.const 6) (i32.const 7) (i32.const 8)) + + (local.get 1) + (local.get 0) + (i32.add) + + (call $add (i32.const 2) (i32.const 3) (i32.const 4) (i32.const 5) (i32.const 6) (i32.const 7) (i32.const 8)) + ) + + (func $add (param i32 i32 i32 i32 i32 i32 i32 i32 i32) (result i32) + (local.get 0) + (local.get 1) + (i32.add) + (local.get 2) + (i32.add) + (local.get 3) + (i32.add) + (local.get 4) + (i32.add) + (local.get 5) + (i32.add) + (local.get 6) + (i32.add) + (local.get 7) + (i32.add) + (local.get 8) + (i32.add) + ) +) + +;; wasm[0]::function[0]: +;; stp x29, x30, [sp, #-0x10]! +;; mov x29, sp +;; mov x28, sp +;; mov x9, x0 +;; sub sp, sp, #0x18 +;; mov x28, sp +;; stur x0, [x28, #0x10] +;; stur x1, [x28, #8] +;; stur w2, [x28, #4] +;; stur w3, [x28] +;; ldur w0, [x28, #4] +;; ldur w1, [x28] +;; add w1, w1, w0, uxtx +;; sub sp, sp, #4 +;; mov x28, sp +;; stur w1, [x28] +;; sub sp, sp, #0x24 +;; mov x28, sp +;; mov x0, x9 +;; mov x1, x9 +;; ldur w2, [x28, #0x24] +;; mov x16, #1 +;; mov w3, w16 +;; mov x16, #2 +;; mov w4, w16 +;; mov x16, #3 +;; mov w5, w16 +;; mov x16, #4 +;; mov w6, w16 +;; mov x16, #5 +;; mov w7, w16 +;; mov x16, #6 +;; mov w16, w16 +;; stur w16, [x28] +;; mov x16, #7 +;; mov w16, w16 +;; stur w16, [x28, #8] +;; mov x16, #8 +;; mov w16, w16 +;; stur w16, [x28, #0x10] +;; bl #0x160 +;; a4: add sp, sp, #0x24 +;; mov x28, sp +;; add sp, sp, #4 +;; mov x28, sp +;; ldur x9, [x28, #0x10] +;; ldur w1, [x28, #4] +;; ldur w2, [x28] +;; add w2, w2, w1, uxtx +;; sub sp, sp, #4 +;; mov x28, sp +;; stur w0, [x28] +;; sub sp, sp, #4 +;; mov x28, sp +;; stur w2, [x28] +;; sub sp, sp, #0x20 +;; mov x28, sp +;; mov x0, x9 +;; mov x1, x9 +;; ldur w2, [x28, #0x24] +;; ldur w3, [x28, #0x20] +;; mov x16, #2 +;; mov w4, w16 +;; mov x16, #3 +;; mov w5, w16 +;; mov x16, #4 +;; mov w6, w16 +;; mov x16, #5 +;; mov w7, w16 +;; mov x16, #6 +;; mov w16, w16 +;; stur w16, [x28] +;; mov x16, #7 +;; mov w16, w16 +;; stur w16, [x28, #8] +;; mov x16, #8 +;; mov w16, w16 +;; stur w16, [x28, #0x10] +;; bl #0x160 +;; 13c: add sp, sp, #0x20 +;; mov x28, sp +;; add sp, sp, #8 +;; mov x28, sp +;; ldur x9, [x28, #0x10] +;; add sp, sp, #0x18 +;; mov x28, sp +;; ldp x29, x30, [sp], #0x10 +;; ret +;; +;; wasm[0]::function[1]::add: +;; stp x29, x30, [sp, #-0x10]! +;; mov x29, sp +;; mov x28, sp +;; mov x9, x0 +;; sub sp, sp, #0x28 +;; mov x28, sp +;; stur x0, [x28, #0x20] +;; stur x1, [x28, #0x18] +;; stur w2, [x28, #0x14] +;; stur w3, [x28, #0x10] +;; stur w4, [x28, #0xc] +;; stur w5, [x28, #8] +;; stur w6, [x28, #4] +;; stur w7, [x28] +;; ldur w0, [x28, #0x10] +;; ldur w1, [x28, #0x14] +;; add w1, w1, w0, uxtx +;; ldur w0, [x28, #0xc] +;; add w1, w1, w0, uxtx +;; ldur w0, [x28, #8] +;; add w1, w1, w0, uxtx +;; ldur w0, [x28, #4] +;; add w1, w1, w0, uxtx +;; ldur w0, [x28] +;; add w1, w1, w0, uxtx +;; ldur w0, [x29, #0x10] +;; add w1, w1, w0, uxtx +;; ldur w0, [x29, #0x18] +;; add w1, w1, w0, uxtx +;; ldur w0, [x29, #0x20] +;; add w1, w1, w0, uxtx +;; mov w0, w1 +;; add sp, sp, #0x28 +;; mov x28, sp +;; ldp x29, x30, [sp], #0x10 +;; ret diff --git a/tests/disas/winch/aarch64/call/recursive.wat b/tests/disas/winch/aarch64/call/recursive.wat new file mode 100644 index 000000000000..1bd5f243a0a1 --- /dev/null +++ b/tests/disas/winch/aarch64/call/recursive.wat @@ -0,0 +1,84 @@ +;;! target = "aarch64" +;;! 
test = "winch" + +(module + (func $fibonacci8 (param $n i32) (result i32) + (if (result i32) (i32.le_s (local.get $n) (i32.const 1)) + (then + ;; If n <= 1, return n (base case) + (local.get $n) + ) + (else + ;; Else, return fibonacci(n - 1) + fibonacci(n - 2) + (i32.add + (call $fibonacci8 + (i32.sub (local.get $n) (i32.const 1)) ;; Calculate n - 1 + ) + (call $fibonacci8 + (i32.sub (local.get $n) (i32.const 2)) ;; Calculate n - 2 + ) + ) + ) + ) + ) + (export "fib" (func $fibonacci8)) +) + +;; wasm[0]::function[0]::fibonacci8: +;; stp x29, x30, [sp, #-0x10]! +;; mov x29, sp +;; mov x28, sp +;; mov x9, x0 +;; sub sp, sp, #0x18 +;; mov x28, sp +;; stur x0, [x28, #0x10] +;; stur x1, [x28, #8] +;; stur w2, [x28, #4] +;; ldur w0, [x28, #4] +;; cmp w0, #1 +;; cset x0, le +;; tst w0, w0 +;; b.eq #0x44 +;; b #0x3c +;; 3c: ldur w0, [x28, #4] +;; b #0xd4 +;; 44: ldur w0, [x28, #4] +;; sub w0, w0, #1 +;; sub sp, sp, #4 +;; mov x28, sp +;; stur w0, [x28] +;; sub sp, sp, #4 +;; mov x28, sp +;; mov x0, x9 +;; mov x1, x9 +;; ldur w2, [x28, #4] +;; bl #0 +;; 70: add sp, sp, #4 +;; mov x28, sp +;; add sp, sp, #4 +;; mov x28, sp +;; ldur x9, [x28, #0x10] +;; ldur w1, [x28, #4] +;; sub w1, w1, #2 +;; sub sp, sp, #4 +;; mov x28, sp +;; stur w0, [x28] +;; sub sp, sp, #4 +;; mov x28, sp +;; stur w1, [x28] +;; mov x0, x9 +;; mov x1, x9 +;; ldur w2, [x28] +;; bl #0 +;; b4: add sp, sp, #4 +;; mov x28, sp +;; ldur x9, [x28, #0x14] +;; ldur w1, [x28] +;; add sp, sp, #4 +;; mov x28, sp +;; add w1, w1, w0, uxtx +;; mov w0, w1 +;; add sp, sp, #0x18 +;; mov x28, sp +;; ldp x29, x30, [sp], #0x10 +;; ret diff --git a/tests/disas/winch/aarch64/call/reg_on_stack.wat b/tests/disas/winch/aarch64/call/reg_on_stack.wat new file mode 100644 index 000000000000..7e670aeca4a5 --- /dev/null +++ b/tests/disas/winch/aarch64/call/reg_on_stack.wat @@ -0,0 +1,68 @@ +;;! target = "aarch64" +;;! test = "winch" + +(module + (func (export "") (param i32) (result i32) + local.get 0 + i32.const 1 + call 0 + i32.const 1 + call 0 + br_if 0 (;@0;) + unreachable + ) +) + +;; wasm[0]::function[0]: +;; stp x29, x30, [sp, #-0x10]! +;; mov x29, sp +;; mov x28, sp +;; mov x9, x0 +;; sub sp, sp, #0x18 +;; mov x28, sp +;; stur x0, [x28, #0x10] +;; stur x1, [x28, #8] +;; stur w2, [x28, #4] +;; ldur w16, [x28, #4] +;; sub sp, sp, #4 +;; mov x28, sp +;; stur w16, [x28] +;; sub sp, sp, #4 +;; mov x28, sp +;; mov x0, x9 +;; mov x1, x9 +;; mov x16, #1 +;; mov w2, w16 +;; bl #0 +;; 50: add sp, sp, #4 +;; mov x28, sp +;; ldur x9, [x28, #0x14] +;; sub sp, sp, #4 +;; mov x28, sp +;; stur w0, [x28] +;; mov x0, x9 +;; mov x1, x9 +;; mov x16, #1 +;; mov w2, w16 +;; bl #0 +;; 7c: ldur x9, [x28, #0x18] +;; sub sp, sp, #4 +;; mov x28, sp +;; stur w0, [x28] +;; ldur w1, [x28] +;; add sp, sp, #4 +;; mov x28, sp +;; ldur w0, [x28] +;; add sp, sp, #4 +;; mov x28, sp +;; tst w1, w1 +;; b.eq #0xbc +;; b #0xb0 +;; b0: add sp, sp, #4 +;; mov x28, sp +;; b #0xc0 +;; bc: .byte 0x1f, 0xc1, 0x00, 0x00 +;; add sp, sp, #0x18 +;; mov x28, sp +;; ldp x29, x30, [sp], #0x10 +;; ret diff --git a/tests/disas/winch/aarch64/call/simple.wat b/tests/disas/winch/aarch64/call/simple.wat new file mode 100644 index 000000000000..f9f3ab6f8ebc --- /dev/null +++ b/tests/disas/winch/aarch64/call/simple.wat @@ -0,0 +1,69 @@ +;;! target = "aarch64" +;;! 
test = "winch" + +(module + (func $main (result i32) + (local $var i32) + (call $add (i32.const 20) (i32.const 80)) + (local.set $var (i32.const 2)) + (local.get $var) + (i32.add)) + + (func $add (param i32 i32) (result i32) + (local.get 0) + (local.get 1) + (i32.add)) +) + +;; wasm[0]::function[0]::main: +;; stp x29, x30, [sp, #-0x10]! +;; mov x29, sp +;; mov x28, sp +;; mov x9, x0 +;; sub sp, sp, #0x18 +;; mov x28, sp +;; stur x0, [x28, #0x10] +;; stur x1, [x28, #8] +;; mov x16, #0 +;; stur x16, [x28] +;; sub sp, sp, #8 +;; mov x28, sp +;; mov x0, x9 +;; mov x1, x9 +;; mov x16, #0x14 +;; mov w2, w16 +;; mov x16, #0x50 +;; mov w3, w16 +;; bl #0x80 +;; 4c: add sp, sp, #8 +;; mov x28, sp +;; ldur x9, [x28, #0x10] +;; mov x16, #2 +;; mov w1, w16 +;; stur w1, [x28, #4] +;; ldur w1, [x28, #4] +;; add w0, w0, w1, uxtx +;; add sp, sp, #0x18 +;; mov x28, sp +;; ldp x29, x30, [sp], #0x10 +;; ret +;; +;; wasm[0]::function[1]::add: +;; stp x29, x30, [sp, #-0x10]! +;; mov x29, sp +;; mov x28, sp +;; mov x9, x0 +;; sub sp, sp, #0x18 +;; mov x28, sp +;; stur x0, [x28, #0x10] +;; stur x1, [x28, #8] +;; stur w2, [x28, #4] +;; stur w3, [x28] +;; ldur w0, [x28] +;; ldur w1, [x28, #4] +;; add w1, w1, w0, uxtx +;; mov w0, w1 +;; add sp, sp, #0x18 +;; mov x28, sp +;; ldp x29, x30, [sp], #0x10 +;; ret diff --git a/tests/disas/winch/aarch64/call_indirect/call_indirect.wat b/tests/disas/winch/aarch64/call_indirect/call_indirect.wat new file mode 100644 index 000000000000..d3df79e54d95 --- /dev/null +++ b/tests/disas/winch/aarch64/call_indirect/call_indirect.wat @@ -0,0 +1,190 @@ +;;! target="aarch64" +;;! test = "winch" + +(module + (type $over-i32 (func (param i32) (result i32))) + + (table funcref + (elem + $fib-i32 + ) + ) + + (func $fib-i32 (export "fib-i32") (type $over-i32) + (if (result i32) (i32.le_u (local.get 0) (i32.const 1)) + (then (i32.const 1)) + (else + (i32.add + (call_indirect (type $over-i32) + (i32.sub (local.get 0) (i32.const 2)) + (i32.const 0) + ) + (call_indirect (type $over-i32) + (i32.sub (local.get 0) (i32.const 1)) + (i32.const 0) + ) + ) + ) + ) + ) +) + +;; wasm[0]::function[0]::fib-i32: +;; stp x29, x30, [sp, #-0x10]! 
+;; mov x29, sp +;; mov x28, sp +;; mov x9, x0 +;; sub sp, sp, #0x18 +;; mov x28, sp +;; stur x0, [x28, #0x10] +;; stur x1, [x28, #8] +;; stur w2, [x28, #4] +;; ldur w0, [x28, #4] +;; cmp w0, #1 +;; cset x0, ls +;; tst w0, w0 +;; b.eq #0x48 +;; b #0x3c +;; 3c: mov x16, #1 +;; mov w0, w16 +;; b #0x250 +;; 48: ldur w0, [x28, #4] +;; sub w0, w0, #2 +;; sub sp, sp, #4 +;; mov x28, sp +;; stur w0, [x28] +;; mov x16, #0 +;; mov w1, w16 +;; mov x2, x9 +;; ldur x3, [x2, #0x60] +;; cmp x1, x3, uxtx +;; b.hs #0x260 +;; 74: mov x16, x1 +;; mov x16, #8 +;; mul x16, x16, x16 +;; ldur x2, [x2, #0x58] +;; mov x4, x2 +;; add x2, x2, x16, uxtx +;; cmp w1, w3, uxtx +;; csel x2, x4, x4, hs +;; ldur x0, [x2] +;; tst x0, x0 +;; b.ne #0xd4 +;; b #0xa4 +;; a4: sub sp, sp, #4 +;; mov x28, sp +;; stur w1, [x28] +;; mov x0, x9 +;; mov x16, #0 +;; mov w1, w16 +;; ldur w2, [x28] +;; bl #0x3b4 +;; c4: add sp, sp, #4 +;; mov x28, sp +;; ldur x9, [x28, #0x14] +;; b #0xd8 +;; d4: and x0, x0, #0xfffffffffffffffe +;; cbz x0, #0x264 +;; dc: ldur x16, [x9, #0x50] +;; ldur w1, [x16] +;; ldur w2, [x0, #0x10] +;; cmp w1, w2, uxtx +;; b.ne #0x268 +;; f0: sub sp, sp, #8 +;; mov x28, sp +;; stur x0, [x28] +;; ldur x3, [x28] +;; add sp, sp, #8 +;; mov x28, sp +;; ldur x5, [x3, #0x18] +;; ldur x4, [x3, #8] +;; sub sp, sp, #4 +;; mov x28, sp +;; mov x0, x5 +;; mov x1, x9 +;; ldur w2, [x28, #4] +;; blr x4 +;; 128: add sp, sp, #4 +;; mov x28, sp +;; add sp, sp, #4 +;; mov x28, sp +;; ldur x9, [x28, #0x10] +;; ldur w1, [x28, #4] +;; sub w1, w1, #1 +;; sub sp, sp, #4 +;; mov x28, sp +;; stur w0, [x28] +;; sub sp, sp, #4 +;; mov x28, sp +;; stur w1, [x28] +;; mov x16, #0 +;; mov w1, w16 +;; mov x2, x9 +;; ldur x3, [x2, #0x60] +;; cmp x1, x3, uxtx +;; b.hs #0x26c +;; 174: mov x16, x1 +;; mov x16, #8 +;; mul x16, x16, x16 +;; ldur x2, [x2, #0x58] +;; mov x4, x2 +;; add x2, x2, x16, uxtx +;; cmp w1, w3, uxtx +;; csel x2, x4, x4, hs +;; ldur x0, [x2] +;; tst x0, x0 +;; b.ne #0x1e4 +;; b #0x1a4 +;; 1a4: sub sp, sp, #4 +;; mov x28, sp +;; stur w1, [x28] +;; sub sp, sp, #0xc +;; mov x28, sp +;; mov x0, x9 +;; mov x16, #0 +;; mov w1, w16 +;; ldur w2, [x28, #0xc] +;; bl #0x3b4 +;; 1cc: add sp, sp, #0xc +;; mov x28, sp +;; add sp, sp, #4 +;; mov x28, sp +;; ldur x9, [x28, #0x18] +;; b #0x1e8 +;; 1e4: and x0, x0, #0xfffffffffffffffe +;; cbz x0, #0x270 +;; 1ec: ldur x16, [x9, #0x50] +;; ldur w1, [x16] +;; ldur w2, [x0, #0x10] +;; cmp w1, w2, uxtx +;; b.ne #0x274 +;; 200: sub sp, sp, #8 +;; mov x28, sp +;; stur x0, [x28] +;; ldur x3, [x28] +;; add sp, sp, #8 +;; mov x28, sp +;; ldur x5, [x3, #0x18] +;; ldur x4, [x3, #8] +;; mov x0, x5 +;; mov x1, x9 +;; ldur w2, [x28] +;; blr x4 +;; 230: add sp, sp, #4 +;; mov x28, sp +;; ldur x9, [x28, #0x14] +;; ldur w1, [x28] +;; add sp, sp, #4 +;; mov x28, sp +;; add w1, w1, w0, uxtx +;; mov w0, w1 +;; add sp, sp, #0x18 +;; mov x28, sp +;; ldp x29, x30, [sp], #0x10 +;; ret +;; 260: .byte 0x1f, 0xc1, 0x00, 0x00 +;; 264: .byte 0x1f, 0xc1, 0x00, 0x00 +;; 268: .byte 0x1f, 0xc1, 0x00, 0x00 +;; 26c: .byte 0x1f, 0xc1, 0x00, 0x00 +;; 270: .byte 0x1f, 0xc1, 0x00, 0x00 +;; 274: .byte 0x1f, 0xc1, 0x00, 0x00 diff --git a/tests/disas/winch/aarch64/call_indirect/local_arg.wat b/tests/disas/winch/aarch64/call_indirect/local_arg.wat new file mode 100644 index 000000000000..8f91824496b7 --- /dev/null +++ b/tests/disas/winch/aarch64/call_indirect/local_arg.wat @@ -0,0 +1,112 @@ +;;! target="aarch64" +;;! 
test = "winch" + +(module + (type $param-i32 (func (param i32))) + + (func $param-i32 (type $param-i32)) + (func (export "") + (local i32) + local.get 0 + (call_indirect (type $param-i32) (i32.const 0)) + ) + + (table funcref + (elem + $param-i32) + ) +) + +;; wasm[0]::function[0]::param-i32: +;; stp x29, x30, [sp, #-0x10]! +;; mov x29, sp +;; mov x28, sp +;; mov x9, x0 +;; sub sp, sp, #0x18 +;; mov x28, sp +;; stur x0, [x28, #0x10] +;; stur x1, [x28, #8] +;; stur w2, [x28, #4] +;; add sp, sp, #0x18 +;; mov x28, sp +;; ldp x29, x30, [sp], #0x10 +;; ret +;; +;; wasm[0]::function[1]: +;; stp x29, x30, [sp, #-0x10]! +;; mov x29, sp +;; mov x28, sp +;; mov x9, x0 +;; sub sp, sp, #0x18 +;; mov x28, sp +;; stur x0, [x28, #0x10] +;; stur x1, [x28, #8] +;; mov x16, #0 +;; stur x16, [x28] +;; ldur w16, [x28, #4] +;; sub sp, sp, #4 +;; mov x28, sp +;; stur w16, [x28] +;; mov x16, #0 +;; mov w1, w16 +;; mov x2, x9 +;; ldur x3, [x2, #0x60] +;; cmp x1, x3, uxtx +;; b.hs #0x168 +;; 90: mov x16, x1 +;; mov x16, #8 +;; mul x16, x16, x16 +;; ldur x2, [x2, #0x58] +;; mov x4, x2 +;; add x2, x2, x16, uxtx +;; cmp w1, w3, uxtx +;; csel x2, x4, x4, hs +;; ldur x0, [x2] +;; tst x0, x0 +;; b.ne #0xf0 +;; b #0xc0 +;; c0: sub sp, sp, #4 +;; mov x28, sp +;; stur w1, [x28] +;; mov x0, x9 +;; mov x16, #0 +;; mov w1, w16 +;; ldur w2, [x28] +;; bl #0x3a4 +;; e0: add sp, sp, #4 +;; mov x28, sp +;; ldur x9, [x28, #0x14] +;; b #0xf4 +;; f0: and x0, x0, #0xfffffffffffffffe +;; cbz x0, #0x16c +;; f8: ldur x16, [x9, #0x50] +;; ldur w1, [x16] +;; ldur w2, [x0, #0x10] +;; cmp w1, w2, uxtx +;; b.ne #0x170 +;; 10c: sub sp, sp, #8 +;; mov x28, sp +;; stur x0, [x28] +;; ldur x3, [x28] +;; add sp, sp, #8 +;; mov x28, sp +;; ldur x5, [x3, #0x18] +;; ldur x4, [x3, #8] +;; sub sp, sp, #4 +;; mov x28, sp +;; mov x0, x5 +;; mov x1, x9 +;; ldur w2, [x28, #4] +;; blr x4 +;; 144: add sp, sp, #4 +;; mov x28, sp +;; add sp, sp, #4 +;; mov x28, sp +;; ldur x9, [x28, #0x10] +;; add sp, sp, #0x18 +;; mov x28, sp +;; ldp x29, x30, [sp], #0x10 +;; ret +;; 168: .byte 0x1f, 0xc1, 0x00, 0x00 +;; 16c: .byte 0x1f, 0xc1, 0x00, 0x00 +;; 170: .byte 0x1f, 0xc1, 0x00, 0x00 diff --git a/tests/disas/winch/aarch64/f32_convert_i32_s/const.wat b/tests/disas/winch/aarch64/f32_convert_i32_s/const.wat new file mode 100644 index 000000000000..a08ac25861a0 --- /dev/null +++ b/tests/disas/winch/aarch64/f32_convert_i32_s/const.wat @@ -0,0 +1,25 @@ +;;! target = "aarch64" +;;! test = "winch" + +(module + (func (result f32) + (i32.const 1) + (f32.convert_i32_s) + ) +) +;; wasm[0]::function[0]: +;; stp x29, x30, [sp, #-0x10]! +;; mov x29, sp +;; mov x28, sp +;; mov x9, x0 +;; sub sp, sp, #0x10 +;; mov x28, sp +;; stur x0, [x28, #8] +;; stur x1, [x28] +;; mov x16, #1 +;; mov w0, w16 +;; scvtf s0, w0 +;; add sp, sp, #0x10 +;; mov x28, sp +;; ldp x29, x30, [sp], #0x10 +;; ret diff --git a/tests/disas/winch/aarch64/f32_convert_i32_s/locals.wat b/tests/disas/winch/aarch64/f32_convert_i32_s/locals.wat new file mode 100644 index 000000000000..a1dd81c7be17 --- /dev/null +++ b/tests/disas/winch/aarch64/f32_convert_i32_s/locals.wat @@ -0,0 +1,28 @@ +;;! target = "aarch64" +;;! test = "winch" + +(module + (func (result f32) + (local i32) + + (local.get 0) + (f32.convert_i32_s) + ) +) +;; wasm[0]::function[0]: +;; stp x29, x30, [sp, #-0x10]! 
+;; mov x29, sp +;; mov x28, sp +;; mov x9, x0 +;; sub sp, sp, #0x18 +;; mov x28, sp +;; stur x0, [x28, #0x10] +;; stur x1, [x28, #8] +;; mov x16, #0 +;; stur x16, [x28] +;; ldur w0, [x28, #4] +;; scvtf s0, w0 +;; add sp, sp, #0x18 +;; mov x28, sp +;; ldp x29, x30, [sp], #0x10 +;; ret diff --git a/tests/disas/winch/aarch64/f32_convert_i32_s/params.wat b/tests/disas/winch/aarch64/f32_convert_i32_s/params.wat new file mode 100644 index 000000000000..15f230835a51 --- /dev/null +++ b/tests/disas/winch/aarch64/f32_convert_i32_s/params.wat @@ -0,0 +1,25 @@ +;;! target = "aarch64" +;;! test = "winch" + +(module + (func (param i32) (result f32) + (local.get 0) + (f32.convert_i32_s) + ) +) +;; wasm[0]::function[0]: +;; stp x29, x30, [sp, #-0x10]! +;; mov x29, sp +;; mov x28, sp +;; mov x9, x0 +;; sub sp, sp, #0x18 +;; mov x28, sp +;; stur x0, [x28, #0x10] +;; stur x1, [x28, #8] +;; stur w2, [x28, #4] +;; ldur w0, [x28, #4] +;; scvtf s0, w0 +;; add sp, sp, #0x18 +;; mov x28, sp +;; ldp x29, x30, [sp], #0x10 +;; ret diff --git a/tests/disas/winch/aarch64/f32_convert_i32_s/spilled.wat b/tests/disas/winch/aarch64/f32_convert_i32_s/spilled.wat new file mode 100644 index 000000000000..4cb1f7eafac5 --- /dev/null +++ b/tests/disas/winch/aarch64/f32_convert_i32_s/spilled.wat @@ -0,0 +1,33 @@ +;;! target = "aarch64" +;;! test = "winch" + +(module + (func (result f32) + i32.const 1 + f32.convert_i32_s + block + end + ) +) +;; wasm[0]::function[0]: +;; stp x29, x30, [sp, #-0x10]! +;; mov x29, sp +;; mov x28, sp +;; mov x9, x0 +;; sub sp, sp, #0x10 +;; mov x28, sp +;; stur x0, [x28, #8] +;; stur x1, [x28] +;; mov x16, #1 +;; mov w0, w16 +;; scvtf s0, w0 +;; sub sp, sp, #4 +;; mov x28, sp +;; stur s0, [x28] +;; ldur s0, [x28] +;; add sp, sp, #4 +;; mov x28, sp +;; add sp, sp, #0x10 +;; mov x28, sp +;; ldp x29, x30, [sp], #0x10 +;; ret diff --git a/tests/disas/winch/aarch64/f32_convert_i32_u/const.wat b/tests/disas/winch/aarch64/f32_convert_i32_u/const.wat new file mode 100644 index 000000000000..bff1dcd790c0 --- /dev/null +++ b/tests/disas/winch/aarch64/f32_convert_i32_u/const.wat @@ -0,0 +1,25 @@ +;;! target = "aarch64" +;;! test = "winch" + +(module + (func (result f32) + (i32.const 1) + (f32.convert_i32_u) + ) +) +;; wasm[0]::function[0]: +;; stp x29, x30, [sp, #-0x10]! +;; mov x29, sp +;; mov x28, sp +;; mov x9, x0 +;; sub sp, sp, #0x10 +;; mov x28, sp +;; stur x0, [x28, #8] +;; stur x1, [x28] +;; mov x16, #1 +;; mov w1, w16 +;; ucvtf s0, w1 +;; add sp, sp, #0x10 +;; mov x28, sp +;; ldp x29, x30, [sp], #0x10 +;; ret diff --git a/tests/disas/winch/aarch64/f32_convert_i32_u/locals.wat b/tests/disas/winch/aarch64/f32_convert_i32_u/locals.wat new file mode 100644 index 000000000000..cf0fcabb762c --- /dev/null +++ b/tests/disas/winch/aarch64/f32_convert_i32_u/locals.wat @@ -0,0 +1,28 @@ +;;! target = "aarch64" +;;! test = "winch" + +(module + (func (result f32) + (local i32) + + (local.get 0) + (f32.convert_i32_u) + ) +) +;; wasm[0]::function[0]: +;; stp x29, x30, [sp, #-0x10]! 
+;; mov x29, sp +;; mov x28, sp +;; mov x9, x0 +;; sub sp, sp, #0x18 +;; mov x28, sp +;; stur x0, [x28, #0x10] +;; stur x1, [x28, #8] +;; mov x16, #0 +;; stur x16, [x28] +;; ldur w1, [x28, #4] +;; ucvtf s0, w1 +;; add sp, sp, #0x18 +;; mov x28, sp +;; ldp x29, x30, [sp], #0x10 +;; ret diff --git a/tests/disas/winch/aarch64/f32_convert_i32_u/params.wat b/tests/disas/winch/aarch64/f32_convert_i32_u/params.wat new file mode 100644 index 000000000000..467519e5c443 --- /dev/null +++ b/tests/disas/winch/aarch64/f32_convert_i32_u/params.wat @@ -0,0 +1,25 @@ +;;! target = "aarch64" +;;! test = "winch" + +(module + (func (param i32) (result f32) + (local.get 0) + (f32.convert_i32_u) + ) +) +;; wasm[0]::function[0]: +;; stp x29, x30, [sp, #-0x10]! +;; mov x29, sp +;; mov x28, sp +;; mov x9, x0 +;; sub sp, sp, #0x18 +;; mov x28, sp +;; stur x0, [x28, #0x10] +;; stur x1, [x28, #8] +;; stur w2, [x28, #4] +;; ldur w1, [x28, #4] +;; ucvtf s0, w1 +;; add sp, sp, #0x18 +;; mov x28, sp +;; ldp x29, x30, [sp], #0x10 +;; ret diff --git a/tests/disas/winch/aarch64/f32_convert_i32_u/spilled.wat b/tests/disas/winch/aarch64/f32_convert_i32_u/spilled.wat new file mode 100644 index 000000000000..0bb3682e64f5 --- /dev/null +++ b/tests/disas/winch/aarch64/f32_convert_i32_u/spilled.wat @@ -0,0 +1,33 @@ +;;! target = "aarch64" +;;! test = "winch" + +(module + (func (result f32) + i32.const 1 + f32.convert_i32_u + block + end + ) +) +;; wasm[0]::function[0]: +;; stp x29, x30, [sp, #-0x10]! +;; mov x29, sp +;; mov x28, sp +;; mov x9, x0 +;; sub sp, sp, #0x10 +;; mov x28, sp +;; stur x0, [x28, #8] +;; stur x1, [x28] +;; mov x16, #1 +;; mov w1, w16 +;; ucvtf s0, w1 +;; sub sp, sp, #4 +;; mov x28, sp +;; stur s0, [x28] +;; ldur s0, [x28] +;; add sp, sp, #4 +;; mov x28, sp +;; add sp, sp, #0x10 +;; mov x28, sp +;; ldp x29, x30, [sp], #0x10 +;; ret diff --git a/tests/disas/winch/aarch64/f32_convert_i64_s/const.wat b/tests/disas/winch/aarch64/f32_convert_i64_s/const.wat new file mode 100644 index 000000000000..91c3041223de --- /dev/null +++ b/tests/disas/winch/aarch64/f32_convert_i64_s/const.wat @@ -0,0 +1,25 @@ +;;! target = "aarch64" +;;! test = "winch" + +(module + (func (result f32) + (i64.const 1) + (f32.convert_i64_s) + ) +) +;; wasm[0]::function[0]: +;; stp x29, x30, [sp, #-0x10]! +;; mov x29, sp +;; mov x28, sp +;; mov x9, x0 +;; sub sp, sp, #0x10 +;; mov x28, sp +;; stur x0, [x28, #8] +;; stur x1, [x28] +;; mov x16, #1 +;; mov x0, x16 +;; scvtf s0, x0 +;; add sp, sp, #0x10 +;; mov x28, sp +;; ldp x29, x30, [sp], #0x10 +;; ret diff --git a/tests/disas/winch/aarch64/f32_convert_i64_s/locals.wat b/tests/disas/winch/aarch64/f32_convert_i64_s/locals.wat new file mode 100644 index 000000000000..46218695ce16 --- /dev/null +++ b/tests/disas/winch/aarch64/f32_convert_i64_s/locals.wat @@ -0,0 +1,28 @@ +;;! target = "aarch64" +;;! test = "winch" + +(module + (func (result f32) + (local i64) + + (local.get 0) + (f32.convert_i64_s) + ) +) +;; wasm[0]::function[0]: +;; stp x29, x30, [sp, #-0x10]! 
+;; mov x29, sp +;; mov x28, sp +;; mov x9, x0 +;; sub sp, sp, #0x18 +;; mov x28, sp +;; stur x0, [x28, #0x10] +;; stur x1, [x28, #8] +;; mov x16, #0 +;; stur x16, [x28] +;; ldur x0, [x28] +;; scvtf s0, x0 +;; add sp, sp, #0x18 +;; mov x28, sp +;; ldp x29, x30, [sp], #0x10 +;; ret diff --git a/tests/disas/winch/aarch64/f32_convert_i64_s/params.wat b/tests/disas/winch/aarch64/f32_convert_i64_s/params.wat new file mode 100644 index 000000000000..9361a131dda0 --- /dev/null +++ b/tests/disas/winch/aarch64/f32_convert_i64_s/params.wat @@ -0,0 +1,25 @@ +;;! target = "aarch64" +;;! test = "winch" + +(module + (func (param i64) (result f32) + (local.get 0) + (f32.convert_i64_s) + ) +) +;; wasm[0]::function[0]: +;; stp x29, x30, [sp, #-0x10]! +;; mov x29, sp +;; mov x28, sp +;; mov x9, x0 +;; sub sp, sp, #0x18 +;; mov x28, sp +;; stur x0, [x28, #0x10] +;; stur x1, [x28, #8] +;; stur x2, [x28] +;; ldur x0, [x28] +;; scvtf s0, x0 +;; add sp, sp, #0x18 +;; mov x28, sp +;; ldp x29, x30, [sp], #0x10 +;; ret diff --git a/tests/disas/winch/aarch64/f32_convert_i64_s/spilled.wat b/tests/disas/winch/aarch64/f32_convert_i64_s/spilled.wat new file mode 100644 index 000000000000..b5fcacc18376 --- /dev/null +++ b/tests/disas/winch/aarch64/f32_convert_i64_s/spilled.wat @@ -0,0 +1,33 @@ +;;! target = "aarch64" +;;! test = "winch" + +(module + (func (result f32) + i64.const 1 + f32.convert_i64_s + block + end + ) +) +;; wasm[0]::function[0]: +;; stp x29, x30, [sp, #-0x10]! +;; mov x29, sp +;; mov x28, sp +;; mov x9, x0 +;; sub sp, sp, #0x10 +;; mov x28, sp +;; stur x0, [x28, #8] +;; stur x1, [x28] +;; mov x16, #1 +;; mov x0, x16 +;; scvtf s0, x0 +;; sub sp, sp, #4 +;; mov x28, sp +;; stur s0, [x28] +;; ldur s0, [x28] +;; add sp, sp, #4 +;; mov x28, sp +;; add sp, sp, #0x10 +;; mov x28, sp +;; ldp x29, x30, [sp], #0x10 +;; ret diff --git a/tests/disas/winch/aarch64/f32_convert_i64_u/const.wat b/tests/disas/winch/aarch64/f32_convert_i64_u/const.wat new file mode 100644 index 000000000000..755890aee7bc --- /dev/null +++ b/tests/disas/winch/aarch64/f32_convert_i64_u/const.wat @@ -0,0 +1,25 @@ +;;! target = "aarch64" +;;! test = "winch" + +(module + (func (result f32) + (i64.const 1) + (f32.convert_i64_u) + ) +) +;; wasm[0]::function[0]: +;; stp x29, x30, [sp, #-0x10]! +;; mov x29, sp +;; mov x28, sp +;; mov x9, x0 +;; sub sp, sp, #0x10 +;; mov x28, sp +;; stur x0, [x28, #8] +;; stur x1, [x28] +;; mov x16, #1 +;; mov x1, x16 +;; ucvtf s0, x1 +;; add sp, sp, #0x10 +;; mov x28, sp +;; ldp x29, x30, [sp], #0x10 +;; ret diff --git a/tests/disas/winch/aarch64/f32_convert_i64_u/locals.wat b/tests/disas/winch/aarch64/f32_convert_i64_u/locals.wat new file mode 100644 index 000000000000..e0bd03e9de92 --- /dev/null +++ b/tests/disas/winch/aarch64/f32_convert_i64_u/locals.wat @@ -0,0 +1,28 @@ +;;! target = "aarch64" +;;! test = "winch" + +(module + (func (result f32) + (local i64) + + (local.get 0) + (f32.convert_i64_u) + ) +) +;; wasm[0]::function[0]: +;; stp x29, x30, [sp, #-0x10]! +;; mov x29, sp +;; mov x28, sp +;; mov x9, x0 +;; sub sp, sp, #0x18 +;; mov x28, sp +;; stur x0, [x28, #0x10] +;; stur x1, [x28, #8] +;; mov x16, #0 +;; stur x16, [x28] +;; ldur x1, [x28] +;; ucvtf s0, x1 +;; add sp, sp, #0x18 +;; mov x28, sp +;; ldp x29, x30, [sp], #0x10 +;; ret diff --git a/tests/disas/winch/aarch64/f32_convert_i64_u/params.wat b/tests/disas/winch/aarch64/f32_convert_i64_u/params.wat new file mode 100644 index 000000000000..db4481e19479 --- /dev/null +++ b/tests/disas/winch/aarch64/f32_convert_i64_u/params.wat @@ -0,0 +1,25 @@ +;;! 
target = "aarch64" +;;! test = "winch" + +(module + (func (param i64) (result f32) + (local.get 0) + (f32.convert_i64_u) + ) +) +;; wasm[0]::function[0]: +;; stp x29, x30, [sp, #-0x10]! +;; mov x29, sp +;; mov x28, sp +;; mov x9, x0 +;; sub sp, sp, #0x18 +;; mov x28, sp +;; stur x0, [x28, #0x10] +;; stur x1, [x28, #8] +;; stur x2, [x28] +;; ldur x1, [x28] +;; ucvtf s0, x1 +;; add sp, sp, #0x18 +;; mov x28, sp +;; ldp x29, x30, [sp], #0x10 +;; ret diff --git a/tests/disas/winch/aarch64/f32_convert_i64_u/spilled.wat b/tests/disas/winch/aarch64/f32_convert_i64_u/spilled.wat new file mode 100644 index 000000000000..8a1d3493a1b1 --- /dev/null +++ b/tests/disas/winch/aarch64/f32_convert_i64_u/spilled.wat @@ -0,0 +1,33 @@ +;;! target = "aarch64" +;;! test = "winch" + +(module + (func (result f32) + i64.const 1 + f32.convert_i64_u + block + end + ) +) +;; wasm[0]::function[0]: +;; stp x29, x30, [sp, #-0x10]! +;; mov x29, sp +;; mov x28, sp +;; mov x9, x0 +;; sub sp, sp, #0x10 +;; mov x28, sp +;; stur x0, [x28, #8] +;; stur x1, [x28] +;; mov x16, #1 +;; mov x1, x16 +;; ucvtf s0, x1 +;; sub sp, sp, #4 +;; mov x28, sp +;; stur s0, [x28] +;; ldur s0, [x28] +;; add sp, sp, #4 +;; mov x28, sp +;; add sp, sp, #0x10 +;; mov x28, sp +;; ldp x29, x30, [sp], #0x10 +;; ret diff --git a/tests/disas/winch/aarch64/f32_reinterpret_i32/const.wat b/tests/disas/winch/aarch64/f32_reinterpret_i32/const.wat new file mode 100644 index 000000000000..547961118308 --- /dev/null +++ b/tests/disas/winch/aarch64/f32_reinterpret_i32/const.wat @@ -0,0 +1,25 @@ +;;! target = "aarch64" +;;! test = "winch" + +(module + (func (result f32) + (i32.const 1) + (f32.reinterpret_i32) + ) +) +;; wasm[0]::function[0]: +;; stp x29, x30, [sp, #-0x10]! +;; mov x29, sp +;; mov x28, sp +;; mov x9, x0 +;; sub sp, sp, #0x10 +;; mov x28, sp +;; stur x0, [x28, #8] +;; stur x1, [x28] +;; mov x16, #1 +;; mov w0, w16 +;; fmov s0, w0 +;; add sp, sp, #0x10 +;; mov x28, sp +;; ldp x29, x30, [sp], #0x10 +;; ret diff --git a/tests/disas/winch/aarch64/f32_reinterpret_i32/locals.wat b/tests/disas/winch/aarch64/f32_reinterpret_i32/locals.wat new file mode 100644 index 000000000000..16b5d518c6f5 --- /dev/null +++ b/tests/disas/winch/aarch64/f32_reinterpret_i32/locals.wat @@ -0,0 +1,28 @@ +;;! target = "aarch64" +;;! test = "winch" + +(module + (func (result f32) + (local i32) + + (local.get 0) + (f32.reinterpret_i32) + ) +) +;; wasm[0]::function[0]: +;; stp x29, x30, [sp, #-0x10]! +;; mov x29, sp +;; mov x28, sp +;; mov x9, x0 +;; sub sp, sp, #0x18 +;; mov x28, sp +;; stur x0, [x28, #0x10] +;; stur x1, [x28, #8] +;; mov x16, #0 +;; stur x16, [x28] +;; ldur w0, [x28, #4] +;; fmov s0, w0 +;; add sp, sp, #0x18 +;; mov x28, sp +;; ldp x29, x30, [sp], #0x10 +;; ret diff --git a/tests/disas/winch/aarch64/f32_reinterpret_i32/params.wat b/tests/disas/winch/aarch64/f32_reinterpret_i32/params.wat new file mode 100644 index 000000000000..72e237790399 --- /dev/null +++ b/tests/disas/winch/aarch64/f32_reinterpret_i32/params.wat @@ -0,0 +1,25 @@ +;;! target = "aarch64" +;;! test = "winch" + +(module + (func (param i32) (result f32) + (local.get 0) + (f32.reinterpret_i32) + ) +) +;; wasm[0]::function[0]: +;; stp x29, x30, [sp, #-0x10]! 
+;; mov x29, sp +;; mov x28, sp +;; mov x9, x0 +;; sub sp, sp, #0x18 +;; mov x28, sp +;; stur x0, [x28, #0x10] +;; stur x1, [x28, #8] +;; stur w2, [x28, #4] +;; ldur w0, [x28, #4] +;; fmov s0, w0 +;; add sp, sp, #0x18 +;; mov x28, sp +;; ldp x29, x30, [sp], #0x10 +;; ret diff --git a/tests/disas/winch/aarch64/f32_reinterpret_i32/ret_int.wat b/tests/disas/winch/aarch64/f32_reinterpret_i32/ret_int.wat new file mode 100644 index 000000000000..dc2d243dab66 --- /dev/null +++ b/tests/disas/winch/aarch64/f32_reinterpret_i32/ret_int.wat @@ -0,0 +1,29 @@ +;;! target = "aarch64" +;;! test = "winch" + +(module + (func (result i32) + i32.const 1 + f32.reinterpret_i32 + drop + i32.const 1 + ) +) +;; wasm[0]::function[0]: +;; stp x29, x30, [sp, #-0x10]! +;; mov x29, sp +;; mov x28, sp +;; mov x9, x0 +;; sub sp, sp, #0x10 +;; mov x28, sp +;; stur x0, [x28, #8] +;; stur x1, [x28] +;; mov x16, #1 +;; mov w0, w16 +;; fmov s0, w0 +;; mov x16, #1 +;; mov w0, w16 +;; add sp, sp, #0x10 +;; mov x28, sp +;; ldp x29, x30, [sp], #0x10 +;; ret diff --git a/tests/disas/winch/aarch64/f32_reinterpret_i32/spilled.wat b/tests/disas/winch/aarch64/f32_reinterpret_i32/spilled.wat new file mode 100644 index 000000000000..76269859f915 --- /dev/null +++ b/tests/disas/winch/aarch64/f32_reinterpret_i32/spilled.wat @@ -0,0 +1,33 @@ +;;! target = "aarch64" +;;! test = "winch" + +(module + (func (result f32) + i32.const 1 + f32.reinterpret_i32 + block + end + ) +) +;; wasm[0]::function[0]: +;; stp x29, x30, [sp, #-0x10]! +;; mov x29, sp +;; mov x28, sp +;; mov x9, x0 +;; sub sp, sp, #0x10 +;; mov x28, sp +;; stur x0, [x28, #8] +;; stur x1, [x28] +;; mov x16, #1 +;; mov w0, w16 +;; fmov s0, w0 +;; sub sp, sp, #4 +;; mov x28, sp +;; stur s0, [x28] +;; ldur s0, [x28] +;; add sp, sp, #4 +;; mov x28, sp +;; add sp, sp, #0x10 +;; mov x28, sp +;; ldp x29, x30, [sp], #0x10 +;; ret diff --git a/tests/disas/winch/aarch64/f64_convert_i32_s/const.wat b/tests/disas/winch/aarch64/f64_convert_i32_s/const.wat new file mode 100644 index 000000000000..7c6c0d395d33 --- /dev/null +++ b/tests/disas/winch/aarch64/f64_convert_i32_s/const.wat @@ -0,0 +1,25 @@ +;;! target = "aarch64" +;;! test = "winch" + +(module + (func (result f64) + (i32.const 1) + (f64.convert_i32_s) + ) +) +;; wasm[0]::function[0]: +;; stp x29, x30, [sp, #-0x10]! +;; mov x29, sp +;; mov x28, sp +;; mov x9, x0 +;; sub sp, sp, #0x10 +;; mov x28, sp +;; stur x0, [x28, #8] +;; stur x1, [x28] +;; mov x16, #1 +;; mov w0, w16 +;; scvtf d0, w0 +;; add sp, sp, #0x10 +;; mov x28, sp +;; ldp x29, x30, [sp], #0x10 +;; ret diff --git a/tests/disas/winch/aarch64/f64_convert_i32_s/locals.wat b/tests/disas/winch/aarch64/f64_convert_i32_s/locals.wat new file mode 100644 index 000000000000..ed41ea5c9d3d --- /dev/null +++ b/tests/disas/winch/aarch64/f64_convert_i32_s/locals.wat @@ -0,0 +1,28 @@ +;;! target = "aarch64" +;;! test = "winch" + +(module + (func (result f64) + (local i32) + + (local.get 0) + (f64.convert_i32_s) + ) +) +;; wasm[0]::function[0]: +;; stp x29, x30, [sp, #-0x10]! 
+;; mov x29, sp +;; mov x28, sp +;; mov x9, x0 +;; sub sp, sp, #0x18 +;; mov x28, sp +;; stur x0, [x28, #0x10] +;; stur x1, [x28, #8] +;; mov x16, #0 +;; stur x16, [x28] +;; ldur w0, [x28, #4] +;; scvtf d0, w0 +;; add sp, sp, #0x18 +;; mov x28, sp +;; ldp x29, x30, [sp], #0x10 +;; ret diff --git a/tests/disas/winch/aarch64/f64_convert_i32_s/params.wat b/tests/disas/winch/aarch64/f64_convert_i32_s/params.wat new file mode 100644 index 000000000000..e8e517bfc847 --- /dev/null +++ b/tests/disas/winch/aarch64/f64_convert_i32_s/params.wat @@ -0,0 +1,25 @@ +;;! target = "aarch64" +;;! test = "winch" + +(module + (func (param i32) (result f64) + (local.get 0) + (f64.convert_i32_s) + ) +) +;; wasm[0]::function[0]: +;; stp x29, x30, [sp, #-0x10]! +;; mov x29, sp +;; mov x28, sp +;; mov x9, x0 +;; sub sp, sp, #0x18 +;; mov x28, sp +;; stur x0, [x28, #0x10] +;; stur x1, [x28, #8] +;; stur w2, [x28, #4] +;; ldur w0, [x28, #4] +;; scvtf d0, w0 +;; add sp, sp, #0x18 +;; mov x28, sp +;; ldp x29, x30, [sp], #0x10 +;; ret diff --git a/tests/disas/winch/aarch64/f64_convert_i32_s/spilled.wat b/tests/disas/winch/aarch64/f64_convert_i32_s/spilled.wat new file mode 100644 index 000000000000..f8f02d4dc111 --- /dev/null +++ b/tests/disas/winch/aarch64/f64_convert_i32_s/spilled.wat @@ -0,0 +1,33 @@ +;;! target = "aarch64" +;;! test = "winch" + +(module + (func (result f64) + i32.const 1 + f64.convert_i32_s + block + end + ) +) +;; wasm[0]::function[0]: +;; stp x29, x30, [sp, #-0x10]! +;; mov x29, sp +;; mov x28, sp +;; mov x9, x0 +;; sub sp, sp, #0x10 +;; mov x28, sp +;; stur x0, [x28, #8] +;; stur x1, [x28] +;; mov x16, #1 +;; mov w0, w16 +;; scvtf d0, w0 +;; sub sp, sp, #8 +;; mov x28, sp +;; stur d0, [x28] +;; ldur d0, [x28] +;; add sp, sp, #8 +;; mov x28, sp +;; add sp, sp, #0x10 +;; mov x28, sp +;; ldp x29, x30, [sp], #0x10 +;; ret diff --git a/tests/disas/winch/aarch64/f64_convert_i32_u/const.wat b/tests/disas/winch/aarch64/f64_convert_i32_u/const.wat new file mode 100644 index 000000000000..b2831a52785e --- /dev/null +++ b/tests/disas/winch/aarch64/f64_convert_i32_u/const.wat @@ -0,0 +1,25 @@ +;;! target = "aarch64" +;;! test = "winch" + +(module + (func (result f64) + (i32.const 1) + (f64.convert_i32_u) + ) +) +;; wasm[0]::function[0]: +;; stp x29, x30, [sp, #-0x10]! +;; mov x29, sp +;; mov x28, sp +;; mov x9, x0 +;; sub sp, sp, #0x10 +;; mov x28, sp +;; stur x0, [x28, #8] +;; stur x1, [x28] +;; mov x16, #1 +;; mov w1, w16 +;; ucvtf d0, w1 +;; add sp, sp, #0x10 +;; mov x28, sp +;; ldp x29, x30, [sp], #0x10 +;; ret diff --git a/tests/disas/winch/aarch64/f64_convert_i32_u/locals.wat b/tests/disas/winch/aarch64/f64_convert_i32_u/locals.wat new file mode 100644 index 000000000000..1f79696145df --- /dev/null +++ b/tests/disas/winch/aarch64/f64_convert_i32_u/locals.wat @@ -0,0 +1,28 @@ +;;! target = "aarch64" +;;! test = "winch" + +(module + (func (result f64) + (local i32) + + (local.get 0) + (f64.convert_i32_u) + ) +) +;; wasm[0]::function[0]: +;; stp x29, x30, [sp, #-0x10]! 
+;; mov x29, sp +;; mov x28, sp +;; mov x9, x0 +;; sub sp, sp, #0x18 +;; mov x28, sp +;; stur x0, [x28, #0x10] +;; stur x1, [x28, #8] +;; mov x16, #0 +;; stur x16, [x28] +;; ldur w1, [x28, #4] +;; ucvtf d0, w1 +;; add sp, sp, #0x18 +;; mov x28, sp +;; ldp x29, x30, [sp], #0x10 +;; ret diff --git a/tests/disas/winch/aarch64/f64_convert_i32_u/params.wat b/tests/disas/winch/aarch64/f64_convert_i32_u/params.wat new file mode 100644 index 000000000000..0540e90ad4a5 --- /dev/null +++ b/tests/disas/winch/aarch64/f64_convert_i32_u/params.wat @@ -0,0 +1,25 @@ +;;! target = "aarch64" +;;! test = "winch" + +(module + (func (param i32) (result f64) + (local.get 0) + (f64.convert_i32_u) + ) +) +;; wasm[0]::function[0]: +;; stp x29, x30, [sp, #-0x10]! +;; mov x29, sp +;; mov x28, sp +;; mov x9, x0 +;; sub sp, sp, #0x18 +;; mov x28, sp +;; stur x0, [x28, #0x10] +;; stur x1, [x28, #8] +;; stur w2, [x28, #4] +;; ldur w1, [x28, #4] +;; ucvtf d0, w1 +;; add sp, sp, #0x18 +;; mov x28, sp +;; ldp x29, x30, [sp], #0x10 +;; ret diff --git a/tests/disas/winch/aarch64/f64_convert_i32_u/spilled.wat b/tests/disas/winch/aarch64/f64_convert_i32_u/spilled.wat new file mode 100644 index 000000000000..240299c35977 --- /dev/null +++ b/tests/disas/winch/aarch64/f64_convert_i32_u/spilled.wat @@ -0,0 +1,33 @@ +;;! target = "aarch64" +;;! test = "winch" + +(module + (func (result f64) + i32.const 1 + f64.convert_i32_u + block + end + ) +) +;; wasm[0]::function[0]: +;; stp x29, x30, [sp, #-0x10]! +;; mov x29, sp +;; mov x28, sp +;; mov x9, x0 +;; sub sp, sp, #0x10 +;; mov x28, sp +;; stur x0, [x28, #8] +;; stur x1, [x28] +;; mov x16, #1 +;; mov w1, w16 +;; ucvtf d0, w1 +;; sub sp, sp, #8 +;; mov x28, sp +;; stur d0, [x28] +;; ldur d0, [x28] +;; add sp, sp, #8 +;; mov x28, sp +;; add sp, sp, #0x10 +;; mov x28, sp +;; ldp x29, x30, [sp], #0x10 +;; ret diff --git a/tests/disas/winch/aarch64/f64_convert_i64_s/const.wat b/tests/disas/winch/aarch64/f64_convert_i64_s/const.wat new file mode 100644 index 000000000000..a6b2541f776a --- /dev/null +++ b/tests/disas/winch/aarch64/f64_convert_i64_s/const.wat @@ -0,0 +1,25 @@ +;;! target = "aarch64" +;;! test = "winch" + +(module + (func (result f64) + (i64.const 1) + (f64.convert_i64_s) + ) +) +;; wasm[0]::function[0]: +;; stp x29, x30, [sp, #-0x10]! +;; mov x29, sp +;; mov x28, sp +;; mov x9, x0 +;; sub sp, sp, #0x10 +;; mov x28, sp +;; stur x0, [x28, #8] +;; stur x1, [x28] +;; mov x16, #1 +;; mov x0, x16 +;; scvtf d0, x0 +;; add sp, sp, #0x10 +;; mov x28, sp +;; ldp x29, x30, [sp], #0x10 +;; ret diff --git a/tests/disas/winch/aarch64/f64_convert_i64_s/locals.wat b/tests/disas/winch/aarch64/f64_convert_i64_s/locals.wat new file mode 100644 index 000000000000..ea96b2b15cd5 --- /dev/null +++ b/tests/disas/winch/aarch64/f64_convert_i64_s/locals.wat @@ -0,0 +1,28 @@ +;;! target = "aarch64" +;;! test = "winch" + +(module + (func (result f64) + (local i64) + + (local.get 0) + (f64.convert_i64_s) + ) +) +;; wasm[0]::function[0]: +;; stp x29, x30, [sp, #-0x10]! 
+;; mov x29, sp +;; mov x28, sp +;; mov x9, x0 +;; sub sp, sp, #0x18 +;; mov x28, sp +;; stur x0, [x28, #0x10] +;; stur x1, [x28, #8] +;; mov x16, #0 +;; stur x16, [x28] +;; ldur x0, [x28] +;; scvtf d0, x0 +;; add sp, sp, #0x18 +;; mov x28, sp +;; ldp x29, x30, [sp], #0x10 +;; ret diff --git a/tests/disas/winch/aarch64/f64_convert_i64_s/params.wat b/tests/disas/winch/aarch64/f64_convert_i64_s/params.wat new file mode 100644 index 000000000000..d5bba734be30 --- /dev/null +++ b/tests/disas/winch/aarch64/f64_convert_i64_s/params.wat @@ -0,0 +1,25 @@ +;;! target = "aarch64" +;;! test = "winch" + +(module + (func (param i64) (result f64) + (local.get 0) + (f64.convert_i64_s) + ) +) +;; wasm[0]::function[0]: +;; stp x29, x30, [sp, #-0x10]! +;; mov x29, sp +;; mov x28, sp +;; mov x9, x0 +;; sub sp, sp, #0x18 +;; mov x28, sp +;; stur x0, [x28, #0x10] +;; stur x1, [x28, #8] +;; stur x2, [x28] +;; ldur x0, [x28] +;; scvtf d0, x0 +;; add sp, sp, #0x18 +;; mov x28, sp +;; ldp x29, x30, [sp], #0x10 +;; ret diff --git a/tests/disas/winch/aarch64/f64_convert_i64_s/spilled.wat b/tests/disas/winch/aarch64/f64_convert_i64_s/spilled.wat new file mode 100644 index 000000000000..5038e02fb556 --- /dev/null +++ b/tests/disas/winch/aarch64/f64_convert_i64_s/spilled.wat @@ -0,0 +1,33 @@ +;;! target = "aarch64" +;;! test = "winch" + +(module + (func (result f64) + i64.const 1 + f64.convert_i64_s + block + end + ) +) +;; wasm[0]::function[0]: +;; stp x29, x30, [sp, #-0x10]! +;; mov x29, sp +;; mov x28, sp +;; mov x9, x0 +;; sub sp, sp, #0x10 +;; mov x28, sp +;; stur x0, [x28, #8] +;; stur x1, [x28] +;; mov x16, #1 +;; mov x0, x16 +;; scvtf d0, x0 +;; sub sp, sp, #8 +;; mov x28, sp +;; stur d0, [x28] +;; ldur d0, [x28] +;; add sp, sp, #8 +;; mov x28, sp +;; add sp, sp, #0x10 +;; mov x28, sp +;; ldp x29, x30, [sp], #0x10 +;; ret diff --git a/tests/disas/winch/aarch64/f64_convert_i64_u/const.wat b/tests/disas/winch/aarch64/f64_convert_i64_u/const.wat new file mode 100644 index 000000000000..a5289b0106e5 --- /dev/null +++ b/tests/disas/winch/aarch64/f64_convert_i64_u/const.wat @@ -0,0 +1,25 @@ +;;! target = "aarch64" +;;! test = "winch" + +(module + (func (result f64) + (i64.const 1) + (f64.convert_i64_u) + ) +) +;; wasm[0]::function[0]: +;; stp x29, x30, [sp, #-0x10]! +;; mov x29, sp +;; mov x28, sp +;; mov x9, x0 +;; sub sp, sp, #0x10 +;; mov x28, sp +;; stur x0, [x28, #8] +;; stur x1, [x28] +;; mov x16, #1 +;; mov x1, x16 +;; ucvtf d0, x1 +;; add sp, sp, #0x10 +;; mov x28, sp +;; ldp x29, x30, [sp], #0x10 +;; ret diff --git a/tests/disas/winch/aarch64/f64_convert_i64_u/locals.wat b/tests/disas/winch/aarch64/f64_convert_i64_u/locals.wat new file mode 100644 index 000000000000..2f13d71619c1 --- /dev/null +++ b/tests/disas/winch/aarch64/f64_convert_i64_u/locals.wat @@ -0,0 +1,28 @@ +;;! target = "aarch64" +;;! test = "winch" + +(module + (func (result f64) + (local i64) + + (local.get 0) + (f64.convert_i64_u) + ) +) +;; wasm[0]::function[0]: +;; stp x29, x30, [sp, #-0x10]! +;; mov x29, sp +;; mov x28, sp +;; mov x9, x0 +;; sub sp, sp, #0x18 +;; mov x28, sp +;; stur x0, [x28, #0x10] +;; stur x1, [x28, #8] +;; mov x16, #0 +;; stur x16, [x28] +;; ldur x1, [x28] +;; ucvtf d0, x1 +;; add sp, sp, #0x18 +;; mov x28, sp +;; ldp x29, x30, [sp], #0x10 +;; ret diff --git a/tests/disas/winch/aarch64/f64_convert_i64_u/params.wat b/tests/disas/winch/aarch64/f64_convert_i64_u/params.wat new file mode 100644 index 000000000000..8bcabb2daebe --- /dev/null +++ b/tests/disas/winch/aarch64/f64_convert_i64_u/params.wat @@ -0,0 +1,25 @@ +;;! 
target = "aarch64" +;;! test = "winch" + +(module + (func (param i64) (result f64) + (local.get 0) + (f64.convert_i64_u) + ) +) +;; wasm[0]::function[0]: +;; stp x29, x30, [sp, #-0x10]! +;; mov x29, sp +;; mov x28, sp +;; mov x9, x0 +;; sub sp, sp, #0x18 +;; mov x28, sp +;; stur x0, [x28, #0x10] +;; stur x1, [x28, #8] +;; stur x2, [x28] +;; ldur x1, [x28] +;; ucvtf d0, x1 +;; add sp, sp, #0x18 +;; mov x28, sp +;; ldp x29, x30, [sp], #0x10 +;; ret diff --git a/tests/disas/winch/aarch64/f64_convert_i64_u/spilled.wat b/tests/disas/winch/aarch64/f64_convert_i64_u/spilled.wat new file mode 100644 index 000000000000..80b9a97b35ba --- /dev/null +++ b/tests/disas/winch/aarch64/f64_convert_i64_u/spilled.wat @@ -0,0 +1,33 @@ +;;! target = "aarch64" +;;! test = "winch" + +(module + (func (result f64) + i64.const 1 + f64.convert_i64_u + block + end + ) +) +;; wasm[0]::function[0]: +;; stp x29, x30, [sp, #-0x10]! +;; mov x29, sp +;; mov x28, sp +;; mov x9, x0 +;; sub sp, sp, #0x10 +;; mov x28, sp +;; stur x0, [x28, #8] +;; stur x1, [x28] +;; mov x16, #1 +;; mov x1, x16 +;; ucvtf d0, x1 +;; sub sp, sp, #8 +;; mov x28, sp +;; stur d0, [x28] +;; ldur d0, [x28] +;; add sp, sp, #8 +;; mov x28, sp +;; add sp, sp, #0x10 +;; mov x28, sp +;; ldp x29, x30, [sp], #0x10 +;; ret diff --git a/tests/disas/winch/aarch64/f64_reinterpret_i64/const.wat b/tests/disas/winch/aarch64/f64_reinterpret_i64/const.wat new file mode 100644 index 000000000000..ee0cdfaddb59 --- /dev/null +++ b/tests/disas/winch/aarch64/f64_reinterpret_i64/const.wat @@ -0,0 +1,25 @@ +;;! target = "aarch64" +;;! test = "winch" + +(module + (func (result f64) + (i64.const 1) + (f64.reinterpret_i64) + ) +) +;; wasm[0]::function[0]: +;; stp x29, x30, [sp, #-0x10]! +;; mov x29, sp +;; mov x28, sp +;; mov x9, x0 +;; sub sp, sp, #0x10 +;; mov x28, sp +;; stur x0, [x28, #8] +;; stur x1, [x28] +;; mov x16, #1 +;; mov x0, x16 +;; fmov d0, x0 +;; add sp, sp, #0x10 +;; mov x28, sp +;; ldp x29, x30, [sp], #0x10 +;; ret diff --git a/tests/disas/winch/aarch64/f64_reinterpret_i64/locals.wat b/tests/disas/winch/aarch64/f64_reinterpret_i64/locals.wat new file mode 100644 index 000000000000..c8c0d9b02a98 --- /dev/null +++ b/tests/disas/winch/aarch64/f64_reinterpret_i64/locals.wat @@ -0,0 +1,28 @@ +;;! target = "aarch64" +;;! test = "winch" + +(module + (func (result f64) + (local i64) + + (local.get 0) + (f64.reinterpret_i64) + ) +) +;; wasm[0]::function[0]: +;; stp x29, x30, [sp, #-0x10]! +;; mov x29, sp +;; mov x28, sp +;; mov x9, x0 +;; sub sp, sp, #0x18 +;; mov x28, sp +;; stur x0, [x28, #0x10] +;; stur x1, [x28, #8] +;; mov x16, #0 +;; stur x16, [x28] +;; ldur x0, [x28] +;; fmov d0, x0 +;; add sp, sp, #0x18 +;; mov x28, sp +;; ldp x29, x30, [sp], #0x10 +;; ret diff --git a/tests/disas/winch/aarch64/f64_reinterpret_i64/params.wat b/tests/disas/winch/aarch64/f64_reinterpret_i64/params.wat new file mode 100644 index 000000000000..9b18dae34166 --- /dev/null +++ b/tests/disas/winch/aarch64/f64_reinterpret_i64/params.wat @@ -0,0 +1,25 @@ +;;! target = "aarch64" +;;! test = "winch" + +(module + (func (param i64) (result f64) + (local.get 0) + (f64.reinterpret_i64) + ) +) +;; wasm[0]::function[0]: +;; stp x29, x30, [sp, #-0x10]! 
+;; mov x29, sp +;; mov x28, sp +;; mov x9, x0 +;; sub sp, sp, #0x18 +;; mov x28, sp +;; stur x0, [x28, #0x10] +;; stur x1, [x28, #8] +;; stur x2, [x28] +;; ldur x0, [x28] +;; fmov d0, x0 +;; add sp, sp, #0x18 +;; mov x28, sp +;; ldp x29, x30, [sp], #0x10 +;; ret diff --git a/tests/disas/winch/aarch64/f64_reinterpret_i64/ret_int.wat b/tests/disas/winch/aarch64/f64_reinterpret_i64/ret_int.wat new file mode 100644 index 000000000000..684360b374b5 --- /dev/null +++ b/tests/disas/winch/aarch64/f64_reinterpret_i64/ret_int.wat @@ -0,0 +1,29 @@ +;;! target = "aarch64" +;;! test = "winch" + +(module + (func (result i64) + i64.const 1 + f64.reinterpret_i64 + drop + i64.const 1 + ) +) +;; wasm[0]::function[0]: +;; stp x29, x30, [sp, #-0x10]! +;; mov x29, sp +;; mov x28, sp +;; mov x9, x0 +;; sub sp, sp, #0x10 +;; mov x28, sp +;; stur x0, [x28, #8] +;; stur x1, [x28] +;; mov x16, #1 +;; mov x0, x16 +;; fmov d0, x0 +;; mov x16, #1 +;; mov x0, x16 +;; add sp, sp, #0x10 +;; mov x28, sp +;; ldp x29, x30, [sp], #0x10 +;; ret diff --git a/tests/disas/winch/aarch64/f64_reinterpret_i64/spilled.wat b/tests/disas/winch/aarch64/f64_reinterpret_i64/spilled.wat new file mode 100644 index 000000000000..dddc29096585 --- /dev/null +++ b/tests/disas/winch/aarch64/f64_reinterpret_i64/spilled.wat @@ -0,0 +1,33 @@ +;;! target = "aarch64" +;;! test = "winch" + +(module + (func (result f64) + i64.const 1 + f64.reinterpret_i64 + block + end + ) +) +;; wasm[0]::function[0]: +;; stp x29, x30, [sp, #-0x10]! +;; mov x29, sp +;; mov x28, sp +;; mov x9, x0 +;; sub sp, sp, #0x10 +;; mov x28, sp +;; stur x0, [x28, #8] +;; stur x1, [x28] +;; mov x16, #1 +;; mov x0, x16 +;; fmov d0, x0 +;; sub sp, sp, #8 +;; mov x28, sp +;; stur d0, [x28] +;; ldur d0, [x28] +;; add sp, sp, #8 +;; mov x28, sp +;; add sp, sp, #0x10 +;; mov x28, sp +;; ldp x29, x30, [sp], #0x10 +;; ret diff --git a/tests/disas/winch/aarch64/i32_divs/const.wat b/tests/disas/winch/aarch64/i32_divs/const.wat new file mode 100644 index 000000000000..92a6d05d1964 --- /dev/null +++ b/tests/disas/winch/aarch64/i32_divs/const.wat @@ -0,0 +1,38 @@ +;;! target = "aarch64" +;;! test = "winch" + +(module + (func (result i32) + (i32.const 20) + (i32.const 10) + (i32.div_s) + ) +) + +;; wasm[0]::function[0]: +;; stp x29, x30, [sp, #-0x10]! +;; mov x29, sp +;; mov x28, sp +;; mov x9, x0 +;; sub sp, sp, #0x10 +;; mov x28, sp +;; stur x0, [x28, #8] +;; stur x1, [x28] +;; mov x16, #0xa +;; mov w0, w16 +;; mov x16, #0x14 +;; mov w1, w16 +;; cbz x0, #0x60 +;; 34: cmn w0, #1 +;; ccmp w1, #1, #0, eq +;; b.vs #0x64 +;; 40: sxtw x0, w0 +;; sxtw x1, w1 +;; sdiv x1, x1, x0 +;; mov w0, w1 +;; add sp, sp, #0x10 +;; mov x28, sp +;; ldp x29, x30, [sp], #0x10 +;; ret +;; 60: .byte 0x1f, 0xc1, 0x00, 0x00 +;; 64: .byte 0x1f, 0xc1, 0x00, 0x00 diff --git a/tests/disas/winch/aarch64/i32_divs/one_zero.wat b/tests/disas/winch/aarch64/i32_divs/one_zero.wat new file mode 100644 index 000000000000..e70375c293fe --- /dev/null +++ b/tests/disas/winch/aarch64/i32_divs/one_zero.wat @@ -0,0 +1,38 @@ +;;! target = "aarch64" +;;! test = "winch" + +(module + (func (result i32) + (i32.const 1) + (i32.const 0) + (i32.div_s) + ) +) + +;; wasm[0]::function[0]: +;; stp x29, x30, [sp, #-0x10]! 
+;; mov x29, sp +;; mov x28, sp +;; mov x9, x0 +;; sub sp, sp, #0x10 +;; mov x28, sp +;; stur x0, [x28, #8] +;; stur x1, [x28] +;; mov x16, #0 +;; mov w0, w16 +;; mov x16, #1 +;; mov w1, w16 +;; cbz x0, #0x60 +;; 34: cmn w0, #1 +;; ccmp w1, #1, #0, eq +;; b.vs #0x64 +;; 40: sxtw x0, w0 +;; sxtw x1, w1 +;; sdiv x1, x1, x0 +;; mov w0, w1 +;; add sp, sp, #0x10 +;; mov x28, sp +;; ldp x29, x30, [sp], #0x10 +;; ret +;; 60: .byte 0x1f, 0xc1, 0x00, 0x00 +;; 64: .byte 0x1f, 0xc1, 0x00, 0x00 diff --git a/tests/disas/winch/aarch64/i32_divs/overflow.wat b/tests/disas/winch/aarch64/i32_divs/overflow.wat new file mode 100644 index 000000000000..88fc9621abaa --- /dev/null +++ b/tests/disas/winch/aarch64/i32_divs/overflow.wat @@ -0,0 +1,38 @@ +;;! target = "aarch64" +;;! test = "winch" + +(module + (func (result i32) + (i32.const 0x80000000) + (i32.const -1) + (i32.div_s) + ) +) + +;; wasm[0]::function[0]: +;; stp x29, x30, [sp, #-0x10]! +;; mov x29, sp +;; mov x28, sp +;; mov x9, x0 +;; sub sp, sp, #0x10 +;; mov x28, sp +;; stur x0, [x28, #8] +;; stur x1, [x28] +;; orr x16, xzr, #0xffffffff +;; mov w0, w16 +;; mov x16, #0x80000000 +;; mov w1, w16 +;; cbz x0, #0x60 +;; 34: cmn w0, #1 +;; ccmp w1, #1, #0, eq +;; b.vs #0x64 +;; 40: sxtw x0, w0 +;; sxtw x1, w1 +;; sdiv x1, x1, x0 +;; mov w0, w1 +;; add sp, sp, #0x10 +;; mov x28, sp +;; ldp x29, x30, [sp], #0x10 +;; ret +;; 60: .byte 0x1f, 0xc1, 0x00, 0x00 +;; 64: .byte 0x1f, 0xc1, 0x00, 0x00 diff --git a/tests/disas/winch/aarch64/i32_divs/params.wat b/tests/disas/winch/aarch64/i32_divs/params.wat new file mode 100644 index 000000000000..0976a08b25a8 --- /dev/null +++ b/tests/disas/winch/aarch64/i32_divs/params.wat @@ -0,0 +1,38 @@ +;;! target = "aarch64" +;;! test = "winch" + +(module + (func (param i32) (param i32) (result i32) + (local.get 0) + (local.get 1) + (i32.div_s) + ) +) + +;; wasm[0]::function[0]: +;; stp x29, x30, [sp, #-0x10]! +;; mov x29, sp +;; mov x28, sp +;; mov x9, x0 +;; sub sp, sp, #0x18 +;; mov x28, sp +;; stur x0, [x28, #0x10] +;; stur x1, [x28, #8] +;; stur w2, [x28, #4] +;; stur w3, [x28] +;; ldur w0, [x28] +;; ldur w1, [x28, #4] +;; cbz x0, #0x60 +;; 34: cmn w0, #1 +;; ccmp w1, #1, #0, eq +;; b.vs #0x64 +;; 40: sxtw x0, w0 +;; sxtw x1, w1 +;; sdiv x1, x1, x0 +;; mov w0, w1 +;; add sp, sp, #0x18 +;; mov x28, sp +;; ldp x29, x30, [sp], #0x10 +;; ret +;; 60: .byte 0x1f, 0xc1, 0x00, 0x00 +;; 64: .byte 0x1f, 0xc1, 0x00, 0x00 diff --git a/tests/disas/winch/aarch64/i32_divs/zero_zero.wat b/tests/disas/winch/aarch64/i32_divs/zero_zero.wat new file mode 100644 index 000000000000..ce02fce00605 --- /dev/null +++ b/tests/disas/winch/aarch64/i32_divs/zero_zero.wat @@ -0,0 +1,38 @@ +;;! target = "aarch64" +;;! test = "winch" + +(module + (func (result i32) + (i32.const 0) + (i32.const 0) + (i32.div_s) + ) +) + +;; wasm[0]::function[0]: +;; stp x29, x30, [sp, #-0x10]! 
+;; mov x29, sp +;; mov x28, sp +;; mov x9, x0 +;; sub sp, sp, #0x10 +;; mov x28, sp +;; stur x0, [x28, #8] +;; stur x1, [x28] +;; mov x16, #0 +;; mov w0, w16 +;; mov x16, #0 +;; mov w1, w16 +;; cbz x0, #0x60 +;; 34: cmn w0, #1 +;; ccmp w1, #1, #0, eq +;; b.vs #0x64 +;; 40: sxtw x0, w0 +;; sxtw x1, w1 +;; sdiv x1, x1, x0 +;; mov w0, w1 +;; add sp, sp, #0x10 +;; mov x28, sp +;; ldp x29, x30, [sp], #0x10 +;; ret +;; 60: .byte 0x1f, 0xc1, 0x00, 0x00 +;; 64: .byte 0x1f, 0xc1, 0x00, 0x00 diff --git a/tests/disas/winch/aarch64/i32_divu/const.wat b/tests/disas/winch/aarch64/i32_divu/const.wat new file mode 100644 index 000000000000..777d40c8ee78 --- /dev/null +++ b/tests/disas/winch/aarch64/i32_divu/const.wat @@ -0,0 +1,34 @@ +;;! target = "aarch64" +;;! test = "winch" + +(module + (func (result i32) + (i32.const 20) + (i32.const 10) + (i32.div_u) + ) +) + +;; wasm[0]::function[0]: +;; stp x29, x30, [sp, #-0x10]! +;; mov x29, sp +;; mov x28, sp +;; mov x9, x0 +;; sub sp, sp, #0x10 +;; mov x28, sp +;; stur x0, [x28, #8] +;; stur x1, [x28] +;; mov x16, #0xa +;; mov w0, w16 +;; mov x16, #0x14 +;; mov w1, w16 +;; cbz x0, #0x54 +;; 34: mov w0, w0 +;; mov w1, w1 +;; udiv x1, x1, x0 +;; mov w0, w1 +;; add sp, sp, #0x10 +;; mov x28, sp +;; ldp x29, x30, [sp], #0x10 +;; ret +;; 54: .byte 0x1f, 0xc1, 0x00, 0x00 diff --git a/tests/disas/winch/aarch64/i32_divu/one_zero.wat b/tests/disas/winch/aarch64/i32_divu/one_zero.wat new file mode 100644 index 000000000000..4b2887f1c7c6 --- /dev/null +++ b/tests/disas/winch/aarch64/i32_divu/one_zero.wat @@ -0,0 +1,34 @@ +;;! target = "aarch64" +;;! test = "winch" + +(module + (func (result i32) + (i32.const 1) + (i32.const 0) + (i32.div_u) + ) +) + +;; wasm[0]::function[0]: +;; stp x29, x30, [sp, #-0x10]! +;; mov x29, sp +;; mov x28, sp +;; mov x9, x0 +;; sub sp, sp, #0x10 +;; mov x28, sp +;; stur x0, [x28, #8] +;; stur x1, [x28] +;; mov x16, #0 +;; mov w0, w16 +;; mov x16, #1 +;; mov w1, w16 +;; cbz x0, #0x54 +;; 34: mov w0, w0 +;; mov w1, w1 +;; udiv x1, x1, x0 +;; mov w0, w1 +;; add sp, sp, #0x10 +;; mov x28, sp +;; ldp x29, x30, [sp], #0x10 +;; ret +;; 54: .byte 0x1f, 0xc1, 0x00, 0x00 diff --git a/tests/disas/winch/aarch64/i32_divu/params.wat b/tests/disas/winch/aarch64/i32_divu/params.wat new file mode 100644 index 000000000000..f47118cc1bd8 --- /dev/null +++ b/tests/disas/winch/aarch64/i32_divu/params.wat @@ -0,0 +1,34 @@ +;;! target = "aarch64" +;;! test = "winch" + +(module + (func (param i32) (param i32) (result i32) + (local.get 0) + (local.get 1) + (i32.div_u) + ) +) + +;; wasm[0]::function[0]: +;; stp x29, x30, [sp, #-0x10]! +;; mov x29, sp +;; mov x28, sp +;; mov x9, x0 +;; sub sp, sp, #0x18 +;; mov x28, sp +;; stur x0, [x28, #0x10] +;; stur x1, [x28, #8] +;; stur w2, [x28, #4] +;; stur w3, [x28] +;; ldur w0, [x28] +;; ldur w1, [x28, #4] +;; cbz x0, #0x54 +;; 34: mov w0, w0 +;; mov w1, w1 +;; udiv x1, x1, x0 +;; mov w0, w1 +;; add sp, sp, #0x18 +;; mov x28, sp +;; ldp x29, x30, [sp], #0x10 +;; ret +;; 54: .byte 0x1f, 0xc1, 0x00, 0x00 diff --git a/tests/disas/winch/aarch64/i32_divu/signed.wat b/tests/disas/winch/aarch64/i32_divu/signed.wat new file mode 100644 index 000000000000..aa796a5cd616 --- /dev/null +++ b/tests/disas/winch/aarch64/i32_divu/signed.wat @@ -0,0 +1,34 @@ +;;! target = "aarch64" +;;! test = "winch" + +(module + (func (result i32) + (i32.const -1) + (i32.const -1) + (i32.div_u) + ) +) + +;; wasm[0]::function[0]: +;; stp x29, x30, [sp, #-0x10]! 
+;; mov x29, sp +;; mov x28, sp +;; mov x9, x0 +;; sub sp, sp, #0x10 +;; mov x28, sp +;; stur x0, [x28, #8] +;; stur x1, [x28] +;; orr x16, xzr, #0xffffffff +;; mov w0, w16 +;; orr x16, xzr, #0xffffffff +;; mov w1, w16 +;; cbz x0, #0x54 +;; 34: mov w0, w0 +;; mov w1, w1 +;; udiv x1, x1, x0 +;; mov w0, w1 +;; add sp, sp, #0x10 +;; mov x28, sp +;; ldp x29, x30, [sp], #0x10 +;; ret +;; 54: .byte 0x1f, 0xc1, 0x00, 0x00 diff --git a/tests/disas/winch/aarch64/i32_divu/zero_zero.wat b/tests/disas/winch/aarch64/i32_divu/zero_zero.wat new file mode 100644 index 000000000000..e98e8115bc80 --- /dev/null +++ b/tests/disas/winch/aarch64/i32_divu/zero_zero.wat @@ -0,0 +1,34 @@ +;;! target = "aarch64" +;;! test = "winch" + +(module + (func (result i32) + (i32.const 0) + (i32.const 0) + (i32.div_u) + ) +) + +;; wasm[0]::function[0]: +;; stp x29, x30, [sp, #-0x10]! +;; mov x29, sp +;; mov x28, sp +;; mov x9, x0 +;; sub sp, sp, #0x10 +;; mov x28, sp +;; stur x0, [x28, #8] +;; stur x1, [x28] +;; mov x16, #0 +;; mov w0, w16 +;; mov x16, #0 +;; mov w1, w16 +;; cbz x0, #0x54 +;; 34: mov w0, w0 +;; mov w1, w1 +;; udiv x1, x1, x0 +;; mov w0, w1 +;; add sp, sp, #0x10 +;; mov x28, sp +;; ldp x29, x30, [sp], #0x10 +;; ret +;; 54: .byte 0x1f, 0xc1, 0x00, 0x00 diff --git a/tests/disas/winch/aarch64/i32_reinterpret_f32/const.wat b/tests/disas/winch/aarch64/i32_reinterpret_f32/const.wat new file mode 100644 index 000000000000..1e742b16b287 --- /dev/null +++ b/tests/disas/winch/aarch64/i32_reinterpret_f32/const.wat @@ -0,0 +1,25 @@ +;;! target = "aarch64" +;;! test = "winch" + +(module + (func (result i32) + (f32.const 1.0) + (i32.reinterpret_f32) + ) +) +;; wasm[0]::function[0]: +;; stp x29, x30, [sp, #-0x10]! +;; mov x29, sp +;; mov x28, sp +;; mov x9, x0 +;; sub sp, sp, #0x10 +;; mov x28, sp +;; stur x0, [x28, #8] +;; stur x1, [x28] +;; mov x16, #0x3f800000 +;; fmov s0, w16 +;; mov w0, v0.s[0] +;; add sp, sp, #0x10 +;; mov x28, sp +;; ldp x29, x30, [sp], #0x10 +;; ret diff --git a/tests/disas/winch/aarch64/i32_reinterpret_f32/locals.wat b/tests/disas/winch/aarch64/i32_reinterpret_f32/locals.wat new file mode 100644 index 000000000000..22f63fbc1d72 --- /dev/null +++ b/tests/disas/winch/aarch64/i32_reinterpret_f32/locals.wat @@ -0,0 +1,28 @@ +;;! target = "aarch64" +;;! test = "winch" + +(module + (func (result i32) + (local f32) + + (local.get 0) + (i32.reinterpret_f32) + ) +) +;; wasm[0]::function[0]: +;; stp x29, x30, [sp, #-0x10]! +;; mov x29, sp +;; mov x28, sp +;; mov x9, x0 +;; sub sp, sp, #0x18 +;; mov x28, sp +;; stur x0, [x28, #0x10] +;; stur x1, [x28, #8] +;; mov x16, #0 +;; stur x16, [x28] +;; ldur s0, [x28, #4] +;; mov w0, v0.s[0] +;; add sp, sp, #0x18 +;; mov x28, sp +;; ldp x29, x30, [sp], #0x10 +;; ret diff --git a/tests/disas/winch/aarch64/i32_reinterpret_f32/params.wat b/tests/disas/winch/aarch64/i32_reinterpret_f32/params.wat new file mode 100644 index 000000000000..9f658cc6718a --- /dev/null +++ b/tests/disas/winch/aarch64/i32_reinterpret_f32/params.wat @@ -0,0 +1,25 @@ +;;! target = "aarch64" +;;! test = "winch" + +(module + (func (param f32) (result i32) + (local.get 0) + (i32.reinterpret_f32) + ) +) +;; wasm[0]::function[0]: +;; stp x29, x30, [sp, #-0x10]! 
+;; mov x29, sp +;; mov x28, sp +;; mov x9, x0 +;; sub sp, sp, #0x18 +;; mov x28, sp +;; stur x0, [x28, #0x10] +;; stur x1, [x28, #8] +;; stur s0, [x28, #4] +;; ldur s0, [x28, #4] +;; mov w0, v0.s[0] +;; add sp, sp, #0x18 +;; mov x28, sp +;; ldp x29, x30, [sp], #0x10 +;; ret diff --git a/tests/disas/winch/aarch64/i32_reinterpret_f32/ret_float.wat b/tests/disas/winch/aarch64/i32_reinterpret_f32/ret_float.wat new file mode 100644 index 000000000000..2c8fa5b81104 --- /dev/null +++ b/tests/disas/winch/aarch64/i32_reinterpret_f32/ret_float.wat @@ -0,0 +1,29 @@ +;;! target = "aarch64" +;;! test = "winch" + +(module + (func (result f32) + f32.const 1.0 + i32.reinterpret_f32 + drop + f32.const 1.0 + ) +) +;; wasm[0]::function[0]: +;; stp x29, x30, [sp, #-0x10]! +;; mov x29, sp +;; mov x28, sp +;; mov x9, x0 +;; sub sp, sp, #0x10 +;; mov x28, sp +;; stur x0, [x28, #8] +;; stur x1, [x28] +;; mov x16, #0x3f800000 +;; fmov s0, w16 +;; mov w0, v0.s[0] +;; mov x16, #0x3f800000 +;; fmov s0, w16 +;; add sp, sp, #0x10 +;; mov x28, sp +;; ldp x29, x30, [sp], #0x10 +;; ret diff --git a/tests/disas/winch/aarch64/i32_rems/const.wat b/tests/disas/winch/aarch64/i32_rems/const.wat new file mode 100644 index 000000000000..b01e691bae13 --- /dev/null +++ b/tests/disas/winch/aarch64/i32_rems/const.wat @@ -0,0 +1,35 @@ +;;! target = "aarch64" +;;! test = "winch" + +(module + (func (result i32) + (i32.const 7) + (i32.const 5) + (i32.rem_s) + ) +) + +;; wasm[0]::function[0]: +;; stp x29, x30, [sp, #-0x10]! +;; mov x29, sp +;; mov x28, sp +;; mov x9, x0 +;; sub sp, sp, #0x10 +;; mov x28, sp +;; stur x0, [x28, #8] +;; stur x1, [x28] +;; mov x16, #5 +;; mov w0, w16 +;; mov x16, #7 +;; mov w1, w16 +;; cbz x0, #0x58 +;; 34: sxtw x0, w0 +;; sxtw x1, w1 +;; sdiv x16, x1, x0 +;; msub x1, x0, x16, x1 +;; mov w0, w1 +;; add sp, sp, #0x10 +;; mov x28, sp +;; ldp x29, x30, [sp], #0x10 +;; ret +;; 58: .byte 0x1f, 0xc1, 0x00, 0x00 diff --git a/tests/disas/winch/aarch64/i32_rems/one_zero.wat b/tests/disas/winch/aarch64/i32_rems/one_zero.wat new file mode 100644 index 000000000000..d38495fedd4c --- /dev/null +++ b/tests/disas/winch/aarch64/i32_rems/one_zero.wat @@ -0,0 +1,35 @@ +;;! target = "aarch64" +;;! test = "winch" + +(module + (func (result i32) + (i32.const 1) + (i32.const 0) + (i32.rem_s) + ) +) + +;; wasm[0]::function[0]: +;; stp x29, x30, [sp, #-0x10]! +;; mov x29, sp +;; mov x28, sp +;; mov x9, x0 +;; sub sp, sp, #0x10 +;; mov x28, sp +;; stur x0, [x28, #8] +;; stur x1, [x28] +;; mov x16, #0 +;; mov w0, w16 +;; mov x16, #1 +;; mov w1, w16 +;; cbz x0, #0x58 +;; 34: sxtw x0, w0 +;; sxtw x1, w1 +;; sdiv x16, x1, x0 +;; msub x1, x0, x16, x1 +;; mov w0, w1 +;; add sp, sp, #0x10 +;; mov x28, sp +;; ldp x29, x30, [sp], #0x10 +;; ret +;; 58: .byte 0x1f, 0xc1, 0x00, 0x00 diff --git a/tests/disas/winch/aarch64/i32_rems/overflow.wat b/tests/disas/winch/aarch64/i32_rems/overflow.wat new file mode 100644 index 000000000000..f16ef7bacc85 --- /dev/null +++ b/tests/disas/winch/aarch64/i32_rems/overflow.wat @@ -0,0 +1,35 @@ +;;! target = "aarch64" +;;! test = "winch" + +(module + (func (result i32) + (i32.const 0x80000000) + (i32.const -1) + (i32.rem_s) + ) +) + +;; wasm[0]::function[0]: +;; stp x29, x30, [sp, #-0x10]! 
+;; mov x29, sp +;; mov x28, sp +;; mov x9, x0 +;; sub sp, sp, #0x10 +;; mov x28, sp +;; stur x0, [x28, #8] +;; stur x1, [x28] +;; orr x16, xzr, #0xffffffff +;; mov w0, w16 +;; mov x16, #0x80000000 +;; mov w1, w16 +;; cbz x0, #0x58 +;; 34: sxtw x0, w0 +;; sxtw x1, w1 +;; sdiv x16, x1, x0 +;; msub x1, x0, x16, x1 +;; mov w0, w1 +;; add sp, sp, #0x10 +;; mov x28, sp +;; ldp x29, x30, [sp], #0x10 +;; ret +;; 58: .byte 0x1f, 0xc1, 0x00, 0x00 diff --git a/tests/disas/winch/aarch64/i32_rems/params.wat b/tests/disas/winch/aarch64/i32_rems/params.wat new file mode 100644 index 000000000000..84fc8b067816 --- /dev/null +++ b/tests/disas/winch/aarch64/i32_rems/params.wat @@ -0,0 +1,35 @@ +;;! target = "aarch64" +;;! test = "winch" + +(module + (func (param i32) (param i32) (result i32) + (local.get 0) + (local.get 1) + (i32.rem_s) + ) +) + +;; wasm[0]::function[0]: +;; stp x29, x30, [sp, #-0x10]! +;; mov x29, sp +;; mov x28, sp +;; mov x9, x0 +;; sub sp, sp, #0x18 +;; mov x28, sp +;; stur x0, [x28, #0x10] +;; stur x1, [x28, #8] +;; stur w2, [x28, #4] +;; stur w3, [x28] +;; ldur w0, [x28] +;; ldur w1, [x28, #4] +;; cbz x0, #0x58 +;; 34: sxtw x0, w0 +;; sxtw x1, w1 +;; sdiv x16, x1, x0 +;; msub x1, x0, x16, x1 +;; mov w0, w1 +;; add sp, sp, #0x18 +;; mov x28, sp +;; ldp x29, x30, [sp], #0x10 +;; ret +;; 58: .byte 0x1f, 0xc1, 0x00, 0x00 diff --git a/tests/disas/winch/aarch64/i32_rems/zero_zero.wat b/tests/disas/winch/aarch64/i32_rems/zero_zero.wat new file mode 100644 index 000000000000..d032f4340b24 --- /dev/null +++ b/tests/disas/winch/aarch64/i32_rems/zero_zero.wat @@ -0,0 +1,35 @@ +;;! target = "aarch64" +;;! test = "winch" + +(module + (func (result i32) + (i32.const 0) + (i32.const 0) + (i32.rem_s) + ) +) + +;; wasm[0]::function[0]: +;; stp x29, x30, [sp, #-0x10]! +;; mov x29, sp +;; mov x28, sp +;; mov x9, x0 +;; sub sp, sp, #0x10 +;; mov x28, sp +;; stur x0, [x28, #8] +;; stur x1, [x28] +;; mov x16, #0 +;; mov w0, w16 +;; mov x16, #0 +;; mov w1, w16 +;; cbz x0, #0x58 +;; 34: sxtw x0, w0 +;; sxtw x1, w1 +;; sdiv x16, x1, x0 +;; msub x1, x0, x16, x1 +;; mov w0, w1 +;; add sp, sp, #0x10 +;; mov x28, sp +;; ldp x29, x30, [sp], #0x10 +;; ret +;; 58: .byte 0x1f, 0xc1, 0x00, 0x00 diff --git a/tests/disas/winch/aarch64/i32_remu/const.wat b/tests/disas/winch/aarch64/i32_remu/const.wat new file mode 100644 index 000000000000..7b073a44a039 --- /dev/null +++ b/tests/disas/winch/aarch64/i32_remu/const.wat @@ -0,0 +1,35 @@ +;;! target = "aarch64" +;;! test = "winch" + +(module + (func (result i32) + (i32.const 7) + (i32.const 5) + (i32.rem_u) + ) +) + +;; wasm[0]::function[0]: +;; stp x29, x30, [sp, #-0x10]! +;; mov x29, sp +;; mov x28, sp +;; mov x9, x0 +;; sub sp, sp, #0x10 +;; mov x28, sp +;; stur x0, [x28, #8] +;; stur x1, [x28] +;; mov x16, #5 +;; mov w0, w16 +;; mov x16, #7 +;; mov w1, w16 +;; cbz x0, #0x58 +;; 34: mov w0, w0 +;; mov w1, w1 +;; udiv x16, x1, x0 +;; msub x1, x0, x16, x1 +;; mov w0, w1 +;; add sp, sp, #0x10 +;; mov x28, sp +;; ldp x29, x30, [sp], #0x10 +;; ret +;; 58: .byte 0x1f, 0xc1, 0x00, 0x00 diff --git a/tests/disas/winch/aarch64/i32_remu/one_zero.wat b/tests/disas/winch/aarch64/i32_remu/one_zero.wat new file mode 100644 index 000000000000..484229500d98 --- /dev/null +++ b/tests/disas/winch/aarch64/i32_remu/one_zero.wat @@ -0,0 +1,35 @@ +;;! target = "aarch64" +;;! test = "winch" + +(module + (func (result i32) + (i32.const 1) + (i32.const 0) + (i32.rem_u) + ) +) + +;; wasm[0]::function[0]: +;; stp x29, x30, [sp, #-0x10]! 
+;; mov x29, sp +;; mov x28, sp +;; mov x9, x0 +;; sub sp, sp, #0x10 +;; mov x28, sp +;; stur x0, [x28, #8] +;; stur x1, [x28] +;; mov x16, #0 +;; mov w0, w16 +;; mov x16, #1 +;; mov w1, w16 +;; cbz x0, #0x58 +;; 34: mov w0, w0 +;; mov w1, w1 +;; udiv x16, x1, x0 +;; msub x1, x0, x16, x1 +;; mov w0, w1 +;; add sp, sp, #0x10 +;; mov x28, sp +;; ldp x29, x30, [sp], #0x10 +;; ret +;; 58: .byte 0x1f, 0xc1, 0x00, 0x00 diff --git a/tests/disas/winch/aarch64/i32_remu/params.wat b/tests/disas/winch/aarch64/i32_remu/params.wat new file mode 100644 index 000000000000..d107b220b14f --- /dev/null +++ b/tests/disas/winch/aarch64/i32_remu/params.wat @@ -0,0 +1,35 @@ +;;! target = "aarch64" +;;! test = "winch" + +(module + (func (param i32) (param i32) (result i32) + (local.get 0) + (local.get 1) + (i32.rem_u) + ) +) + +;; wasm[0]::function[0]: +;; stp x29, x30, [sp, #-0x10]! +;; mov x29, sp +;; mov x28, sp +;; mov x9, x0 +;; sub sp, sp, #0x18 +;; mov x28, sp +;; stur x0, [x28, #0x10] +;; stur x1, [x28, #8] +;; stur w2, [x28, #4] +;; stur w3, [x28] +;; ldur w0, [x28] +;; ldur w1, [x28, #4] +;; cbz x0, #0x58 +;; 34: mov w0, w0 +;; mov w1, w1 +;; udiv x16, x1, x0 +;; msub x1, x0, x16, x1 +;; mov w0, w1 +;; add sp, sp, #0x18 +;; mov x28, sp +;; ldp x29, x30, [sp], #0x10 +;; ret +;; 58: .byte 0x1f, 0xc1, 0x00, 0x00 diff --git a/tests/disas/winch/aarch64/i32_remu/signed.wat b/tests/disas/winch/aarch64/i32_remu/signed.wat new file mode 100644 index 000000000000..9f205360ab15 --- /dev/null +++ b/tests/disas/winch/aarch64/i32_remu/signed.wat @@ -0,0 +1,35 @@ +;;! target = "aarch64" +;;! test = "winch" + +(module + (func (result i32) + (i32.const -1) + (i32.const -1) + (i32.rem_u) + ) +) + +;; wasm[0]::function[0]: +;; stp x29, x30, [sp, #-0x10]! +;; mov x29, sp +;; mov x28, sp +;; mov x9, x0 +;; sub sp, sp, #0x10 +;; mov x28, sp +;; stur x0, [x28, #8] +;; stur x1, [x28] +;; orr x16, xzr, #0xffffffff +;; mov w0, w16 +;; orr x16, xzr, #0xffffffff +;; mov w1, w16 +;; cbz x0, #0x58 +;; 34: mov w0, w0 +;; mov w1, w1 +;; udiv x16, x1, x0 +;; msub x1, x0, x16, x1 +;; mov w0, w1 +;; add sp, sp, #0x10 +;; mov x28, sp +;; ldp x29, x30, [sp], #0x10 +;; ret +;; 58: .byte 0x1f, 0xc1, 0x00, 0x00 diff --git a/tests/disas/winch/aarch64/i32_remu/zero_zero.wat b/tests/disas/winch/aarch64/i32_remu/zero_zero.wat new file mode 100644 index 000000000000..4b5b48b8d14a --- /dev/null +++ b/tests/disas/winch/aarch64/i32_remu/zero_zero.wat @@ -0,0 +1,35 @@ +;;! target = "aarch64" +;;! test = "winch" + +(module + (func (result i32) + (i32.const 0) + (i32.const 0) + (i32.rem_u) + ) +) + +;; wasm[0]::function[0]: +;; stp x29, x30, [sp, #-0x10]! +;; mov x29, sp +;; mov x28, sp +;; mov x9, x0 +;; sub sp, sp, #0x10 +;; mov x28, sp +;; stur x0, [x28, #8] +;; stur x1, [x28] +;; mov x16, #0 +;; mov w0, w16 +;; mov x16, #0 +;; mov w1, w16 +;; cbz x0, #0x58 +;; 34: mov w0, w0 +;; mov w1, w1 +;; udiv x16, x1, x0 +;; msub x1, x0, x16, x1 +;; mov w0, w1 +;; add sp, sp, #0x10 +;; mov x28, sp +;; ldp x29, x30, [sp], #0x10 +;; ret +;; 58: .byte 0x1f, 0xc1, 0x00, 0x00 diff --git a/tests/disas/winch/aarch64/i64_divs/const.wat b/tests/disas/winch/aarch64/i64_divs/const.wat new file mode 100644 index 000000000000..8c81a2e4b499 --- /dev/null +++ b/tests/disas/winch/aarch64/i64_divs/const.wat @@ -0,0 +1,36 @@ +;;! target = "aarch64" +;;! test = "winch" + +(module + (func (result i64) + (i64.const 20) + (i64.const 10) + (i64.div_s) + ) +) + +;; wasm[0]::function[0]: +;; stp x29, x30, [sp, #-0x10]! 
+;; mov x29, sp +;; mov x28, sp +;; mov x9, x0 +;; sub sp, sp, #0x10 +;; mov x28, sp +;; stur x0, [x28, #8] +;; stur x1, [x28] +;; mov x16, #0xa +;; mov x0, x16 +;; mov x16, #0x14 +;; mov x1, x16 +;; cbz x0, #0x58 +;; 34: cmn x0, #1 +;; ccmp x1, #1, #0, eq +;; b.vs #0x5c +;; 40: sdiv x1, x1, x0 +;; mov x0, x1 +;; add sp, sp, #0x10 +;; mov x28, sp +;; ldp x29, x30, [sp], #0x10 +;; ret +;; 58: .byte 0x1f, 0xc1, 0x00, 0x00 +;; 5c: .byte 0x1f, 0xc1, 0x00, 0x00 diff --git a/tests/disas/winch/aarch64/i64_divs/one_zero.wat b/tests/disas/winch/aarch64/i64_divs/one_zero.wat new file mode 100644 index 000000000000..61fb7b28278f --- /dev/null +++ b/tests/disas/winch/aarch64/i64_divs/one_zero.wat @@ -0,0 +1,36 @@ +;;! target = "aarch64" +;;! test = "winch" + +(module + (func (result i64) + (i64.const 1) + (i64.const 0) + (i64.div_s) + ) +) + +;; wasm[0]::function[0]: +;; stp x29, x30, [sp, #-0x10]! +;; mov x29, sp +;; mov x28, sp +;; mov x9, x0 +;; sub sp, sp, #0x10 +;; mov x28, sp +;; stur x0, [x28, #8] +;; stur x1, [x28] +;; mov x16, #0 +;; mov x0, x16 +;; mov x16, #1 +;; mov x1, x16 +;; cbz x0, #0x58 +;; 34: cmn x0, #1 +;; ccmp x1, #1, #0, eq +;; b.vs #0x5c +;; 40: sdiv x1, x1, x0 +;; mov x0, x1 +;; add sp, sp, #0x10 +;; mov x28, sp +;; ldp x29, x30, [sp], #0x10 +;; ret +;; 58: .byte 0x1f, 0xc1, 0x00, 0x00 +;; 5c: .byte 0x1f, 0xc1, 0x00, 0x00 diff --git a/tests/disas/winch/aarch64/i64_divs/overflow.wat b/tests/disas/winch/aarch64/i64_divs/overflow.wat new file mode 100644 index 000000000000..eadec00b4324 --- /dev/null +++ b/tests/disas/winch/aarch64/i64_divs/overflow.wat @@ -0,0 +1,36 @@ +;;! target = "aarch64" +;;! test = "winch" + +(module + (func (result i64) + (i64.const 0x8000000000000000) + (i64.const -1) + (i64.div_s) + ) +) + +;; wasm[0]::function[0]: +;; stp x29, x30, [sp, #-0x10]! +;; mov x29, sp +;; mov x28, sp +;; mov x9, x0 +;; sub sp, sp, #0x10 +;; mov x28, sp +;; stur x0, [x28, #8] +;; stur x1, [x28] +;; mov x16, #-1 +;; mov x0, x16 +;; mov x16, #-0x8000000000000000 +;; mov x1, x16 +;; cbz x0, #0x58 +;; 34: cmn x0, #1 +;; ccmp x1, #1, #0, eq +;; b.vs #0x5c +;; 40: sdiv x1, x1, x0 +;; mov x0, x1 +;; add sp, sp, #0x10 +;; mov x28, sp +;; ldp x29, x30, [sp], #0x10 +;; ret +;; 58: .byte 0x1f, 0xc1, 0x00, 0x00 +;; 5c: .byte 0x1f, 0xc1, 0x00, 0x00 diff --git a/tests/disas/winch/aarch64/i64_divs/params.wat b/tests/disas/winch/aarch64/i64_divs/params.wat new file mode 100644 index 000000000000..72c5f9c3df1d --- /dev/null +++ b/tests/disas/winch/aarch64/i64_divs/params.wat @@ -0,0 +1,36 @@ +;;! target = "aarch64" +;;! test = "winch" + +(module + (func (param i64) (param i64) (result i64) + (local.get 0) + (local.get 1) + (i64.div_s) + ) +) + +;; wasm[0]::function[0]: +;; stp x29, x30, [sp, #-0x10]! +;; mov x29, sp +;; mov x28, sp +;; mov x9, x0 +;; sub sp, sp, #0x20 +;; mov x28, sp +;; stur x0, [x28, #0x18] +;; stur x1, [x28, #0x10] +;; stur x2, [x28, #8] +;; stur x3, [x28] +;; ldur x0, [x28] +;; ldur x1, [x28, #8] +;; cbz x0, #0x58 +;; 34: cmn x0, #1 +;; ccmp x1, #1, #0, eq +;; b.vs #0x5c +;; 40: sdiv x1, x1, x0 +;; mov x0, x1 +;; add sp, sp, #0x20 +;; mov x28, sp +;; ldp x29, x30, [sp], #0x10 +;; ret +;; 58: .byte 0x1f, 0xc1, 0x00, 0x00 +;; 5c: .byte 0x1f, 0xc1, 0x00, 0x00 diff --git a/tests/disas/winch/aarch64/i64_divs/zero_zero.wat b/tests/disas/winch/aarch64/i64_divs/zero_zero.wat new file mode 100644 index 000000000000..c356678cbd80 --- /dev/null +++ b/tests/disas/winch/aarch64/i64_divs/zero_zero.wat @@ -0,0 +1,36 @@ +;;! target = "aarch64" +;;! 
test = "winch" + +(module + (func (result i64) + (i64.const 0) + (i64.const 0) + (i64.div_s) + ) +) + +;; wasm[0]::function[0]: +;; stp x29, x30, [sp, #-0x10]! +;; mov x29, sp +;; mov x28, sp +;; mov x9, x0 +;; sub sp, sp, #0x10 +;; mov x28, sp +;; stur x0, [x28, #8] +;; stur x1, [x28] +;; mov x16, #0 +;; mov x0, x16 +;; mov x16, #0 +;; mov x1, x16 +;; cbz x0, #0x58 +;; 34: cmn x0, #1 +;; ccmp x1, #1, #0, eq +;; b.vs #0x5c +;; 40: sdiv x1, x1, x0 +;; mov x0, x1 +;; add sp, sp, #0x10 +;; mov x28, sp +;; ldp x29, x30, [sp], #0x10 +;; ret +;; 58: .byte 0x1f, 0xc1, 0x00, 0x00 +;; 5c: .byte 0x1f, 0xc1, 0x00, 0x00 diff --git a/tests/disas/winch/aarch64/i64_divu/const.wat b/tests/disas/winch/aarch64/i64_divu/const.wat new file mode 100644 index 000000000000..23dd7cca4866 --- /dev/null +++ b/tests/disas/winch/aarch64/i64_divu/const.wat @@ -0,0 +1,32 @@ +;;! target = "aarch64" +;;! test = "winch" + +(module + (func (result i64) + (i64.const 20) + (i64.const 10) + (i64.div_u) + ) +) + +;; wasm[0]::function[0]: +;; stp x29, x30, [sp, #-0x10]! +;; mov x29, sp +;; mov x28, sp +;; mov x9, x0 +;; sub sp, sp, #0x10 +;; mov x28, sp +;; stur x0, [x28, #8] +;; stur x1, [x28] +;; mov x16, #0xa +;; mov x0, x16 +;; mov x16, #0x14 +;; mov x1, x16 +;; cbz x0, #0x4c +;; 34: udiv x1, x1, x0 +;; mov x0, x1 +;; add sp, sp, #0x10 +;; mov x28, sp +;; ldp x29, x30, [sp], #0x10 +;; ret +;; 4c: .byte 0x1f, 0xc1, 0x00, 0x00 diff --git a/tests/disas/winch/aarch64/i64_divu/one_zero.wat b/tests/disas/winch/aarch64/i64_divu/one_zero.wat new file mode 100644 index 000000000000..bd6a84f50cce --- /dev/null +++ b/tests/disas/winch/aarch64/i64_divu/one_zero.wat @@ -0,0 +1,32 @@ +;;! target = "aarch64" +;;! test = "winch" + +(module + (func (result i64) + (i64.const 1) + (i64.const 0) + (i64.div_u) + ) +) + +;; wasm[0]::function[0]: +;; stp x29, x30, [sp, #-0x10]! +;; mov x29, sp +;; mov x28, sp +;; mov x9, x0 +;; sub sp, sp, #0x10 +;; mov x28, sp +;; stur x0, [x28, #8] +;; stur x1, [x28] +;; mov x16, #0 +;; mov x0, x16 +;; mov x16, #1 +;; mov x1, x16 +;; cbz x0, #0x4c +;; 34: udiv x1, x1, x0 +;; mov x0, x1 +;; add sp, sp, #0x10 +;; mov x28, sp +;; ldp x29, x30, [sp], #0x10 +;; ret +;; 4c: .byte 0x1f, 0xc1, 0x00, 0x00 diff --git a/tests/disas/winch/aarch64/i64_divu/params.wat b/tests/disas/winch/aarch64/i64_divu/params.wat new file mode 100644 index 000000000000..5bbefb7aea88 --- /dev/null +++ b/tests/disas/winch/aarch64/i64_divu/params.wat @@ -0,0 +1,32 @@ +;;! target = "aarch64" +;;! test = "winch" + +(module + (func (param i64) (param i64) (result i64) + (local.get 0) + (local.get 1) + (i64.div_u) + ) +) + +;; wasm[0]::function[0]: +;; stp x29, x30, [sp, #-0x10]! +;; mov x29, sp +;; mov x28, sp +;; mov x9, x0 +;; sub sp, sp, #0x20 +;; mov x28, sp +;; stur x0, [x28, #0x18] +;; stur x1, [x28, #0x10] +;; stur x2, [x28, #8] +;; stur x3, [x28] +;; ldur x0, [x28] +;; ldur x1, [x28, #8] +;; cbz x0, #0x4c +;; 34: udiv x1, x1, x0 +;; mov x0, x1 +;; add sp, sp, #0x20 +;; mov x28, sp +;; ldp x29, x30, [sp], #0x10 +;; ret +;; 4c: .byte 0x1f, 0xc1, 0x00, 0x00 diff --git a/tests/disas/winch/aarch64/i64_divu/signed.wat b/tests/disas/winch/aarch64/i64_divu/signed.wat new file mode 100644 index 000000000000..7d425f4e4e5c --- /dev/null +++ b/tests/disas/winch/aarch64/i64_divu/signed.wat @@ -0,0 +1,32 @@ +;;! target = "aarch64" +;;! test = "winch" + +(module + (func (result i64) + (i64.const -1) + (i64.const -1) + (i64.div_u) + ) +) + +;; wasm[0]::function[0]: +;; stp x29, x30, [sp, #-0x10]! 
+;; mov x29, sp +;; mov x28, sp +;; mov x9, x0 +;; sub sp, sp, #0x10 +;; mov x28, sp +;; stur x0, [x28, #8] +;; stur x1, [x28] +;; mov x16, #-1 +;; mov x0, x16 +;; mov x16, #-1 +;; mov x1, x16 +;; cbz x0, #0x4c +;; 34: udiv x1, x1, x0 +;; mov x0, x1 +;; add sp, sp, #0x10 +;; mov x28, sp +;; ldp x29, x30, [sp], #0x10 +;; ret +;; 4c: .byte 0x1f, 0xc1, 0x00, 0x00 diff --git a/tests/disas/winch/aarch64/i64_divu/zero_zero.wat b/tests/disas/winch/aarch64/i64_divu/zero_zero.wat new file mode 100644 index 000000000000..71f9c4285d0b --- /dev/null +++ b/tests/disas/winch/aarch64/i64_divu/zero_zero.wat @@ -0,0 +1,32 @@ +;;! target = "aarch64" +;;! test = "winch" + +(module + (func (result i64) + (i64.const 0) + (i64.const 0) + (i64.div_u) + ) +) + +;; wasm[0]::function[0]: +;; stp x29, x30, [sp, #-0x10]! +;; mov x29, sp +;; mov x28, sp +;; mov x9, x0 +;; sub sp, sp, #0x10 +;; mov x28, sp +;; stur x0, [x28, #8] +;; stur x1, [x28] +;; mov x16, #0 +;; mov x0, x16 +;; mov x16, #0 +;; mov x1, x16 +;; cbz x0, #0x4c +;; 34: udiv x1, x1, x0 +;; mov x0, x1 +;; add sp, sp, #0x10 +;; mov x28, sp +;; ldp x29, x30, [sp], #0x10 +;; ret +;; 4c: .byte 0x1f, 0xc1, 0x00, 0x00 diff --git a/tests/disas/winch/aarch64/i64_reinterpret_f64/const.wat b/tests/disas/winch/aarch64/i64_reinterpret_f64/const.wat new file mode 100644 index 000000000000..7949beae66cb --- /dev/null +++ b/tests/disas/winch/aarch64/i64_reinterpret_f64/const.wat @@ -0,0 +1,25 @@ +;;! target = "aarch64" +;;! test = "winch" + +(module + (func (result i64) + (f64.const 1.0) + (i64.reinterpret_f64) + ) +) +;; wasm[0]::function[0]: +;; stp x29, x30, [sp, #-0x10]! +;; mov x29, sp +;; mov x28, sp +;; mov x9, x0 +;; sub sp, sp, #0x10 +;; mov x28, sp +;; stur x0, [x28, #8] +;; stur x1, [x28] +;; mov x16, #0x3ff0000000000000 +;; fmov d0, x16 +;; mov x0, v0.d[0] +;; add sp, sp, #0x10 +;; mov x28, sp +;; ldp x29, x30, [sp], #0x10 +;; ret diff --git a/tests/disas/winch/aarch64/i64_reinterpret_f64/locals.wat b/tests/disas/winch/aarch64/i64_reinterpret_f64/locals.wat new file mode 100644 index 000000000000..7ab5e90b47ca --- /dev/null +++ b/tests/disas/winch/aarch64/i64_reinterpret_f64/locals.wat @@ -0,0 +1,28 @@ +;;! target = "aarch64" +;;! test = "winch" + +(module + (func (result i64) + (local f64) + + (local.get 0) + (i64.reinterpret_f64) + ) +) +;; wasm[0]::function[0]: +;; stp x29, x30, [sp, #-0x10]! +;; mov x29, sp +;; mov x28, sp +;; mov x9, x0 +;; sub sp, sp, #0x18 +;; mov x28, sp +;; stur x0, [x28, #0x10] +;; stur x1, [x28, #8] +;; mov x16, #0 +;; stur x16, [x28] +;; ldur d0, [x28] +;; mov x0, v0.d[0] +;; add sp, sp, #0x18 +;; mov x28, sp +;; ldp x29, x30, [sp], #0x10 +;; ret diff --git a/tests/disas/winch/aarch64/i64_reinterpret_f64/params.wat b/tests/disas/winch/aarch64/i64_reinterpret_f64/params.wat new file mode 100644 index 000000000000..8e6ff8f72344 --- /dev/null +++ b/tests/disas/winch/aarch64/i64_reinterpret_f64/params.wat @@ -0,0 +1,25 @@ +;;! target = "aarch64" +;;! test = "winch" + +(module + (func (param f64) (result i64) + (local.get 0) + (i64.reinterpret_f64) + ) +) +;; wasm[0]::function[0]: +;; stp x29, x30, [sp, #-0x10]! 
+;; mov x29, sp +;; mov x28, sp +;; mov x9, x0 +;; sub sp, sp, #0x18 +;; mov x28, sp +;; stur x0, [x28, #0x10] +;; stur x1, [x28, #8] +;; stur d0, [x28] +;; ldur d0, [x28] +;; mov x0, v0.d[0] +;; add sp, sp, #0x18 +;; mov x28, sp +;; ldp x29, x30, [sp], #0x10 +;; ret diff --git a/tests/disas/winch/aarch64/i64_reinterpret_f64/ret_float.wat b/tests/disas/winch/aarch64/i64_reinterpret_f64/ret_float.wat new file mode 100644 index 000000000000..93c3f9b19f94 --- /dev/null +++ b/tests/disas/winch/aarch64/i64_reinterpret_f64/ret_float.wat @@ -0,0 +1,29 @@ +;;! target = "aarch64" +;;! test = "winch" + +(module + (func (result f64) + f64.const 1.0 + i64.reinterpret_f64 + drop + f64.const 1.0 + ) +) +;; wasm[0]::function[0]: +;; stp x29, x30, [sp, #-0x10]! +;; mov x29, sp +;; mov x28, sp +;; mov x9, x0 +;; sub sp, sp, #0x10 +;; mov x28, sp +;; stur x0, [x28, #8] +;; stur x1, [x28] +;; mov x16, #0x3ff0000000000000 +;; fmov d0, x16 +;; mov x0, v0.d[0] +;; mov x16, #0x3ff0000000000000 +;; fmov d0, x16 +;; add sp, sp, #0x10 +;; mov x28, sp +;; ldp x29, x30, [sp], #0x10 +;; ret diff --git a/tests/disas/winch/aarch64/i64_rems/const.wat b/tests/disas/winch/aarch64/i64_rems/const.wat new file mode 100644 index 000000000000..b5c29486f77e --- /dev/null +++ b/tests/disas/winch/aarch64/i64_rems/const.wat @@ -0,0 +1,33 @@ +;;! target = "aarch64" +;;! test = "winch" + +(module + (func (result i64) + (i64.const 7) + (i64.const 5) + (i64.rem_s) + ) +) + +;; wasm[0]::function[0]: +;; stp x29, x30, [sp, #-0x10]! +;; mov x29, sp +;; mov x28, sp +;; mov x9, x0 +;; sub sp, sp, #0x10 +;; mov x28, sp +;; stur x0, [x28, #8] +;; stur x1, [x28] +;; mov x16, #5 +;; mov x0, x16 +;; mov x16, #7 +;; mov x1, x16 +;; cbz x0, #0x50 +;; 34: sdiv x16, x1, x0 +;; msub x1, x0, x16, x1 +;; mov x0, x1 +;; add sp, sp, #0x10 +;; mov x28, sp +;; ldp x29, x30, [sp], #0x10 +;; ret +;; 50: .byte 0x1f, 0xc1, 0x00, 0x00 diff --git a/tests/disas/winch/aarch64/i64_rems/one_zero.wat b/tests/disas/winch/aarch64/i64_rems/one_zero.wat new file mode 100644 index 000000000000..144abaaa0f50 --- /dev/null +++ b/tests/disas/winch/aarch64/i64_rems/one_zero.wat @@ -0,0 +1,33 @@ +;;! target = "aarch64" +;;! test = "winch" + +(module + (func (result i64) + (i64.const 1) + (i64.const 0) + (i64.rem_s) + ) +) + +;; wasm[0]::function[0]: +;; stp x29, x30, [sp, #-0x10]! +;; mov x29, sp +;; mov x28, sp +;; mov x9, x0 +;; sub sp, sp, #0x10 +;; mov x28, sp +;; stur x0, [x28, #8] +;; stur x1, [x28] +;; mov x16, #0 +;; mov x0, x16 +;; mov x16, #1 +;; mov x1, x16 +;; cbz x0, #0x50 +;; 34: sdiv x16, x1, x0 +;; msub x1, x0, x16, x1 +;; mov x0, x1 +;; add sp, sp, #0x10 +;; mov x28, sp +;; ldp x29, x30, [sp], #0x10 +;; ret +;; 50: .byte 0x1f, 0xc1, 0x00, 0x00 diff --git a/tests/disas/winch/aarch64/i64_rems/overflow.wat b/tests/disas/winch/aarch64/i64_rems/overflow.wat new file mode 100644 index 000000000000..78b82a695075 --- /dev/null +++ b/tests/disas/winch/aarch64/i64_rems/overflow.wat @@ -0,0 +1,33 @@ +;;! target = "aarch64" +;;! test = "winch" + +(module + (func (result i64) + (i64.const 0x8000000000000000) + (i64.const -1) + (i64.rem_s) + ) +) + +;; wasm[0]::function[0]: +;; stp x29, x30, [sp, #-0x10]! 
+;; mov x29, sp +;; mov x28, sp +;; mov x9, x0 +;; sub sp, sp, #0x10 +;; mov x28, sp +;; stur x0, [x28, #8] +;; stur x1, [x28] +;; mov x16, #-1 +;; mov x0, x16 +;; mov x16, #-0x8000000000000000 +;; mov x1, x16 +;; cbz x0, #0x50 +;; 34: sdiv x16, x1, x0 +;; msub x1, x0, x16, x1 +;; mov x0, x1 +;; add sp, sp, #0x10 +;; mov x28, sp +;; ldp x29, x30, [sp], #0x10 +;; ret +;; 50: .byte 0x1f, 0xc1, 0x00, 0x00 diff --git a/tests/disas/winch/aarch64/i64_rems/params.wat b/tests/disas/winch/aarch64/i64_rems/params.wat new file mode 100644 index 000000000000..3022b8adc970 --- /dev/null +++ b/tests/disas/winch/aarch64/i64_rems/params.wat @@ -0,0 +1,33 @@ +;;! target = "aarch64" +;;! test = "winch" + +(module + (func (param i64) (param i64) (result i64) + (local.get 0) + (local.get 1) + (i64.rem_s) + ) +) + +;; wasm[0]::function[0]: +;; stp x29, x30, [sp, #-0x10]! +;; mov x29, sp +;; mov x28, sp +;; mov x9, x0 +;; sub sp, sp, #0x20 +;; mov x28, sp +;; stur x0, [x28, #0x18] +;; stur x1, [x28, #0x10] +;; stur x2, [x28, #8] +;; stur x3, [x28] +;; ldur x0, [x28] +;; ldur x1, [x28, #8] +;; cbz x0, #0x50 +;; 34: sdiv x16, x1, x0 +;; msub x1, x0, x16, x1 +;; mov x0, x1 +;; add sp, sp, #0x20 +;; mov x28, sp +;; ldp x29, x30, [sp], #0x10 +;; ret +;; 50: .byte 0x1f, 0xc1, 0x00, 0x00 diff --git a/tests/disas/winch/aarch64/i64_rems/zero_zero.wat b/tests/disas/winch/aarch64/i64_rems/zero_zero.wat new file mode 100644 index 000000000000..59819f6ed1ba --- /dev/null +++ b/tests/disas/winch/aarch64/i64_rems/zero_zero.wat @@ -0,0 +1,33 @@ +;;! target = "aarch64" +;;! test = "winch" + +(module + (func (result i64) + (i64.const 0) + (i64.const 0) + (i64.rem_s) + ) +) + +;; wasm[0]::function[0]: +;; stp x29, x30, [sp, #-0x10]! +;; mov x29, sp +;; mov x28, sp +;; mov x9, x0 +;; sub sp, sp, #0x10 +;; mov x28, sp +;; stur x0, [x28, #8] +;; stur x1, [x28] +;; mov x16, #0 +;; mov x0, x16 +;; mov x16, #0 +;; mov x1, x16 +;; cbz x0, #0x50 +;; 34: sdiv x16, x1, x0 +;; msub x1, x0, x16, x1 +;; mov x0, x1 +;; add sp, sp, #0x10 +;; mov x28, sp +;; ldp x29, x30, [sp], #0x10 +;; ret +;; 50: .byte 0x1f, 0xc1, 0x00, 0x00 diff --git a/tests/disas/winch/aarch64/i64_remu/const.wat b/tests/disas/winch/aarch64/i64_remu/const.wat new file mode 100644 index 000000000000..07b65bc1a32e --- /dev/null +++ b/tests/disas/winch/aarch64/i64_remu/const.wat @@ -0,0 +1,33 @@ +;;! target = "aarch64" +;;! test = "winch" + +(module + (func (result i64) + (i64.const 7) + (i64.const 5) + (i64.rem_u) + ) +) + +;; wasm[0]::function[0]: +;; stp x29, x30, [sp, #-0x10]! +;; mov x29, sp +;; mov x28, sp +;; mov x9, x0 +;; sub sp, sp, #0x10 +;; mov x28, sp +;; stur x0, [x28, #8] +;; stur x1, [x28] +;; mov x16, #5 +;; mov x0, x16 +;; mov x16, #7 +;; mov x1, x16 +;; cbz x0, #0x50 +;; 34: udiv x16, x1, x0 +;; msub x1, x0, x16, x1 +;; mov x0, x1 +;; add sp, sp, #0x10 +;; mov x28, sp +;; ldp x29, x30, [sp], #0x10 +;; ret +;; 50: .byte 0x1f, 0xc1, 0x00, 0x00 diff --git a/tests/disas/winch/aarch64/i64_remu/one_zero.wat b/tests/disas/winch/aarch64/i64_remu/one_zero.wat new file mode 100644 index 000000000000..a9a756cb5684 --- /dev/null +++ b/tests/disas/winch/aarch64/i64_remu/one_zero.wat @@ -0,0 +1,33 @@ +;;! target = "aarch64" +;;! test = "winch" + +(module + (func (result i64) + (i64.const 1) + (i64.const 0) + (i64.rem_u) + ) +) + +;; wasm[0]::function[0]: +;; stp x29, x30, [sp, #-0x10]! 
+;; mov x29, sp +;; mov x28, sp +;; mov x9, x0 +;; sub sp, sp, #0x10 +;; mov x28, sp +;; stur x0, [x28, #8] +;; stur x1, [x28] +;; mov x16, #0 +;; mov x0, x16 +;; mov x16, #1 +;; mov x1, x16 +;; cbz x0, #0x50 +;; 34: udiv x16, x1, x0 +;; msub x1, x0, x16, x1 +;; mov x0, x1 +;; add sp, sp, #0x10 +;; mov x28, sp +;; ldp x29, x30, [sp], #0x10 +;; ret +;; 50: .byte 0x1f, 0xc1, 0x00, 0x00 diff --git a/tests/disas/winch/aarch64/i64_remu/params.wat b/tests/disas/winch/aarch64/i64_remu/params.wat new file mode 100644 index 000000000000..b1244e4315d0 --- /dev/null +++ b/tests/disas/winch/aarch64/i64_remu/params.wat @@ -0,0 +1,33 @@ +;;! target = "aarch64" +;;! test = "winch" + +(module + (func (param i64) (param i64) (result i64) + (local.get 0) + (local.get 1) + (i64.rem_u) + ) +) + +;; wasm[0]::function[0]: +;; stp x29, x30, [sp, #-0x10]! +;; mov x29, sp +;; mov x28, sp +;; mov x9, x0 +;; sub sp, sp, #0x20 +;; mov x28, sp +;; stur x0, [x28, #0x18] +;; stur x1, [x28, #0x10] +;; stur x2, [x28, #8] +;; stur x3, [x28] +;; ldur x0, [x28] +;; ldur x1, [x28, #8] +;; cbz x0, #0x50 +;; 34: udiv x16, x1, x0 +;; msub x1, x0, x16, x1 +;; mov x0, x1 +;; add sp, sp, #0x20 +;; mov x28, sp +;; ldp x29, x30, [sp], #0x10 +;; ret +;; 50: .byte 0x1f, 0xc1, 0x00, 0x00 diff --git a/tests/disas/winch/aarch64/i64_remu/signed.wat b/tests/disas/winch/aarch64/i64_remu/signed.wat new file mode 100644 index 000000000000..866d842403a8 --- /dev/null +++ b/tests/disas/winch/aarch64/i64_remu/signed.wat @@ -0,0 +1,33 @@ +;;! target = "aarch64" +;;! test = "winch" + +(module + (func (result i64) + (i64.const -1) + (i64.const -1) + (i64.rem_u) + ) +) + +;; wasm[0]::function[0]: +;; stp x29, x30, [sp, #-0x10]! +;; mov x29, sp +;; mov x28, sp +;; mov x9, x0 +;; sub sp, sp, #0x10 +;; mov x28, sp +;; stur x0, [x28, #8] +;; stur x1, [x28] +;; mov x16, #-1 +;; mov x0, x16 +;; mov x16, #-1 +;; mov x1, x16 +;; cbz x0, #0x50 +;; 34: udiv x16, x1, x0 +;; msub x1, x0, x16, x1 +;; mov x0, x1 +;; add sp, sp, #0x10 +;; mov x28, sp +;; ldp x29, x30, [sp], #0x10 +;; ret +;; 50: .byte 0x1f, 0xc1, 0x00, 0x00 diff --git a/tests/disas/winch/aarch64/i64_remu/zero_zero.wat b/tests/disas/winch/aarch64/i64_remu/zero_zero.wat new file mode 100644 index 000000000000..c2d2c6786daf --- /dev/null +++ b/tests/disas/winch/aarch64/i64_remu/zero_zero.wat @@ -0,0 +1,33 @@ +;;! target = "aarch64" +;;! test = "winch" + +(module + (func (result i64) + (i64.const 0) + (i64.const 0) + (i64.rem_u) + ) +) + +;; wasm[0]::function[0]: +;; stp x29, x30, [sp, #-0x10]! +;; mov x29, sp +;; mov x28, sp +;; mov x9, x0 +;; sub sp, sp, #0x10 +;; mov x28, sp +;; stur x0, [x28, #8] +;; stur x1, [x28] +;; mov x16, #0 +;; mov x0, x16 +;; mov x16, #0 +;; mov x1, x16 +;; cbz x0, #0x50 +;; 34: udiv x16, x1, x0 +;; msub x1, x0, x16, x1 +;; mov x0, x1 +;; add sp, sp, #0x10 +;; mov x28, sp +;; ldp x29, x30, [sp], #0x10 +;; ret +;; 50: .byte 0x1f, 0xc1, 0x00, 0x00 diff --git a/tests/disas/winch/x64/call/multi.wat b/tests/disas/winch/x64/call/multi.wat new file mode 100644 index 000000000000..60a18893262d --- /dev/null +++ b/tests/disas/winch/x64/call/multi.wat @@ -0,0 +1,62 @@ +;;! target = "x86_64" +;;! 
test = "winch" +(module + (func $multi (result i32 i32) + i32.const 1 + i32.const 2) + + (func $start + call $multi + drop + drop) +) +;; wasm[0]::function[0]::multi: +;; pushq %rbp +;; movq %rsp, %rbp +;; movq 8(%rsi), %r11 +;; movq 0x10(%r11), %r11 +;; addq $0x24, %r11 +;; cmpq %rsp, %r11 +;; ja 0x58 +;; 1c: movq %rsi, %r14 +;; subq $0x20, %rsp +;; movq %rsi, 0x18(%rsp) +;; movq %rdx, 0x10(%rsp) +;; movq %rdi, 8(%rsp) +;; movl $2, %eax +;; subq $4, %rsp +;; movl $1, (%rsp) +;; movq 0xc(%rsp), %rcx +;; movl (%rsp), %r11d +;; addq $4, %rsp +;; movl %r11d, (%rcx) +;; addq $0x20, %rsp +;; popq %rbp +;; retq +;; 58: ud2 +;; +;; wasm[0]::function[1]::start: +;; pushq %rbp +;; movq %rsp, %rbp +;; movq 8(%rdi), %r11 +;; movq 0x10(%r11), %r11 +;; addq $0x20, %r11 +;; cmpq %rsp, %r11 +;; ja 0xb7 +;; 7c: movq %rdi, %r14 +;; subq $0x10, %rsp +;; movq %rdi, 8(%rsp) +;; movq %rsi, (%rsp) +;; subq $4, %rsp +;; subq $0xc, %rsp +;; movq %r14, %rsi +;; movq %r14, %rdx +;; leaq 0xc(%rsp), %rdi +;; callq 0 +;; addq $0xc, %rsp +;; movq 0xc(%rsp), %r14 +;; addq $4, %rsp +;; addq $0x10, %rsp +;; popq %rbp +;; retq +;; b7: ud2 diff --git a/winch/codegen/src/codegen/context.rs b/winch/codegen/src/codegen/context.rs index b23096bc44ed..9835ce83dee1 100644 --- a/winch/codegen/src/codegen/context.rs +++ b/winch/codegen/src/codegen/context.rs @@ -308,9 +308,9 @@ impl<'a> CodeGenContext<'a, Emission> { /// Prepares arguments for emitting a binary operation. /// /// The `emit` function returns the `TypedReg` to put on the value stack. - pub fn binop(&mut self, masm: &mut M, size: OperandSize, mut emit: F) + pub fn binop(&mut self, masm: &mut M, size: OperandSize, emit: F) where - F: FnMut(&mut M, Reg, Reg, OperandSize) -> TypedReg, + F: FnOnce(&mut M, Reg, Reg, OperandSize) -> TypedReg, M: MacroAssembler, { let src = self.pop_to_reg(masm, None); @@ -321,9 +321,9 @@ impl<'a> CodeGenContext<'a, Emission> { } /// Prepares arguments for emitting an f32 or f64 comparison operation. - pub fn float_cmp_op(&mut self, masm: &mut M, size: OperandSize, mut emit: F) + pub fn float_cmp_op(&mut self, masm: &mut M, size: OperandSize, emit: F) where - F: FnMut(&mut M, Reg, Reg, Reg, OperandSize), + F: FnOnce(&mut M, Reg, Reg, Reg, OperandSize), M: MacroAssembler, { let src2 = self.pop_to_reg(masm, None); @@ -371,9 +371,9 @@ impl<'a> CodeGenContext<'a, Emission> { /// Prepares arguments for emitting an i64 binary operation. /// /// The `emit` function returns the `TypedReg` to put on the value stack. - pub fn i64_binop(&mut self, masm: &mut M, mut emit: F) + pub fn i64_binop(&mut self, masm: &mut M, emit: F) where - F: FnMut(&mut M, Reg, RegImm, OperandSize) -> TypedReg, + F: FnOnce(&mut M, Reg, RegImm, OperandSize) -> TypedReg, M: MacroAssembler, { let top = self.stack.peek().expect("value at stack top"); @@ -393,9 +393,9 @@ impl<'a> CodeGenContext<'a, Emission> { } /// Prepares arguments for emitting a convert operation. 
- pub fn convert_op(&mut self, masm: &mut M, dst_ty: WasmValType, mut emit: F) + pub fn convert_op(&mut self, masm: &mut M, dst_ty: WasmValType, emit: F) where - F: FnMut(&mut M, Reg, Reg, OperandSize), + F: FnOnce(&mut M, Reg, Reg, OperandSize), M: MacroAssembler, { let src = self.pop_to_reg(masm, None); @@ -422,9 +422,9 @@ impl<'a> CodeGenContext<'a, Emission> { masm: &mut M, dst_ty: WasmValType, tmp_reg_class: RegClass, - mut emit: F, + emit: F, ) where - F: FnMut(&mut M, Reg, Reg, Reg, OperandSize), + F: FnOnce(&mut M, Reg, Reg, Reg, OperandSize), M: MacroAssembler, { let tmp_gpr = self.reg_for_class(tmp_reg_class, masm); diff --git a/winch/codegen/src/isa/aarch64/asm.rs b/winch/codegen/src/isa/aarch64/asm.rs index e5bb5ee17502..6bbc4994ac34 100644 --- a/winch/codegen/src/isa/aarch64/asm.rs +++ b/winch/codegen/src/isa/aarch64/asm.rs @@ -1,23 +1,26 @@ //! Assembler library implementation for Aarch64. use super::{address::Address, regs}; -use crate::masm::{ExtendKind, FloatCmpKind, IntCmpKind, RoundingMode, ShiftKind}; +use crate::aarch64::regs::zero; +use crate::masm::{ + DivKind, ExtendKind, FloatCmpKind, IntCmpKind, RemKind, RoundingMode, ShiftKind, +}; +use crate::CallingConvention; use crate::{ masm::OperandSize, reg::{writable, Reg, WritableReg}, }; -use cranelift_codegen::ir::TrapCode; -use cranelift_codegen::isa::aarch64::inst::{ - BitOp, BranchTarget, Cond, CondBrKind, FPULeftShiftImm, FPUOp1, FPUOp2, - FPUOpRI::{self, UShr32, UShr64}, - FPUOpRIMod, FPURightShiftImm, FpuRoundMode, ImmLogic, ImmShift, ScalarSize, -}; +use cranelift_codegen::isa::aarch64::inst::{UImm5, NZCV}; use cranelift_codegen::{ - ir::{MemFlags, SourceLoc}, + ir::{ExternalName, LibCall, MemFlags, SourceLoc, TrapCode, UserExternalNameRef}, isa::aarch64::inst::{ self, emit::{EmitInfo, EmitState}, - ALUOp, ALUOp3, AMode, ExtendOp, Imm12, Inst, PairAMode, VecLanesOp, VecMisc2, VectorSize, + ALUOp, ALUOp3, AMode, BitOp, BranchTarget, Cond, CondBrKind, ExtendOp, FPULeftShiftImm, + FPUOp1, FPUOp2, + FPUOpRI::{self, UShr32, UShr64}, + FPUOpRIMod, FPURightShiftImm, FpuRoundMode, Imm12, ImmLogic, ImmShift, Inst, IntToFpuOp, + PairAMode, ScalarSize, VecLanesOp, VecMisc2, VectorSize, }, settings, Final, MachBuffer, MachBufferFinalized, MachInst, MachInstEmit, MachInstEmitState, MachLabel, Writable, @@ -201,7 +204,7 @@ impl Assembler { self.ldr(addr, rd, size, false); } - /// Load a register. + /// Load a value from `addr` into a register. fn ldr(&mut self, addr: Address, rd: WritableReg, size: OperandSize, signed: bool) { use OperandSize::*; let writable_reg = rd.map(Into::into); @@ -397,6 +400,120 @@ impl Assembler { self.emit_alu_rrrr(ALUOp3::MAdd, scratch, rn, rd, regs::zero(), size); } + /// Signed/unsigned division with three registers. + pub fn div_rrr( + &mut self, + divisor: Reg, + dividend: Reg, + dest: Writable, + kind: DivKind, + size: OperandSize, + ) { + // Check for division by 0. + self.trapz(divisor, TrapCode::INTEGER_DIVISION_BY_ZERO); + + // Check for overflow (signed division only). + if kind == DivKind::Signed { + // Set the zero flag if the divisor is -1 (divisor + 1 == 0). + self.emit_alu_rri( + ALUOp::AddS, + Imm12::maybe_from_u64(1).expect("1 to fit in 12 bits"), + divisor, + writable!(zero()), + size, + ); + + // If the divisor is -1, compare the dividend against 1; the subtraction overflows only when the dividend is the minimum signed value. + self.emit(Inst::CCmpImm { + size: size.into(), + rn: dividend.into(), + imm: UImm5::maybe_from_u8(1).expect("1 fits in 5 bits"), + nzcv: NZCV::new(false, false, false, false), + cond: Cond::Eq, + }); + + // Finally, trap if the previous operation overflowed.
+ self.trapif(Cond::Vs, TrapCode::INTEGER_OVERFLOW); + } + + // `cranelift-codegen` only supports emitting `sdiv`/`udiv` for I64, + // so sign- or zero-extend both operands to 64 bits first. + // see: https://github.com/bytecodealliance/wasmtime/issues/9766 + if size == OperandSize::S32 { + let extend_kind = if kind == DivKind::Signed { + ExtendKind::I64Extend32S + } else { + ExtendKind::I64ExtendI32U + }; + + self.extend(divisor, writable!(divisor), extend_kind); + self.extend(dividend, writable!(dividend), extend_kind); + } + + let op = match kind { + DivKind::Signed => ALUOp::SDiv, + DivKind::Unsigned => ALUOp::UDiv, + }; + + self.emit_alu_rrr( + op, + divisor, + dividend, + dest.map(Into::into), + OperandSize::S64, + ); + } + + /// Signed/unsigned remainder operation with three registers. + pub fn rem_rrr( + &mut self, + divisor: Reg, + dividend: Reg, + dest: Writable, + kind: RemKind, + size: OperandSize, + ) { + // Check for division by 0. + self.trapz(divisor, TrapCode::INTEGER_DIVISION_BY_ZERO); + + // `cranelift-codegen` only supports emitting `sdiv`/`udiv` for I64, + // so sign- or zero-extend both operands to 64 bits first. + // see: https://github.com/bytecodealliance/wasmtime/issues/9766 + if size == OperandSize::S32 { + let extend_kind = if kind.is_signed() { + ExtendKind::I64Extend32S + } else { + ExtendKind::I64ExtendI32U + }; + + self.extend(divisor, writable!(divisor), extend_kind); + self.extend(dividend, writable!(dividend), extend_kind); + } + + let op = match kind { + RemKind::Signed => ALUOp::SDiv, + RemKind::Unsigned => ALUOp::UDiv, + }; + + let scratch = regs::scratch(); + self.emit_alu_rrr( + op, + divisor, + dividend, + writable!(scratch.into()), + OperandSize::S64, + ); + + self.emit_alu_rrrr( + ALUOp3::MSub, + scratch, + divisor, + dest.map(Into::into), + dividend, + OperandSize::S64, + ); + } + /// And with three registers. pub fn and_rrr(&mut self, rm: Reg, rn: Reg, rd: WritableReg, size: OperandSize) { self.emit_alu_rrr(ALUOp::And, rm, rn, rd, size); @@ -600,6 +717,52 @@ impl Assembler { }) } + /// Convert a signed integer to a float. + pub fn cvt_sint_to_float( + &mut self, + rn: Reg, + rd: WritableReg, + src_size: OperandSize, + dst_size: OperandSize, + ) { + let op = match (src_size, dst_size) { + (OperandSize::S32, OperandSize::S32) => IntToFpuOp::I32ToF32, + (OperandSize::S64, OperandSize::S32) => IntToFpuOp::I64ToF32, + (OperandSize::S32, OperandSize::S64) => IntToFpuOp::I32ToF64, + (OperandSize::S64, OperandSize::S64) => IntToFpuOp::I64ToF64, + _ => unreachable!(), + }; + + self.emit(Inst::IntToFpu { + op, + rd: rd.map(Into::into), + rn: rn.into(), + }); + } + + /// Convert an unsigned integer to a float. + pub fn cvt_uint_to_float( + &mut self, + rn: Reg, + rd: WritableReg, + src_size: OperandSize, + dst_size: OperandSize, + ) { + let op = match (src_size, dst_size) { + (OperandSize::S32, OperandSize::S32) => IntToFpuOp::U32ToF32, + (OperandSize::S64, OperandSize::S32) => IntToFpuOp::U64ToF32, + (OperandSize::S32, OperandSize::S64) => IntToFpuOp::U32ToF64, + (OperandSize::S64, OperandSize::S64) => IntToFpuOp::U64ToF64, + _ => unreachable!(), + }; + + self.emit(Inst::IntToFpu { + op, + rd: rd.map(Into::into), + rn: rn.into(), + }); + } + /// Change precision of float. pub fn cvt_float_to_float( &mut self, rn: Reg, rd: WritableReg, src_size: OperandSize, dst_size: OperandSize, ) { @@ -724,6 +887,14 @@ impl Assembler { }); } + /// Trap if `rn` is zero. + pub fn trapz(&mut self, rn: Reg, code: TrapCode) { + self.emit(Inst::TrapIf { + kind: CondBrKind::Zero(rn.into()), + trap_code: code, + }); + }
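As a point of reference for the `div_rrr` and `rem_rrr` lowerings above, here is a minimal, self-contained Rust sketch (hypothetical helpers, not part of the Winch or Wasmtime API) of the Wasm trap semantics those sequences implement: division and remainder trap on a zero divisor, signed division additionally traps on `i32::MIN / -1`, and `i32::MIN % -1` is defined to yield 0 rather than trap.

// Hypothetical reference helpers illustrating the trap semantics that the
// emitted aarch64 sequences (cbz for the zero check, cmn/ccmp/b.vs for the
// signed-overflow check) are expected to match.
fn wasm_i32_div_s(dividend: i32, divisor: i32) -> Result<i32, &'static str> {
    if divisor == 0 {
        return Err("integer divide by zero");
    }
    if dividend == i32::MIN && divisor == -1 {
        return Err("integer overflow");
    }
    Ok(dividend.wrapping_div(divisor))
}

fn wasm_i32_rem_s(dividend: i32, divisor: i32) -> Result<i32, &'static str> {
    if divisor == 0 {
        return Err("integer divide by zero");
    }
    // i32::MIN % -1 does not trap; the result is 0.
    Ok(dividend.wrapping_rem(divisor))
}

fn main() {
    assert_eq!(wasm_i32_div_s(20, 10), Ok(2));
    assert_eq!(wasm_i32_rem_s(7, 5), Ok(2));
    assert!(wasm_i32_div_s(1, 0).is_err());
    assert!(wasm_i32_div_s(i32::MIN, -1).is_err());
    assert_eq!(wasm_i32_rem_s(i32::MIN, -1), Ok(0));
}

The unsigned variants only need the zero-divisor check, which is why the `i32.div_u`/`i32.rem_u` listings above carry a single out-of-line trap slot instead of two.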
+ // Helpers for ALU operations. fn emit_alu_rri(&mut self, op: ALUOp, imm: Imm12, rn: Reg, rd: WritableReg, size: OperandSize) { @@ -901,4 +1072,38 @@ impl Assembler { pub fn buffer(&self) -> &MachBuffer { &self.buffer } + + /// Emit a direct call to a function defined locally and + /// referenced by `name`. + pub fn call_with_name(&mut self, name: UserExternalNameRef, call_conv: CallingConvention) { + self.emit(Inst::Call { + info: Box::new(cranelift_codegen::CallInfo::empty( + ExternalName::user(name), + call_conv.into(), + )), + }) + } + + /// Emit an indirect call to a function whose address is + /// stored in the `callee` register. + pub fn call_with_reg(&mut self, callee: Reg, call_conv: CallingConvention) { + self.emit(Inst::CallInd { + info: Box::new(cranelift_codegen::CallInfo::empty( + callee.into(), + call_conv.into(), + )), + }) + } + + /// Emit a call to a well-known libcall. + /// `dst` is used as a scratch register to hold the address of the libcall function. + pub fn call_with_lib(&mut self, lib: LibCall, dst: Reg, call_conv: CallingConvention) { + let name = ExternalName::LibCall(lib); + self.emit(Inst::LoadExtName { + rd: writable!(dst.into()), + name: name.into(), + offset: 0, + }); + self.call_with_reg(dst, call_conv) + } } diff --git a/winch/codegen/src/isa/aarch64/masm.rs b/winch/codegen/src/isa/aarch64/masm.rs index 76a81cea3c3c..fa91909aba3a 100644 --- a/winch/codegen/src/isa/aarch64/masm.rs +++ b/winch/codegen/src/isa/aarch64/masm.rs @@ -1,6 +1,11 @@ -use super::{abi::Aarch64ABI, address::Address, asm::Assembler, regs}; +use super::{ + abi::Aarch64ABI, + address::Address, + asm::Assembler, + regs::{self, scratch}, +}; use crate::{ - abi::local::LocalSlot, + abi::{self, align_to, calculate_frame_adjustment, local::LocalSlot, vmctx}, codegen::{ptr_type_from_ptr_size, CodeGenContext, Emission, FuncEnv}, isa::{ reg::{writable, Reg, WritableReg}, @@ -11,6 +16,7 @@ use crate::{ MacroAssembler as Masm, MulWideKind, OperandSize, RegImm, RemKind, RoundingMode, SPOffset, ShiftKind, StackSlot, TrapCode, TruncKind, }, + stack::TypedReg, }; use cranelift_codegen::{ binemit::CodeOffset, @@ -19,7 +25,7 @@ use cranelift_codegen::{ settings, Final, MachBufferFinalized, MachLabel, }; use regalloc2::RegClass; -use wasmtime_environ::PtrSize; +use wasmtime_environ::{PtrSize, WasmValType}; /// Aarch64 MacroAssembler.
pub(crate) struct MacroAssembler { @@ -126,8 +132,8 @@ impl Masm for MacroAssembler { Address::from_shadow_sp(offset.as_u32() as i64) } - fn address_at_vmctx(&self, _offset: u32) -> Self::Address { - todo!() + fn address_at_vmctx(&self, offset: u32) -> Self::Address { + Address::offset(vmctx!(Self), offset as i64) } fn store_ptr(&mut self, src: Reg, dst: Self::Address) { @@ -165,10 +171,23 @@ impl Masm for MacroAssembler { fn call( &mut self, - _stack_args_size: u32, - _load_callee: impl FnMut(&mut Self) -> (CalleeKind, CallingConvention), + stack_args_size: u32, + mut load_callee: impl FnMut(&mut Self) -> (CalleeKind, CallingConvention), ) -> u32 { - todo!() + let alignment: u32 = ::call_stack_align().into(); + let addend: u32 = ::arg_base_offset().into(); + let delta = calculate_frame_adjustment(self.sp_offset().as_u32(), addend, alignment); + let aligned_args_size = align_to(stack_args_size, alignment); + let total_stack = delta + aligned_args_size; + self.reserve_stack(total_stack); + let (callee, call_conv) = load_callee(self); + match callee { + CalleeKind::Indirect(reg) => self.asm.call_with_reg(reg, call_conv), + CalleeKind::Direct(idx) => self.asm.call_with_name(idx, call_conv), + CalleeKind::LibCall(lib) => self.asm.call_with_lib(lib, scratch(), call_conv), + } + + total_stack } fn load(&mut self, src: Address, dst: WritableReg, size: OperandSize) { @@ -195,8 +214,8 @@ impl Masm for MacroAssembler { } } - fn load_addr(&mut self, _src: Self::Address, _dst: WritableReg, _size: OperandSize) { - todo!() + fn load_addr(&mut self, src: Self::Address, dst: WritableReg, size: OperandSize) { + self.asm.uload(src, dst, size); } fn pop(&mut self, dst: WritableReg, size: OperandSize) { @@ -452,12 +471,28 @@ impl Masm for MacroAssembler { context.stack.push(dst.into()); } - fn div(&mut self, _context: &mut CodeGenContext, _kind: DivKind, _size: OperandSize) { - todo!() + fn div(&mut self, context: &mut CodeGenContext, kind: DivKind, size: OperandSize) { + context.binop(self, size, |this, dividend, divisor, size| { + this.asm + .div_rrr(divisor, dividend, writable!(dividend), kind, size); + match size { + OperandSize::S32 => TypedReg::new(WasmValType::I32, dividend), + OperandSize::S64 => TypedReg::new(WasmValType::I64, dividend), + s => unreachable!("invalid size for division: {s:?}"), + } + }) } - fn rem(&mut self, _context: &mut CodeGenContext, _kind: RemKind, _size: OperandSize) { - todo!() + fn rem(&mut self, context: &mut CodeGenContext, kind: RemKind, size: OperandSize) { + context.binop(self, size, |this, dividend, divisor, size| { + this.asm + .rem_rrr(divisor, dividend, writable!(dividend), kind, size); + match size { + OperandSize::S32 => TypedReg::new(WasmValType::I32, dividend), + OperandSize::S64 => TypedReg::new(WasmValType::I64, dividend), + s => unreachable!("invalid size for remainder: {s:?}"), + } + }) } fn zero(&mut self, reg: WritableReg) { @@ -500,31 +535,31 @@ impl Masm for MacroAssembler { fn signed_convert( &mut self, - _dst: WritableReg, - _src: Reg, - _src_size: OperandSize, - _dst_size: OperandSize, + dst: WritableReg, + src: Reg, + src_size: OperandSize, + dst_size: OperandSize, ) { - todo!() + self.asm.cvt_sint_to_float(src, dst, src_size, dst_size); } fn unsigned_convert( &mut self, - _dst: WritableReg, - _src: Reg, + dst: WritableReg, + src: Reg, _tmp_gpr: Reg, - _src_size: OperandSize, - _dst_size: OperandSize, + src_size: OperandSize, + dst_size: OperandSize, ) { - todo!() + self.asm.cvt_uint_to_float(src, dst, src_size, dst_size); } - fn 
reinterpret_float_as_int(&mut self, _dst: WritableReg, _src: Reg, _size: OperandSize) { - todo!() + fn reinterpret_float_as_int(&mut self, dst: WritableReg, src: Reg, size: OperandSize) { + self.asm.mov_from_vec(src, dst, 0, size); } - fn reinterpret_int_as_float(&mut self, _dst: WritableReg, _src: Reg, _size: OperandSize) { - todo!() + fn reinterpret_int_as_float(&mut self, dst: WritableReg, src: Reg, size: OperandSize) { + self.asm.mov_to_fpu(src, dst, size); } fn demote(&mut self, dst: WritableReg, src: Reg) { @@ -662,8 +697,8 @@ impl Masm for MacroAssembler { self.asm.udf(code); } - fn trapz(&mut self, _src: Reg, _code: TrapCode) { - todo!() + fn trapz(&mut self, src: Reg, code: TrapCode) { + self.asm.trapz(src, code); } fn trapif(&mut self, cc: IntCmpKind, code: TrapCode) { diff --git a/winch/codegen/src/masm.rs b/winch/codegen/src/masm.rs index da25d88c486a..9cb30372fcf9 100644 --- a/winch/codegen/src/masm.rs +++ b/winch/codegen/src/masm.rs @@ -23,6 +23,7 @@ pub(crate) enum DivKind { } /// Remainder kind. +#[derive(Copy, Clone)] pub(crate) enum RemKind { /// Signed remainder. Signed, @@ -30,6 +31,12 @@ pub(crate) enum RemKind { Unsigned, } +impl RemKind { + pub fn is_signed(&self) -> bool { + matches!(self, Self::Signed) + } +} + #[derive(Eq, PartialEq)] pub(crate) enum MulWideKind { Signed, @@ -163,6 +170,7 @@ pub(crate) enum ShiftKind { /// Kinds of extends in WebAssembly. Each MacroAssembler implementation /// is responsible for emitting the correct sequence of instructions when /// lowering to machine code. +#[derive(Copy, Clone)] pub(crate) enum ExtendKind { /// Sign extends i32 to i64. I64ExtendI32S,