From 2b1fb6576700ed3c4696e6c9dbc5e6fd60fa4189 Mon Sep 17 00:00:00 2001 From: Aman Date: Fri, 3 Jan 2025 01:42:46 +0000 Subject: [PATCH] polkavm: Replace orc.b with an instruction sequence in the linker. And get rid orc.b emulation code from the interpreter and the recompiler. We are making this change because orc.b is rarely used and a very complicated instruction in itself, we don't need to support or upstream such a complicated instruction to Polkavm. Therefore let's emulate the instruction within the linker. In addition, we are adding a warn print for the linker user to make them aware that orc.b is being replaced with an instruction sequence, and any perf benefit that the user is expecting would not be visible. Signed-off-by: Aman --- crates/polkavm-common/src/program.rs | 7 -- crates/polkavm-linker/src/program_from_elf.rs | 82 ++++++++++++++++++- crates/polkavm/src/compiler.rs | 11 --- crates/polkavm/src/compiler/amd64.rs | 47 ----------- crates/polkavm/src/gas.rs | 5 -- crates/polkavm/src/interpreter.rs | 44 ---------- 6 files changed, 79 insertions(+), 117 deletions(-) diff --git a/crates/polkavm-common/src/program.rs b/crates/polkavm-common/src/program.rs index 7828fa7d..49ef0c8f 100644 --- a/crates/polkavm-common/src/program.rs +++ b/crates/polkavm-common/src/program.rs @@ -1602,7 +1602,6 @@ define_opcodes! { [I_64, I_32] sign_extend_8 = 45, [I_64, I_32] sign_extend_16 = 46, [I_64, I_32] zero_extend_16 = 47, - [I_64, I_32] or_combine_byte = 48, [I_64, I_32] reverse_byte = 49, ] @@ -2415,12 +2414,6 @@ impl<'a, 'b, 'c> InstructionVisitor for InstructionFormatter<'a, 'b, 'c> { write!(self, "{d} = zext.h {s}") } - fn or_combine_byte(&mut self, d: RawReg, s: RawReg) -> Self::ReturnTy { - let d = self.format_reg(d); - let s = self.format_reg(s); - write!(self, "{d} = orc.b {s}") - } - fn reverse_byte(&mut self, d: RawReg, s: RawReg) -> Self::ReturnTy { let d = self.format_reg(d); let s = self.format_reg(s); diff --git a/crates/polkavm-linker/src/program_from_elf.rs b/crates/polkavm-linker/src/program_from_elf.rs index 4e1879d3..4eca738c 100644 --- a/crates/polkavm-linker/src/program_from_elf.rs +++ b/crates/polkavm-linker/src/program_from_elf.rs @@ -1811,6 +1811,73 @@ fn resolve_simple_zero_register_usage( false } +fn emit_or_combine_byte( + location: SectionTarget, + dst: Reg, + src: Reg, + rv64: bool, + mut emit: impl FnMut(InstExt), +) { + let op_reg = dst; + let cmp_reg = Reg::E1; + let tmp_reg = Reg::E2; + let mask_reg = if dst != src { src } else { Reg::E3 }; + let range = if rv64 { 0..64 } else { 0..32 }; + + log::warn!("Emulating orc.b at {:?} with an instruction sequence", location); + + if dst != src { + emit(InstExt::Basic(BasicInst::MoveReg { dst, src })); + } + + // Loop: + // mov tmp, op + // shl mask, 8 + // or tmp, mask + // test op, mask + // cmov.neq op, tmp + + for iter in range.step_by(8) { + emit(InstExt::Basic(BasicInst::MoveReg { dst: tmp_reg, src: op_reg })); + + if iter == 0 { + emit(InstExt::Basic(BasicInst::LoadImmediate { dst: mask_reg, imm: 0xff })); + } else { + emit(InstExt::Basic(BasicInst::AnyAny { + kind: if rv64 { + AnyAnyKind::ShiftLogicalLeft64 + } else { + AnyAnyKind::ShiftLogicalLeft32 + }, + dst: mask_reg, + src1: RegImm::Reg(mask_reg), + src2: RegImm::Imm(8), + })); + } + + emit(InstExt::Basic(BasicInst::AnyAny { + kind: if rv64 { AnyAnyKind::Or64 } else { AnyAnyKind::Or32 }, + dst: tmp_reg, + src1: RegImm::Reg(tmp_reg), + src2: RegImm::Reg(mask_reg), + })); + + emit(InstExt::Basic(BasicInst::AnyAny { + kind: if rv64 { AnyAnyKind::And64 } else { AnyAnyKind::And32 }, + dst: cmp_reg, + src1: RegImm::Reg(op_reg), + src2: RegImm::Reg(mask_reg), + })); + + emit(InstExt::Basic(BasicInst::Cmov { + kind: CmovKind::NotEqZero, + dst: op_reg, + src: RegImm::Reg(tmp_reg), + cond: cmp_reg, + })); + } +} + fn convert_instruction( elf: &Elf, section: &Section, @@ -2005,6 +2072,17 @@ where }; use crate::riscv::RegKind as K; + + // + // if OrCombineByte then emit function call + // + + if kind == K::OrCombineByte { + emit_or_combine_byte(current_location, dst, src, rv64, &mut emit); + emit(InstExt::nop()); + return Ok(()); + } + let kind = match kind { K::CountLeadingZeroBits32 => RegKind::CountLeadingZeroBits32, K::CountLeadingZeroBits64 => RegKind::CountLeadingZeroBits64, @@ -2012,11 +2090,11 @@ where K::CountSetBits64 => RegKind::CountSetBits64, K::CountTrailingZeroBits32 => RegKind::CountTrailingZeroBits32, K::CountTrailingZeroBits64 => RegKind::CountTrailingZeroBits64, - K::OrCombineByte => RegKind::OrCombineByte, K::ReverseByte => RegKind::ReverseByte, K::SignExtend8 => RegKind::SignExtend8, K::SignExtend16 => RegKind::SignExtend16, K::ZeroExtend16 => RegKind::ZeroExtend16, + K::OrCombineByte => unreachable!(), }; emit(InstExt::Basic(BasicInst::Reg { kind, dst, src })); @@ -3702,7 +3780,6 @@ pub enum RegKind { CountSetBits64, CountTrailingZeroBits32, CountTrailingZeroBits64, - OrCombineByte, ReverseByte, SignExtend8, SignExtend16, @@ -7259,7 +7336,6 @@ fn emit_code( RegKind::CountSetBits64 => count_set_bits_64, RegKind::CountTrailingZeroBits32 => count_trailing_zero_bits_32, RegKind::CountTrailingZeroBits64 => count_trailing_zero_bits_64, - RegKind::OrCombineByte => or_combine_byte, RegKind::ReverseByte => reverse_byte, RegKind::SignExtend8 => sign_extend_8, RegKind::SignExtend16 => sign_extend_16, diff --git a/crates/polkavm/src/compiler.rs b/crates/polkavm/src/compiler.rs index 806fc000..c7d40f7c 100644 --- a/crates/polkavm/src/compiler.rs +++ b/crates/polkavm/src/compiler.rs @@ -112,7 +112,6 @@ where step_label: Label, trap_label: Label, invalid_jump_label: Label, - or_combine_label: Label, instruction_set: RuntimeInstructionSet, _phantom: PhantomData<(S, B)>, @@ -219,7 +218,6 @@ where let step_label = asm.forward_declare_label(); let jump_table_label = asm.forward_declare_label(); let sbrk_label = asm.forward_declare_label(); - let or_combine_label = asm.forward_declare_label(); polkavm_common::static_assert!(polkavm_common::zygote::VM_SANDBOX_MAXIMUM_NATIVE_CODE_SIZE < u32::MAX); @@ -246,7 +244,6 @@ where step_label, jump_table_label, sbrk_label, - or_combine_label, gas_metering: config.gas_metering, step_tracing, program_counter_to_machine_code_offset_list, @@ -261,7 +258,6 @@ where ArchVisitor(&mut visitor).emit_trap_trampoline(); ArchVisitor(&mut visitor).emit_ecall_trampoline(); ArchVisitor(&mut visitor).emit_sbrk_trampoline(); - ArchVisitor(&mut visitor).emit_or_combine_trampoline(); if step_tracing { ArchVisitor(&mut visitor).emit_step_trampoline(); @@ -1145,13 +1141,6 @@ where self.after_instruction::(code_offset, args_length); } - fn or_combine_byte(&mut self, code_offset: u32, args_length: u32, d: RawReg, s: RawReg) -> Self::ReturnTy { - self.before_instruction(code_offset); - self.gas_visitor.or_combine_byte(d, s); - ArchVisitor(self).or_combine_byte(d, s); - self.after_instruction::(code_offset, args_length); - } - fn reverse_byte(&mut self, code_offset: u32, args_length: u32, d: RawReg, s: RawReg) -> Self::ReturnTy { self.before_instruction(code_offset); self.gas_visitor.reverse_byte(d, s); diff --git a/crates/polkavm/src/compiler/amd64.rs b/crates/polkavm/src/compiler/amd64.rs index 3cba6e46..a731510c 100644 --- a/crates/polkavm/src/compiler/amd64.rs +++ b/crates/polkavm/src/compiler/amd64.rs @@ -701,39 +701,6 @@ where self.push(ret()); } - pub(crate) fn emit_or_combine_trampoline(&mut self) { - log::trace!("Emitting trampoline: or_combine"); - let label = self.or_combine_label; - let (reg_size, range) = if B::BITNESS == Bitness::B32 { - (RegSize::R32, (8..32)) - } else { - (RegSize::R64, (8..64)) - }; - - self.define_label(label); - self.push(push(TMP_REG)); - self.save_registers_to_vmctx(); - - self.push(pop(rdi)); - self.push(mov_imm(rax, imm32(0xff))); - self.push(or((reg_size, rdi, rax))); - self.push(test((reg_size, TMP_REG, rax))); - self.push(cmov(Condition::NotEqual, reg_size, TMP_REG, rdi)); - - for _ in range.step_by(8) { - self.push(mov(reg_size, rdi, TMP_REG)); - self.push(shl_imm(RegSize::R64, rax, 8)); - self.push(or((reg_size, rdi, rax))); - self.push(test((reg_size, TMP_REG, rax))); - self.push(cmov(Condition::NotEqual, reg_size, TMP_REG, rdi)); - } - - self.push(push(TMP_REG)); - self.restore_registers_from_vmctx(); - self.push(pop(TMP_REG)); - self.push(ret()); - } - pub(crate) fn trace_execution(&mut self, code_offset: u32) { let step_label = self.step_label; let asm = self.asm.reserve::(); @@ -1773,20 +1740,6 @@ where self.push(movzx_16_to_64(self.reg_size(), conv_reg(d), conv_reg(s))) } - #[inline(always)] - pub fn or_combine_byte(&mut self, d: RawReg, s: RawReg) { - let reg_size = self.reg_size(); - let d = conv_reg(d); - let s = conv_reg(s); - let or_combine_label = self.or_combine_label; - - let asm = self.asm.reserve::(); - let asm = asm.push(mov(reg_size, TMP_REG, s)); - let asm = asm.push(call_label32(or_combine_label)); - let asm = asm.push(mov(reg_size, d, TMP_REG)); - asm.assert_reserved_exactly_as_needed(); - } - #[inline(always)] pub fn reverse_byte(&mut self, d: RawReg, s: RawReg) { let reg_size = self.reg_size(); diff --git a/crates/polkavm/src/gas.rs b/crates/polkavm/src/gas.rs index d9a402fd..7ac6a988 100644 --- a/crates/polkavm/src/gas.rs +++ b/crates/polkavm/src/gas.rs @@ -399,11 +399,6 @@ impl InstructionVisitor for GasVisitor { self.cost += 1; } - #[inline(always)] - fn or_combine_byte(&mut self, _d: RawReg, _s: RawReg) -> Self::ReturnTy { - self.cost += 1; - } - #[inline(always)] fn reverse_byte(&mut self, _d: RawReg, _s: RawReg) -> Self::ReturnTy { self.cost += 1; diff --git a/crates/polkavm/src/interpreter.rs b/crates/polkavm/src/interpreter.rs index 0338fc52..9c8a49cb 100644 --- a/crates/polkavm/src/interpreter.rs +++ b/crates/polkavm/src/interpreter.rs @@ -2641,42 +2641,6 @@ define_interpreter! { visitor.go_to_next_instruction() } - fn or_combine_byte_32(visitor: &mut Visitor, d: Reg, s: Reg) -> Option { - if DEBUG { - log::trace!("[{}]: {}", visitor.inner.compiled_offset, asm::or_combine_byte(d, s)); - } - - let word = visitor.get32(s); - - let mut result = 0; - for i in (0..32).step_by(8) { - if (word & (0xffu32 << i)) != 0 { - result |= 0xffu32 << i; - } - } - - visitor.set32::(d, result); - visitor.go_to_next_instruction() - } - - fn or_combine_byte_64(visitor: &mut Visitor, d: Reg, s: Reg) -> Option { - if DEBUG { - log::trace!("[{}]: {}", visitor.inner.compiled_offset, asm::or_combine_byte(d, s)); - } - - let word = visitor.get64(s); - - let mut result = 0; - for i in (0..64).step_by(8) { - if (word & (0xffu64 << i)) != 0 { - result |= 0xffu64 << i; - } - } - - visitor.set64::(d, result); - visitor.go_to_next_instruction() - } - fn reverse_byte_32(visitor: &mut Visitor, d: Reg, s: Reg) -> Option { if DEBUG { log::trace!("[{}]: {}", visitor.inner.compiled_offset, asm::reverse_byte(d, s)); @@ -3962,14 +3926,6 @@ impl<'a, const DEBUG: bool> InstructionVisitor for Compiler<'a, DEBUG> { } } - fn or_combine_byte(&mut self, d: RawReg, s: RawReg) -> Self::ReturnTy { - if self.module.blob().is_64_bit() { - emit!(self, or_combine_byte_64(d, s)); - } else { - emit!(self, or_combine_byte_32(d, s)); - } - } - fn reverse_byte(&mut self, d: RawReg, s: RawReg) -> Self::ReturnTy { if self.module.blob().is_64_bit() { emit!(self, reverse_byte_64(d, s));