Skip to content

Commit

Permalink
polkavm: Replace orc.b with an instruction sequence in the linker.
Browse files Browse the repository at this point in the history
And get rid orc.b emulation code from the interpreter and the recompiler.

We are making this change because orc.b is rarely used and a very
complicated instruction in itself, we don't need to support or upstream
such a complicated instruction to Polkavm. Therefore let's emulate the
instruction within the linker.

In addition, we are adding a warn print for the linker user to make
them aware that orc.b is being replaced with an instruction sequence,
and any perf benefit that the user is expecting would not be visible.

Signed-off-by: Aman <[email protected]>
  • Loading branch information
aman4150 committed Jan 3, 2025
1 parent 965ff40 commit 2b1fb65
Show file tree
Hide file tree
Showing 6 changed files with 79 additions and 117 deletions.
7 changes: 0 additions & 7 deletions crates/polkavm-common/src/program.rs
Original file line number Diff line number Diff line change
Expand Up @@ -1602,7 +1602,6 @@ define_opcodes! {
[I_64, I_32] sign_extend_8 = 45,
[I_64, I_32] sign_extend_16 = 46,
[I_64, I_32] zero_extend_16 = 47,
[I_64, I_32] or_combine_byte = 48,
[I_64, I_32] reverse_byte = 49,
]

Expand Down Expand Up @@ -2415,12 +2414,6 @@ impl<'a, 'b, 'c> InstructionVisitor for InstructionFormatter<'a, 'b, 'c> {
write!(self, "{d} = zext.h {s}")
}

fn or_combine_byte(&mut self, d: RawReg, s: RawReg) -> Self::ReturnTy {
let d = self.format_reg(d);
let s = self.format_reg(s);
write!(self, "{d} = orc.b {s}")
}

fn reverse_byte(&mut self, d: RawReg, s: RawReg) -> Self::ReturnTy {
let d = self.format_reg(d);
let s = self.format_reg(s);
Expand Down
82 changes: 79 additions & 3 deletions crates/polkavm-linker/src/program_from_elf.rs
Original file line number Diff line number Diff line change
Expand Up @@ -1811,6 +1811,73 @@ fn resolve_simple_zero_register_usage(
false
}

fn emit_or_combine_byte(
location: SectionTarget,
dst: Reg,
src: Reg,
rv64: bool,
mut emit: impl FnMut(InstExt<SectionTarget, SectionTarget>),
) {
let op_reg = dst;
let cmp_reg = Reg::E1;
let tmp_reg = Reg::E2;
let mask_reg = if dst != src { src } else { Reg::E3 };
let range = if rv64 { 0..64 } else { 0..32 };

log::warn!("Emulating orc.b at {:?} with an instruction sequence", location);

if dst != src {
emit(InstExt::Basic(BasicInst::MoveReg { dst, src }));
}

// Loop:
// mov tmp, op
// shl mask, 8
// or tmp, mask
// test op, mask
// cmov.neq op, tmp

for iter in range.step_by(8) {
emit(InstExt::Basic(BasicInst::MoveReg { dst: tmp_reg, src: op_reg }));

if iter == 0 {
emit(InstExt::Basic(BasicInst::LoadImmediate { dst: mask_reg, imm: 0xff }));
} else {
emit(InstExt::Basic(BasicInst::AnyAny {
kind: if rv64 {
AnyAnyKind::ShiftLogicalLeft64
} else {
AnyAnyKind::ShiftLogicalLeft32
},
dst: mask_reg,
src1: RegImm::Reg(mask_reg),
src2: RegImm::Imm(8),
}));
}

emit(InstExt::Basic(BasicInst::AnyAny {
kind: if rv64 { AnyAnyKind::Or64 } else { AnyAnyKind::Or32 },
dst: tmp_reg,
src1: RegImm::Reg(tmp_reg),
src2: RegImm::Reg(mask_reg),
}));

emit(InstExt::Basic(BasicInst::AnyAny {
kind: if rv64 { AnyAnyKind::And64 } else { AnyAnyKind::And32 },
dst: cmp_reg,
src1: RegImm::Reg(op_reg),
src2: RegImm::Reg(mask_reg),
}));

emit(InstExt::Basic(BasicInst::Cmov {
kind: CmovKind::NotEqZero,
dst: op_reg,
src: RegImm::Reg(tmp_reg),
cond: cmp_reg,
}));
}
}

fn convert_instruction<H>(
elf: &Elf<H>,
section: &Section,
Expand Down Expand Up @@ -2005,18 +2072,29 @@ where
};

use crate::riscv::RegKind as K;

//
// if OrCombineByte then emit function call
//

if kind == K::OrCombineByte {
emit_or_combine_byte(current_location, dst, src, rv64, &mut emit);
emit(InstExt::nop());
return Ok(());
}

let kind = match kind {
K::CountLeadingZeroBits32 => RegKind::CountLeadingZeroBits32,
K::CountLeadingZeroBits64 => RegKind::CountLeadingZeroBits64,
K::CountSetBits32 => RegKind::CountSetBits32,
K::CountSetBits64 => RegKind::CountSetBits64,
K::CountTrailingZeroBits32 => RegKind::CountTrailingZeroBits32,
K::CountTrailingZeroBits64 => RegKind::CountTrailingZeroBits64,
K::OrCombineByte => RegKind::OrCombineByte,
K::ReverseByte => RegKind::ReverseByte,
K::SignExtend8 => RegKind::SignExtend8,
K::SignExtend16 => RegKind::SignExtend16,
K::ZeroExtend16 => RegKind::ZeroExtend16,
K::OrCombineByte => unreachable!(),
};

emit(InstExt::Basic(BasicInst::Reg { kind, dst, src }));
Expand Down Expand Up @@ -3702,7 +3780,6 @@ pub enum RegKind {
CountSetBits64,
CountTrailingZeroBits32,
CountTrailingZeroBits64,
OrCombineByte,
ReverseByte,
SignExtend8,
SignExtend16,
Expand Down Expand Up @@ -7259,7 +7336,6 @@ fn emit_code(
RegKind::CountSetBits64 => count_set_bits_64,
RegKind::CountTrailingZeroBits32 => count_trailing_zero_bits_32,
RegKind::CountTrailingZeroBits64 => count_trailing_zero_bits_64,
RegKind::OrCombineByte => or_combine_byte,
RegKind::ReverseByte => reverse_byte,
RegKind::SignExtend8 => sign_extend_8,
RegKind::SignExtend16 => sign_extend_16,
Expand Down
11 changes: 0 additions & 11 deletions crates/polkavm/src/compiler.rs
Original file line number Diff line number Diff line change
Expand Up @@ -112,7 +112,6 @@ where
step_label: Label,
trap_label: Label,
invalid_jump_label: Label,
or_combine_label: Label,
instruction_set: RuntimeInstructionSet,

_phantom: PhantomData<(S, B)>,
Expand Down Expand Up @@ -219,7 +218,6 @@ where
let step_label = asm.forward_declare_label();
let jump_table_label = asm.forward_declare_label();
let sbrk_label = asm.forward_declare_label();
let or_combine_label = asm.forward_declare_label();

polkavm_common::static_assert!(polkavm_common::zygote::VM_SANDBOX_MAXIMUM_NATIVE_CODE_SIZE < u32::MAX);

Expand All @@ -246,7 +244,6 @@ where
step_label,
jump_table_label,
sbrk_label,
or_combine_label,
gas_metering: config.gas_metering,
step_tracing,
program_counter_to_machine_code_offset_list,
Expand All @@ -261,7 +258,6 @@ where
ArchVisitor(&mut visitor).emit_trap_trampoline();
ArchVisitor(&mut visitor).emit_ecall_trampoline();
ArchVisitor(&mut visitor).emit_sbrk_trampoline();
ArchVisitor(&mut visitor).emit_or_combine_trampoline();

if step_tracing {
ArchVisitor(&mut visitor).emit_step_trampoline();
Expand Down Expand Up @@ -1145,13 +1141,6 @@ where
self.after_instruction::<CONTINUE_BASIC_BLOCK>(code_offset, args_length);
}

fn or_combine_byte(&mut self, code_offset: u32, args_length: u32, d: RawReg, s: RawReg) -> Self::ReturnTy {
self.before_instruction(code_offset);
self.gas_visitor.or_combine_byte(d, s);
ArchVisitor(self).or_combine_byte(d, s);
self.after_instruction::<CONTINUE_BASIC_BLOCK>(code_offset, args_length);
}

fn reverse_byte(&mut self, code_offset: u32, args_length: u32, d: RawReg, s: RawReg) -> Self::ReturnTy {
self.before_instruction(code_offset);
self.gas_visitor.reverse_byte(d, s);
Expand Down
47 changes: 0 additions & 47 deletions crates/polkavm/src/compiler/amd64.rs
Original file line number Diff line number Diff line change
Expand Up @@ -701,39 +701,6 @@ where
self.push(ret());
}

pub(crate) fn emit_or_combine_trampoline(&mut self) {
log::trace!("Emitting trampoline: or_combine");
let label = self.or_combine_label;
let (reg_size, range) = if B::BITNESS == Bitness::B32 {
(RegSize::R32, (8..32))
} else {
(RegSize::R64, (8..64))
};

self.define_label(label);
self.push(push(TMP_REG));
self.save_registers_to_vmctx();

self.push(pop(rdi));
self.push(mov_imm(rax, imm32(0xff)));
self.push(or((reg_size, rdi, rax)));
self.push(test((reg_size, TMP_REG, rax)));
self.push(cmov(Condition::NotEqual, reg_size, TMP_REG, rdi));

for _ in range.step_by(8) {
self.push(mov(reg_size, rdi, TMP_REG));
self.push(shl_imm(RegSize::R64, rax, 8));
self.push(or((reg_size, rdi, rax)));
self.push(test((reg_size, TMP_REG, rax)));
self.push(cmov(Condition::NotEqual, reg_size, TMP_REG, rdi));
}

self.push(push(TMP_REG));
self.restore_registers_from_vmctx();
self.push(pop(TMP_REG));
self.push(ret());
}

pub(crate) fn trace_execution(&mut self, code_offset: u32) {
let step_label = self.step_label;
let asm = self.asm.reserve::<U3>();
Expand Down Expand Up @@ -1773,20 +1740,6 @@ where
self.push(movzx_16_to_64(self.reg_size(), conv_reg(d), conv_reg(s)))
}

#[inline(always)]
pub fn or_combine_byte(&mut self, d: RawReg, s: RawReg) {
let reg_size = self.reg_size();
let d = conv_reg(d);
let s = conv_reg(s);
let or_combine_label = self.or_combine_label;

let asm = self.asm.reserve::<U3>();
let asm = asm.push(mov(reg_size, TMP_REG, s));
let asm = asm.push(call_label32(or_combine_label));
let asm = asm.push(mov(reg_size, d, TMP_REG));
asm.assert_reserved_exactly_as_needed();
}

#[inline(always)]
pub fn reverse_byte(&mut self, d: RawReg, s: RawReg) {
let reg_size = self.reg_size();
Expand Down
5 changes: 0 additions & 5 deletions crates/polkavm/src/gas.rs
Original file line number Diff line number Diff line change
Expand Up @@ -399,11 +399,6 @@ impl InstructionVisitor for GasVisitor {
self.cost += 1;
}

#[inline(always)]
fn or_combine_byte(&mut self, _d: RawReg, _s: RawReg) -> Self::ReturnTy {
self.cost += 1;
}

#[inline(always)]
fn reverse_byte(&mut self, _d: RawReg, _s: RawReg) -> Self::ReturnTy {
self.cost += 1;
Expand Down
44 changes: 0 additions & 44 deletions crates/polkavm/src/interpreter.rs
Original file line number Diff line number Diff line change
Expand Up @@ -2641,42 +2641,6 @@ define_interpreter! {
visitor.go_to_next_instruction()
}

fn or_combine_byte_32<const DEBUG: bool>(visitor: &mut Visitor, d: Reg, s: Reg) -> Option<Target> {
if DEBUG {
log::trace!("[{}]: {}", visitor.inner.compiled_offset, asm::or_combine_byte(d, s));
}

let word = visitor.get32(s);

let mut result = 0;
for i in (0..32).step_by(8) {
if (word & (0xffu32 << i)) != 0 {
result |= 0xffu32 << i;
}
}

visitor.set32::<DEBUG>(d, result);
visitor.go_to_next_instruction()
}

fn or_combine_byte_64<const DEBUG: bool>(visitor: &mut Visitor, d: Reg, s: Reg) -> Option<Target> {
if DEBUG {
log::trace!("[{}]: {}", visitor.inner.compiled_offset, asm::or_combine_byte(d, s));
}

let word = visitor.get64(s);

let mut result = 0;
for i in (0..64).step_by(8) {
if (word & (0xffu64 << i)) != 0 {
result |= 0xffu64 << i;
}
}

visitor.set64::<DEBUG>(d, result);
visitor.go_to_next_instruction()
}

fn reverse_byte_32<const DEBUG: bool>(visitor: &mut Visitor, d: Reg, s: Reg) -> Option<Target> {
if DEBUG {
log::trace!("[{}]: {}", visitor.inner.compiled_offset, asm::reverse_byte(d, s));
Expand Down Expand Up @@ -3962,14 +3926,6 @@ impl<'a, const DEBUG: bool> InstructionVisitor for Compiler<'a, DEBUG> {
}
}

fn or_combine_byte(&mut self, d: RawReg, s: RawReg) -> Self::ReturnTy {
if self.module.blob().is_64_bit() {
emit!(self, or_combine_byte_64(d, s));
} else {
emit!(self, or_combine_byte_32(d, s));
}
}

fn reverse_byte(&mut self, d: RawReg, s: RawReg) -> Self::ReturnTy {
if self.module.blob().is_64_bit() {
emit!(self, reverse_byte_64(d, s));
Expand Down

0 comments on commit 2b1fb65

Please sign in to comment.