diff --git a/vm/src/arch/adapters/rv32_alu.rs b/vm/src/arch/adapters/rv32_alu.rs index fe54f43079..be659a8397 100644 --- a/vm/src/arch/adapters/rv32_alu.rs +++ b/vm/src/arch/adapters/rv32_alu.rs @@ -8,24 +8,43 @@ use p3_field::{AbstractField, Field, PrimeField32}; use super::RV32_REGISTER_NUM_LANES; use crate::{ arch::{ - ExecutionBridge, ExecutionState, InstructionOutput, IntegrationInterface, MachineAdapter, - MachineAdapterInterface, Result, + ExecutionBridge, ExecutionBus, ExecutionState, InstructionOutput, IntegrationInterface, + MachineAdapter, MachineAdapterInterface, Result, }, memory::{ offline_checker::{MemoryBridge, MemoryReadAuxCols, MemoryWriteAuxCols}, - MemoryChip, MemoryReadRecord, MemoryWriteRecord, + MemoryChip, MemoryChipRef, MemoryReadRecord, MemoryWriteRecord, }, - program::Instruction, + program::{bridge::ProgramBus, Instruction}, }; /// Reads instructions of the form OP a, b, c, d, e where [a:4]_d = [b:4]_d op [c:4]_e. /// Operand d can only be 1, and e can be either 1 (for register reads) or 0 (when c /// is an immediate). +#[derive(Debug)] pub struct Rv32AluAdapter { _marker: std::marker::PhantomData, pub air: Rv32AluAdapterAir, } +impl Rv32AluAdapter { + pub fn new( + execution_bus: ExecutionBus, + program_bus: ProgramBus, + memory_chip: MemoryChipRef, + ) -> Self { + let memory_bridge = memory_chip.borrow().memory_bridge(); + Self { + _marker: std::marker::PhantomData, + air: Rv32AluAdapterAir { + _execution_bridge: ExecutionBridge::new(execution_bus, program_bus), + _memory_bridge: memory_bridge, + }, + } + } +} + +#[derive(Debug)] pub struct Rv32AluReadRecord { /// Read register value from address space d=1 pub rs1: MemoryReadRecord, @@ -36,6 +55,7 @@ pub struct Rv32AluReadRecord { pub rs2_is_imm: bool, } +#[derive(Debug)] pub struct Rv32AluWriteRecord { pub from_state: ExecutionState, /// Write to destination register diff --git a/vm/src/arch/chips.rs b/vm/src/arch/chips.rs index 0056434ab8..3c9f48e55d 100644 --- a/vm/src/arch/chips.rs +++ b/vm/src/arch/chips.rs @@ -28,6 +28,7 @@ use crate::{ memory::MemoryChipRef, modular_addsub::ModularAddSubChip, modular_multdiv::ModularMultDivChip, + new_alu::Rv32ArithmeticLogicChip, program::{ExecutionError, Instruction, ProgramChip}, shift::ShiftChip, ui::UiChip, @@ -183,6 +184,7 @@ pub enum InstructionExecutorVariant { ModularAddSub(Rc>>), ModularMultDiv(Rc>>), ArithmeticLogicUnit256(Rc>>), + ArithmeticLogicUnitRv32(Rc>>), U256Multiplication(Rc>>), Shift256(Rc>>), Ui(Rc>>), @@ -205,6 +207,7 @@ pub enum MachineChipVariant { Keccak256(Rc>>), ByteXor(Arc>), ArithmeticLogicUnit256(Rc>>), + ArithmeticLogicUnitRv32(Rc>>), U256Multiplication(Rc>>), Shift256(Rc>>), Ui(Rc>>), diff --git a/vm/src/arch/instructions.rs b/vm/src/arch/instructions.rs index 416f4d9128..bdc3aa20b7 100644 --- a/vm/src/arch/instructions.rs +++ b/vm/src/arch/instructions.rs @@ -16,6 +16,10 @@ pub trait UsizeOpcode { } } +pub fn with_default_offset(opcode: Opcode) -> usize { + Opcode::default_offset() + opcode.as_usize() +} + #[derive( Copy, Clone, Debug, PartialEq, Eq, PartialOrd, Ord, EnumCount, EnumIter, FromRepr, UsizeOpcode, )] @@ -178,6 +182,16 @@ pub enum U32Opcode { AUIPC, } -pub fn with_default_offset(opcode: Opcode) -> usize { - Opcode::default_offset() + opcode.as_usize() +#[derive( + Copy, Clone, Debug, PartialEq, Eq, PartialOrd, Ord, EnumCount, EnumIter, FromRepr, UsizeOpcode, +)] +#[opcode_offset = 0x300] +#[repr(usize)] +#[allow(non_camel_case_types)] +pub enum AluOpcode { + ADD, + SUB, + XOR, + OR, + AND, } diff --git a/vm/src/arch/integration_api.rs b/vm/src/arch/integration_api.rs index 9115d5b9f7..b2056da0e5 100644 --- a/vm/src/arch/integration_api.rs +++ b/vm/src/arch/integration_api.rs @@ -1,11 +1,13 @@ use std::borrow::Borrow; -use afs_stark_backend::interaction::InteractionBuilder; +use afs_stark_backend::{interaction::InteractionBuilder, rap::AnyRap}; use p3_air::{Air, AirBuilderWithPublicValues, BaseAir, PairBuilder}; +use p3_commit::PolynomialSpace; use p3_field::{AbstractField, PrimeField32}; -use p3_matrix::Matrix; +use p3_matrix::{dense::RowMajorMatrix, Matrix}; +use p3_uni_stark::{Domain, StarkGenericConfig}; -use super::{ExecutionState, InstructionExecutor, Result}; +use super::{ExecutionState, InstructionExecutor, MachineChip, Result}; use crate::{ memory::{MemoryChip, MemoryChipRef}, program::Instruction, @@ -130,6 +132,7 @@ pub struct IntegrationInterface> { pub instruction: I::ProcessedInstruction, } +#[derive(Debug)] pub struct MachineChipWrapper, M: MachineIntegration> { pub adapter: A, pub inner: M, @@ -175,6 +178,22 @@ where } } +impl MachineChipWrapper +where + F: PrimeField32, + A: MachineAdapter, + M: MachineIntegration, +{ + pub fn new(adapter: A, inner: M, memory: MemoryChipRef) -> Self { + Self { + adapter, + inner, + records: vec![], + memory, + } + } +} + impl InstructionExecutor for MachineChipWrapper where F: PrimeField32, @@ -204,14 +223,13 @@ where } } -/*TODO impl MachineChip for MachineChipWrapper where F: PrimeField32, A: MachineAdapter, M: MachineIntegration, - [F]: BorrowMut>, - [F]: BorrowMut>, + // [F]: BorrowMut>, + // [F]: BorrowMut>, { fn generate_trace(self) -> RowMajorMatrix { let height = self.records.len().next_power_of_two(); @@ -221,10 +239,28 @@ where let mut values = vec![F::zero(); height * width]; // This zip only goes through records. The padding rows between records.len()..height // are filled with zeros. - for (row, record) in values.chunks_exact_mut(width).zip(self.records) { + for (_row, _record) in values.chunks_exact_mut(width).zip(self.records) { todo!() } RowMajorMatrix::new(values, width) } + + fn air(&self) -> Box> + where + Domain: PolynomialSpace, + { + todo!() + } + + fn air_name(&self) -> String { + todo!() + } + + fn current_trace_height(&self) -> usize { + todo!() + } + + fn trace_width(&self) -> usize { + todo!() + } } -*/ diff --git a/vm/src/lib.rs b/vm/src/lib.rs index cd230d9ec2..270c37f400 100644 --- a/vm/src/lib.rs +++ b/vm/src/lib.rs @@ -9,6 +9,7 @@ pub mod hashes; pub mod memory; pub mod modular_addsub; pub mod modular_multdiv; +pub mod new_alu; pub mod program; /// SDK functions for running and proving programs in the VM. #[cfg(feature = "sdk")] diff --git a/vm/src/new_alu/integration.rs b/vm/src/new_alu/integration.rs new file mode 100644 index 0000000000..34e87764b8 --- /dev/null +++ b/vm/src/new_alu/integration.rs @@ -0,0 +1,212 @@ +use std::{array, mem::size_of, sync::Arc}; + +use afs_derive::AlignedBorrow; +use afs_primitives::xor::{bus::XorBus, lookup::XorLookupChip}; +use afs_stark_backend::interaction::InteractionBuilder; +use p3_air::{Air, AirBuilderWithPublicValues, BaseAir, PairBuilder}; +use p3_field::{Field, PrimeField32}; + +use crate::{ + arch::{ + instructions::{AluOpcode, UsizeOpcode}, + InstructionOutput, IntegrationInterface, MachineAdapter, MachineAdapterInterface, + MachineIntegration, Result, + }, + program::Instruction, +}; + +// TODO: Replace current ALU module upon completion + +#[repr(C)] +#[derive(AlignedBorrow)] +pub struct ArithmeticLogicCols { + pub a: [T; NUM_LIMBS], + pub b: [T; NUM_LIMBS], + pub c: [T; NUM_LIMBS], + + pub opcode_add_flag: T, + pub opcode_sub_flag: T, + pub opcode_xor_flag: T, + pub opcode_and_flag: T, + pub opcode_or_flag: T, +} + +impl + ArithmeticLogicCols +{ + pub fn width() -> usize { + size_of::>() + } +} + +#[derive(Copy, Clone, Debug)] +pub struct ArithmeticLogicAir { + pub bus: XorBus, +} + +impl BaseAir + for ArithmeticLogicAir +{ + fn width(&self) -> usize { + ArithmeticLogicCols::::width() + } +} + +impl Air + for ArithmeticLogicAir +{ + fn eval(&self, _builder: &mut AB) { + todo!(); + } +} + +#[derive(Debug)] +pub struct ArithmeticLogicIntegration { + pub air: ArithmeticLogicAir, + pub xor_lookup_chip: Arc>, + offset: usize, +} + +impl + ArithmeticLogicIntegration +{ + pub fn new(xor_lookup_chip: Arc>, offset: usize) -> Self { + Self { + air: ArithmeticLogicAir { + bus: xor_lookup_chip.bus(), + }, + xor_lookup_chip, + offset, + } + } +} + +impl, const NUM_LIMBS: usize, const LIMB_BITS: usize> + MachineIntegration for ArithmeticLogicIntegration +where + A::Interface: MachineAdapterInterface, + as MachineAdapterInterface>::Reads: Into<[[F; NUM_LIMBS]; 2]>, + as MachineAdapterInterface>::Writes: From<[F; NUM_LIMBS]>, +{ + // TODO: update for trace generation + type Record = u32; + type Cols = ArithmeticLogicCols; + type Air = ArithmeticLogicAir; + + #[allow(clippy::type_complexity)] + fn execute_instruction( + &self, + instruction: &Instruction, + from_pc: F, + reads: as MachineAdapterInterface>::Reads, + ) -> Result<(InstructionOutput>, Self::Record)> { + let Instruction { opcode, .. } = instruction; + let opcode = AluOpcode::from_usize(opcode - self.offset); + + let data: [[F; NUM_LIMBS]; 2] = reads.into(); + let x = data[0].map(|x| x.as_canonical_u32()); + let y = data[1].map(|y| y.as_canonical_u32()); + let z = solve_alu::(opcode, &x, &y); + + // Integration doesn't modify PC directly, so we let Adapter handle the increment + let output: InstructionOutput> = InstructionOutput { + to_pc: from_pc, + writes: z.map(F::from_canonical_u32).into(), + }; + + // TODO: send XorLookupChip requests + // TODO: create Record and return + + Ok((output, 0)) + } + + fn get_opcode_name(&self, _opcode: usize) -> String { + todo!() + } + + fn generate_trace_row(&self, _row_slice: &mut Self::Cols, _record: Self::Record) { + todo!() + } + + /// Returns `(to_pc, interface)`. + fn eval_primitive + PairBuilder + AirBuilderWithPublicValues>( + _air: &Self::Air, + _builder: &mut AB, + _local: &Self::Cols, + _local_adapter: &A::Cols, + ) -> IntegrationInterface> { + todo!() + } + + fn air(&self) -> Self::Air { + self.air + } +} + +pub(super) fn solve_alu( + opcode: AluOpcode, + x: &[u32; NUM_LIMBS], + y: &[u32; NUM_LIMBS], +) -> [u32; NUM_LIMBS] { + match opcode { + AluOpcode::ADD => solve_add::(x, y), + AluOpcode::SUB => solve_subtract::(x, y), + AluOpcode::XOR => solve_xor::(x, y), + AluOpcode::OR => solve_or::(x, y), + AluOpcode::AND => solve_and::(x, y), + } +} + +fn solve_add( + x: &[u32; NUM_LIMBS], + y: &[u32; NUM_LIMBS], +) -> [u32; NUM_LIMBS] { + let mut z = [0u32; NUM_LIMBS]; + let mut carry = [0u32; NUM_LIMBS]; + for i in 0..NUM_LIMBS { + z[i] = x[i] + y[i] + if i > 0 { carry[i - 1] } else { 0 }; + carry[i] = z[i] >> LIMB_BITS; + z[i] &= (1 << LIMB_BITS) - 1; + } + z +} + +fn solve_subtract( + x: &[u32; NUM_LIMBS], + y: &[u32; NUM_LIMBS], +) -> [u32; NUM_LIMBS] { + let mut z = [0u32; NUM_LIMBS]; + let mut carry = [0u32; NUM_LIMBS]; + for i in 0..NUM_LIMBS { + let rhs = y[i] + if i > 0 { carry[i - 1] } else { 0 }; + if x[i] >= rhs { + z[i] = x[i] - rhs; + carry[i] = 0; + } else { + z[i] = x[i] + (1 << LIMB_BITS) - rhs; + carry[i] = 1; + } + } + z +} + +fn solve_xor( + x: &[u32; NUM_LIMBS], + y: &[u32; NUM_LIMBS], +) -> [u32; NUM_LIMBS] { + array::from_fn(|i| x[i] ^ y[i]) +} + +fn solve_or( + x: &[u32; NUM_LIMBS], + y: &[u32; NUM_LIMBS], +) -> [u32; NUM_LIMBS] { + array::from_fn(|i| x[i] | y[i]) +} + +fn solve_and( + x: &[u32; NUM_LIMBS], + y: &[u32; NUM_LIMBS], +) -> [u32; NUM_LIMBS] { + array::from_fn(|i| x[i] & y[i]) +} diff --git a/vm/src/new_alu/mod.rs b/vm/src/new_alu/mod.rs new file mode 100644 index 0000000000..5b55d60fbd --- /dev/null +++ b/vm/src/new_alu/mod.rs @@ -0,0 +1,11 @@ +use crate::arch::{MachineChipWrapper, Rv32AluAdapter}; + +mod integration; +pub use integration::*; + +#[cfg(test)] +mod tests; + +// TODO: Replace current ALU256 module upon completion +pub type Rv32ArithmeticLogicChip = + MachineChipWrapper, ArithmeticLogicIntegration<4, 8>>; diff --git a/vm/src/new_alu/tests.rs b/vm/src/new_alu/tests.rs new file mode 100644 index 0000000000..93658bfd58 --- /dev/null +++ b/vm/src/new_alu/tests.rs @@ -0,0 +1,60 @@ +use super::integration::solve_alu; +use crate::arch::instructions::AluOpcode; + +const RV32_NUM_LIMBS: usize = 4; +const RV32_LIMB_BITS: usize = 8; + +#[test] +fn solve_add_sanity_test() { + let x: [u32; RV32_NUM_LIMBS] = [229, 33, 29, 111]; + let y: [u32; RV32_NUM_LIMBS] = [50, 171, 44, 194]; + let z: [u32; RV32_NUM_LIMBS] = [23, 205, 73, 49]; + let result = solve_alu::(AluOpcode::ADD, &x, &y); + for i in 0..RV32_NUM_LIMBS { + assert_eq!(z[i], result[i]) + } +} + +#[test] +fn solve_sub_sanity_test() { + let x: [u32; RV32_NUM_LIMBS] = [229, 33, 29, 111]; + let y: [u32; RV32_NUM_LIMBS] = [50, 171, 44, 194]; + let z: [u32; RV32_NUM_LIMBS] = [179, 118, 240, 172]; + let result = solve_alu::(AluOpcode::SUB, &x, &y); + for i in 0..RV32_NUM_LIMBS { + assert_eq!(z[i], result[i]) + } +} + +#[test] +fn solve_xor_sanity_test() { + let x: [u32; RV32_NUM_LIMBS] = [229, 33, 29, 111]; + let y: [u32; RV32_NUM_LIMBS] = [50, 171, 44, 194]; + let z: [u32; RV32_NUM_LIMBS] = [215, 138, 49, 173]; + let result = solve_alu::(AluOpcode::XOR, &x, &y); + for i in 0..RV32_NUM_LIMBS { + assert_eq!(z[i], result[i]) + } +} + +#[test] +fn solve_or_sanity_test() { + let x: [u32; RV32_NUM_LIMBS] = [229, 33, 29, 111]; + let y: [u32; RV32_NUM_LIMBS] = [50, 171, 44, 194]; + let z: [u32; RV32_NUM_LIMBS] = [247, 171, 61, 239]; + let result = solve_alu::(AluOpcode::OR, &x, &y); + for i in 0..RV32_NUM_LIMBS { + assert_eq!(z[i], result[i]) + } +} + +#[test] +fn solve_and_sanity_test() { + let x: [u32; RV32_NUM_LIMBS] = [229, 33, 29, 111]; + let y: [u32; RV32_NUM_LIMBS] = [50, 171, 44, 194]; + let z: [u32; RV32_NUM_LIMBS] = [32, 33, 12, 66]; + let result = solve_alu::(AluOpcode::AND, &x, &y); + for i in 0..RV32_NUM_LIMBS { + assert_eq!(z[i], result[i]) + } +} diff --git a/vm/src/vm/config.rs b/vm/src/vm/config.rs index f24593242e..d5dabd2471 100644 --- a/vm/src/vm/config.rs +++ b/vm/src/vm/config.rs @@ -68,6 +68,11 @@ fn default_executor_range(executor: ExecutorName) -> (Range, usize) { 8, U256Opcode::default_offset(), ), + ExecutorName::ArithmeticLogicUnitRv32 => ( + AluOpcode::default_offset(), + AluOpcode::COUNT, + AluOpcode::default_offset(), + ), ExecutorName::U256Multiplication => ( U256Opcode::default_offset() + 11, 1, diff --git a/vm/src/vm/segment.rs b/vm/src/vm/segment.rs index 1eaff5548a..7944be8953 100644 --- a/vm/src/vm/segment.rs +++ b/vm/src/vm/segment.rs @@ -30,7 +30,7 @@ use crate::{ alu::ArithmeticLogicChip, arch::{ instructions::*, ExecutionBus, ExecutionState, ExecutorName, InstructionExecutor, - InstructionExecutorVariant, MachineChip, MachineChipVariant, + InstructionExecutorVariant, MachineChip, MachineChipVariant, Rv32AluAdapter, }, castf::CastFChip, core::{ @@ -44,6 +44,7 @@ use crate::{ memory::{offline_checker::MemoryBus, MemoryChip, MemoryChipRef}, modular_addsub::{ModularAddSubChip, SECP256K1_COORD_PRIME, SECP256K1_SCALAR_PRIME}, modular_multdiv::ModularMultDivChip, + new_alu::{ArithmeticLogicIntegration, Rv32ArithmeticLogicChip}, program::{bridge::ProgramBus, DebugInfo, ExecutionError, Program, ProgramChip}, shift::ShiftChip, ui::UiChip, @@ -264,6 +265,17 @@ impl ExecutionSegment { executors.insert(opcode, new_chip.clone().into()); } } + ExecutorName::ArithmeticLogicUnitRv32 => { + let chip = Rc::new(RefCell::new(Rv32ArithmeticLogicChip::new( + Rv32AluAdapter::new(execution_bus, program_bus, memory_chip.clone()), + ArithmeticLogicIntegration::new(byte_xor_chip.clone(), offset), + memory_chip.clone(), + ))); + for opcode in range { + executors.insert(opcode, chip.clone().into()); + } + chips.push(MachineChipVariant::ArithmeticLogicUnitRv32(chip)); + } ExecutorName::ArithmeticLogicUnit256 => { // We probably must include this chip if we include any modular arithmetic, // not sure if we need to enforce this here.