From 648d1ed7e689062a83654ab6a0e22cf0ff58ff17 Mon Sep 17 00:00:00 2001 From: Alan Szepieniec Date: Wed, 12 Jul 2023 10:45:06 +0200 Subject: [PATCH] add macro triton_instr for compiling a single instruction Co-authored-by: Ferdinand Sauer --- triton-vm/src/lib.rs | 24 ++++++++++++---- triton-vm/src/parser.rs | 62 +++++++++++++++++++++++----------------- triton-vm/src/program.rs | 4 +-- 3 files changed, 56 insertions(+), 34 deletions(-) diff --git a/triton-vm/src/lib.rs b/triton-vm/src/lib.rs index 9a15c760..f236969b 100644 --- a/triton-vm/src/lib.rs +++ b/triton-vm/src/lib.rs @@ -34,7 +34,7 @@ pub mod stark; pub mod table; pub mod vm; -/// Parse an entire program written in [Triton assembly][tasm]. +/// Compile an entire program written in [Triton assembly][tasm]. /// The resulting [`Program`](crate::program::Program) can be /// [run](crate::program::Program::run). /// @@ -56,7 +56,7 @@ pub mod vm; /// return /// ); /// let output = program.run(vec![3_u64.into()], vec![]).unwrap(); -/// assert_eq!(vec![1_u64.into()], output); +/// assert_eq!(1, output[0].value()); /// ``` /// /// Any type with an appropriate [`Display`](std::fmt::Display) implementation can be @@ -104,7 +104,7 @@ macro_rules! triton_program { }}; } -/// Parse [Triton assembly][tasm] into a list of labelled +/// Compile [Triton assembly][tasm] into a list of labelled /// [`Instruction`](crate::instruction::LabelledInstruction)s. /// Similar to [`triton_program!`](crate::triton_program), it is possible to use string-like /// interpolation to insert instructions, arguments, labels, or other expressions. @@ -123,7 +123,7 @@ macro_rules! triton_program { /// push {push_argument} /// some_other_label: skiz halt return /// ); -/// assert_eq!(6, instructions.len()); +/// assert_eq!(7, instructions.len()); /// ``` /// /// # Panics @@ -161,8 +161,20 @@ macro_rules! 
triton_asm { }; ($($source_code:tt)*) => {{ let source_code = $crate::triton_asm!(@fmt "",; $($source_code)*); - let (_, instructions) = $crate::parser::program(&source_code).unwrap(); - $crate::parser::to_labelled(&instructions) + let (_, instructions) = $crate::parser::tokenize(&source_code).unwrap(); + $crate::parser::to_labelled_instructions(&instructions) + }}; +} + +/// Compile a single [Triton assembly][tasm] instruction. Output a +/// [`LabelledInstruction`]. +/// +/// [tasm]: https://triton-vm.org/spec/instructions.html +#[macro_export] +macro_rules! triton_instr { + ($instr:ident) => {{ + let (_, instructions) = $crate::parser::tokenize(stringify!($instr)).unwrap(); + instructions[0].to_labelled_instruction() }}; } diff --git a/triton-vm/src/parser.rs b/triton-vm/src/parser.rs index 73bb7473..817eeb86 100644 --- a/triton-vm/src/parser.rs +++ b/triton-vm/src/parser.rs @@ -35,18 +35,21 @@ pub struct ParseError<'a> { pub errors: VerboseError<&'a str>, } -/// A `ParsedInstruction` has `call` addresses encoded as label names. +/// `InstructionToken` is either an instruction with a label, or a +/// label itself. It is an intermediate object used in the middle +/// of the compilation pipeline. You probably want +/// [`LabelledInstruction`]. 
#[derive(Debug, Clone, PartialEq, Eq, Hash)] -pub enum ParsedInstruction<'a> { +pub enum InstructionToken<'a> { Instruction(AnInstruction, &'a str), Label(String, &'a str), } -impl<'a> std::fmt::Display for ParsedInstruction<'a> { +impl<'a> std::fmt::Display for InstructionToken<'a> { fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { match self { - ParsedInstruction::Instruction(instr, _) => write!(f, "{instr}"), - ParsedInstruction::Label(label_name, _) => write!(f, "{label_name}:"), + InstructionToken::Instruction(instr, _) => write!(f, "{instr}"), + InstructionToken::Label(label_name, _) => write!(f, "{label_name}:"), } } } @@ -59,16 +62,16 @@ impl<'a> std::fmt::Display for ParseError<'a> { impl<'a> Error for ParseError<'a> {} -impl<'a> ParsedInstruction<'a> { +impl<'a> InstructionToken<'a> { pub fn token_str(&self) -> &'a str { match self { - ParsedInstruction::Instruction(_, token_str) => token_str, - ParsedInstruction::Label(_, token_str) => token_str, + InstructionToken::Instruction(_, token_str) => token_str, + InstructionToken::Label(_, token_str) => token_str, } } - pub fn to_labelled(&self) -> LabelledInstruction { - use ParsedInstruction::*; + pub fn to_labelled_instruction(&self) -> LabelledInstruction { + use InstructionToken::*; match self { Instruction(instr, _) => LabelledInstruction::Instruction(instr.to_owned()), Label(label, _) => LabelledInstruction::Label(label.to_owned()), @@ -76,10 +79,10 @@ impl<'a> ParsedInstruction<'a> { } } -pub fn to_labelled(instructions: &[ParsedInstruction]) -> Vec { +pub fn to_labelled_instructions(instructions: &[InstructionToken]) -> Vec { instructions .iter() - .map(|instruction| instruction.to_labelled()) + .map(|instruction| instruction.to_labelled_instruction()) .collect() } @@ -104,8 +107,8 @@ pub fn pretty_print_error(s: &str, mut e: VerboseError<&str>) -> String { } /// Parse a program -pub fn parse(input: &str) -> Result, ParseError> { - let instructions = match program(input).finish() { 
+pub fn parse(input: &str) -> Result, ParseError> { + let instructions = match tokenize(input).finish() { Ok((_s, instructions)) => Ok(instructions), Err(errors) => Err(ParseError { input, errors }), }?; @@ -117,15 +120,15 @@ pub fn parse(input: &str) -> Result, ParseError> { fn scan_missing_duplicate_labels<'a>( input: &'a str, - instructions: &[ParsedInstruction<'a>], + instructions: &[InstructionToken<'a>], ) -> Result<(), ParseError<'a>> { - let mut seen: HashMap<&str, ParsedInstruction> = HashMap::default(); - let mut duplicates: HashSet = HashSet::default(); - let mut missings: HashSet = HashSet::default(); + let mut seen: HashMap<&str, InstructionToken> = HashMap::default(); + let mut duplicates: HashSet = HashSet::default(); + let mut missings: HashSet = HashSet::default(); // Find duplicate labels, including the first occurrence of each duplicate. for instruction in instructions.iter() { - if let ParsedInstruction::Label(label, _token_s) = instruction { + if let InstructionToken::Label(label, _token_s) = instruction { if let Some(first_label) = seen.get(label.as_str()) { duplicates.insert(first_label.to_owned()); duplicates.insert(instruction.to_owned()); @@ -137,7 +140,7 @@ fn scan_missing_duplicate_labels<'a>( // Find missing labels for instruction in instructions.iter() { - if let ParsedInstruction::Instruction(Call(addr), _token_s) = instruction { + if let InstructionToken::Instruction(Call(addr), _token_s) = instruction { if !seen.contains_key(addr.as_str()) { missings.insert(instruction.to_owned()); } @@ -176,7 +179,8 @@ fn scan_missing_duplicate_labels<'a>( /// error type, but we want `nom::error::VerboseError` as it allows `context()`. 
type ParseResult<'input, Out> = IResult<&'input str, Out, VerboseError<&'input str>>; -pub fn program(s: &str) -> ParseResult> { +/// Tokenize Triton assembly source code into a list of [`InstructionToken`]s. +pub fn tokenize(s: &str) -> ParseResult> { let (s, _) = comment_or_whitespace0(s)?; let (s, instructions) = many0(alt((label, labelled_instruction)))(s)?; let (s, _) = context("expecting label, instruction or eof", eof)(s)?; @@ -184,12 +188,12 @@ pub fn program(s: &str) -> ParseResult> { Ok((s, instructions)) } -fn labelled_instruction(s_instr: &str) -> ParseResult { +fn labelled_instruction(s_instr: &str) -> ParseResult { let (s, instr) = an_instruction(s_instr)?; - Ok((s, ParsedInstruction::Instruction(instr, s_instr))) + Ok((s, InstructionToken::Instruction(instr, s_instr))) } -fn label(label_s: &str) -> ParseResult { +fn label(label_s: &str) -> ParseResult { let (s, addr) = label_addr(label_s)?; let (s, _) = token0("")(s)?; // whitespace between label and ':' is allowed let (s, _) = token0(":")(s)?; // don't require space after ':' @@ -201,7 +205,7 @@ fn label(label_s: &str) -> ParseResult { return cut(context("label cannot be named after instruction", fail))(label_s); } - Ok((s, ParsedInstruction::Label(addr, label_s))) + Ok((s, InstructionToken::Label(addr, label_s))) } fn an_instruction(s: &str) -> ParseResult> { @@ -509,6 +513,7 @@ pub mod parser_tests { use crate::program::Program; use crate::triton_asm; + use crate::triton_instr; use crate::triton_program; use super::*; @@ -530,7 +535,7 @@ pub mod parser_tests { match parse(test_case.input) { Ok(actual) => assert_eq!( test_case.expected, - Program::new(&to_labelled(&actual)), + Program::new(&to_labelled_instructions(&actual)), "{}", test_case.message ), @@ -1010,4 +1015,9 @@ pub mod parser_tests { .run(vec![], vec![]) .unwrap(); } + + #[test] + fn triton_instruction_macro() { + assert_eq!(LabelledInstruction::Instruction(Halt), triton_instr!(halt)); + } } diff --git a/triton-vm/src/program.rs b/triton-vm/src/program.rs index 5618be96..e99309e4 100644 --- 
a/triton-vm/src/program.rs +++ b/triton-vm/src/program.rs @@ -19,7 +19,7 @@ use crate::instruction::convert_all_labels_to_addresses; use crate::instruction::Instruction; use crate::instruction::LabelledInstruction; use crate::parser::parse; -use crate::parser::to_labelled; +use crate::parser::to_labelled_instructions; use crate::vm::VMState; /// A `Program` is a `Vec` that contains duplicate elements for instructions with a @@ -139,7 +139,7 @@ impl Program { /// Create a `Program` by parsing source code. pub fn from_code(code: &str) -> Result { parse(code) - .map(|program| Program::new(&to_labelled(&program))) + .map(|program| Program::new(&to_labelled_instructions(&program))) .map_err(|err| anyhow::anyhow!("{}", err)) }