Skip to content

Commit

Permalink
add macro triton_instr for compiling a single instruction
Browse files Browse the repository at this point in the history
Co-authored-by: Ferdinand Sauer <[email protected]>
  • Loading branch information
aszepieniec and jan-ferdinand committed Jul 12, 2023
1 parent 66dffe7 commit 648d1ed
Show file tree
Hide file tree
Showing 3 changed files with 56 additions and 34 deletions.
24 changes: 18 additions & 6 deletions triton-vm/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -34,7 +34,7 @@ pub mod stark;
pub mod table;
pub mod vm;

/// Parse an entire program written in [Triton assembly][tasm].
/// Compile an entire program written in [Triton assembly][tasm].
/// The resulting [`Program`](crate::program::Program) can be
/// [run](crate::program::Program::run).
///
Expand All @@ -56,7 +56,7 @@ pub mod vm;
/// return
/// );
/// let output = program.run(vec![3_u64.into()], vec![]).unwrap();
/// assert_eq!(vec![1_u64.into()], output);
/// assert_eq!(1, output[0].value());
/// ```
///
/// Any type with an appropriate [`Display`](std::fmt::Display) implementation can be
Expand Down Expand Up @@ -104,7 +104,7 @@ macro_rules! triton_program {
}};
}

/// Parse [Triton assembly][tasm] into a list of labelled
/// Compile [Triton assembly][tasm] into a list of labelled
/// [`Instruction`](crate::instruction::LabelledInstruction)s.
/// Similar to [`triton_program!`](crate::triton_program), it is possible to use string-like
/// interpolation to insert instructions, arguments, labels, or other expressions.
Expand All @@ -123,7 +123,7 @@ macro_rules! triton_program {
/// push {push_argument}
/// some_other_label: skiz halt return
/// );
/// assert_eq!(6, instructions.len());
/// assert_eq!(7, instructions.len());
/// ```
///
/// # Panics
Expand Down Expand Up @@ -161,8 +161,20 @@ macro_rules! triton_asm {
};
($($source_code:tt)*) => {{
let source_code = $crate::triton_asm!(@fmt "",; $($source_code)*);
let (_, instructions) = $crate::parser::program(&source_code).unwrap();
$crate::parser::to_labelled(&instructions)
let (_, instructions) = $crate::parser::tokenize(&source_code).unwrap();
$crate::parser::to_labelled_instructions(&instructions)
}};
}

/// Compile a single [Triton assembly][tasm] instruction into a
/// [`LabelledInstruction`](crate::instruction::LabelledInstruction).
///
/// Only instructions that consist of a single identifier, such as `halt`, are accepted;
/// anything that is not exactly one identifier does not match the macro's pattern.
///
/// # Panics
///
/// Panics if the given identifier cannot be tokenized as Triton assembly.
///
/// [tasm]: https://triton-vm.org/spec/instructions.html
#[macro_export]
macro_rules! triton_instr {
    ($instr:ident) => {{
        let (_, instructions) = $crate::parser::tokenize(stringify!($instr))
            .expect("identifier must be a valid Triton assembly instruction");
        // Successful tokenization of a single identifier yields exactly one token.
        instructions
            .first()
            .expect("tokenizing a single instruction must yield one token")
            .to_labelled_instruction()
    }};
}

Expand Down
62 changes: 36 additions & 26 deletions triton-vm/src/parser.rs
Original file line number Diff line number Diff line change
Expand Up @@ -35,18 +35,21 @@ pub struct ParseError<'a> {
pub errors: VerboseError<&'a str>,
}

/// A `ParsedInstruction` has `call` addresses encoded as label names.
/// An `InstructionToken` is either an instruction with a label, or a
/// label itself. It is an intermediate object used at some middle
/// point of the compilation pipeline. You probably want
/// [`LabelledInstruction`].
#[derive(Debug, Clone, PartialEq, Eq, Hash)]
pub enum ParsedInstruction<'a> {
pub enum InstructionToken<'a> {
Instruction(AnInstruction<String>, &'a str),
Label(String, &'a str),
}

impl<'a> std::fmt::Display for ParsedInstruction<'a> {
impl<'a> std::fmt::Display for InstructionToken<'a> {
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
match self {
ParsedInstruction::Instruction(instr, _) => write!(f, "{instr}"),
ParsedInstruction::Label(label_name, _) => write!(f, "{label_name}:"),
InstructionToken::Instruction(instr, _) => write!(f, "{instr}"),
InstructionToken::Label(label_name, _) => write!(f, "{label_name}:"),
}
}
}
Expand All @@ -59,27 +62,27 @@ impl<'a> std::fmt::Display for ParseError<'a> {

impl<'a> Error for ParseError<'a> {}

impl<'a> ParsedInstruction<'a> {
impl<'a> InstructionToken<'a> {
pub fn token_str(&self) -> &'a str {
match self {
ParsedInstruction::Instruction(_, token_str) => token_str,
ParsedInstruction::Label(_, token_str) => token_str,
InstructionToken::Instruction(_, token_str) => token_str,
InstructionToken::Label(_, token_str) => token_str,
}
}

pub fn to_labelled(&self) -> LabelledInstruction {
use ParsedInstruction::*;
pub fn to_labelled_instruction(&self) -> LabelledInstruction {
use InstructionToken::*;
match self {
Instruction(instr, _) => LabelledInstruction::Instruction(instr.to_owned()),
Label(label, _) => LabelledInstruction::Label(label.to_owned()),
}
}
}

pub fn to_labelled(instructions: &[ParsedInstruction]) -> Vec<LabelledInstruction> {
pub fn to_labelled_instructions(instructions: &[InstructionToken]) -> Vec<LabelledInstruction> {
instructions
.iter()
.map(|instruction| instruction.to_labelled())
.map(|instruction| instruction.to_labelled_instruction())
.collect()
}

Expand All @@ -104,8 +107,8 @@ pub fn pretty_print_error(s: &str, mut e: VerboseError<&str>) -> String {
}

/// Parse a program
pub fn parse(input: &str) -> Result<Vec<ParsedInstruction>, ParseError> {
let instructions = match program(input).finish() {
pub fn parse(input: &str) -> Result<Vec<InstructionToken>, ParseError> {
let instructions = match tokenize(input).finish() {
Ok((_s, instructions)) => Ok(instructions),
Err(errors) => Err(ParseError { input, errors }),
}?;
Expand All @@ -117,15 +120,15 @@ pub fn parse(input: &str) -> Result<Vec<ParsedInstruction>, ParseError> {

fn scan_missing_duplicate_labels<'a>(
input: &'a str,
instructions: &[ParsedInstruction<'a>],
instructions: &[InstructionToken<'a>],
) -> Result<(), ParseError<'a>> {
let mut seen: HashMap<&str, ParsedInstruction> = HashMap::default();
let mut duplicates: HashSet<ParsedInstruction> = HashSet::default();
let mut missings: HashSet<ParsedInstruction> = HashSet::default();
let mut seen: HashMap<&str, InstructionToken> = HashMap::default();
let mut duplicates: HashSet<InstructionToken> = HashSet::default();
let mut missings: HashSet<InstructionToken> = HashSet::default();

// Find duplicate labels, including the first occurrence of each duplicate.
for instruction in instructions.iter() {
if let ParsedInstruction::Label(label, _token_s) = instruction {
if let InstructionToken::Label(label, _token_s) = instruction {
if let Some(first_label) = seen.get(label.as_str()) {
duplicates.insert(first_label.to_owned());
duplicates.insert(instruction.to_owned());
Expand All @@ -137,7 +140,7 @@ fn scan_missing_duplicate_labels<'a>(

// Find missing labels
for instruction in instructions.iter() {
if let ParsedInstruction::Instruction(Call(addr), _token_s) = instruction {
if let InstructionToken::Instruction(Call(addr), _token_s) = instruction {
if !seen.contains_key(addr.as_str()) {
missings.insert(instruction.to_owned());
}
Expand Down Expand Up @@ -176,20 +179,21 @@ fn scan_missing_duplicate_labels<'a>(
/// error type, but we want `nom::error::VerboseError` as it allows `context()`.
type ParseResult<'input, Out> = IResult<&'input str, Out, VerboseError<&'input str>>;

pub fn program(s: &str) -> ParseResult<Vec<ParsedInstruction>> {
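/// Tokenize source code into a list of [`InstructionToken`]s, i.e., labels and instructions.
/// The entire input must be consumed; otherwise, an error is returned.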
pub fn tokenize(s: &str) -> ParseResult<Vec<InstructionToken>> {
let (s, _) = comment_or_whitespace0(s)?;
let (s, instructions) = many0(alt((label, labelled_instruction)))(s)?;
let (s, _) = context("expecting label, instruction or eof", eof)(s)?;

Ok((s, instructions))
}

fn labelled_instruction(s_instr: &str) -> ParseResult<ParsedInstruction> {
fn labelled_instruction(s_instr: &str) -> ParseResult<InstructionToken> {
let (s, instr) = an_instruction(s_instr)?;
Ok((s, ParsedInstruction::Instruction(instr, s_instr)))
Ok((s, InstructionToken::Instruction(instr, s_instr)))
}

fn label(label_s: &str) -> ParseResult<ParsedInstruction> {
fn label(label_s: &str) -> ParseResult<InstructionToken> {
let (s, addr) = label_addr(label_s)?;
let (s, _) = token0("")(s)?; // whitespace between label and ':' is allowed
let (s, _) = token0(":")(s)?; // don't require space after ':'
Expand All @@ -201,7 +205,7 @@ fn label(label_s: &str) -> ParseResult<ParsedInstruction> {
return cut(context("label cannot be named after instruction", fail))(label_s);
}

Ok((s, ParsedInstruction::Label(addr, label_s)))
Ok((s, InstructionToken::Label(addr, label_s)))
}

fn an_instruction(s: &str) -> ParseResult<AnInstruction<String>> {
Expand Down Expand Up @@ -509,6 +513,7 @@ pub mod parser_tests {

use crate::program::Program;
use crate::triton_asm;
use crate::triton_instr;
use crate::triton_program;

use super::*;
Expand All @@ -530,7 +535,7 @@ pub mod parser_tests {
match parse(test_case.input) {
Ok(actual) => assert_eq!(
test_case.expected,
Program::new(&to_labelled(&actual)),
Program::new(&to_labelled_instructions(&actual)),
"{}",
test_case.message
),
Expand Down Expand Up @@ -1010,4 +1015,9 @@ pub mod parser_tests {
.run(vec![], vec![])
.unwrap();
}

/// `triton_instr!` compiles the bare `halt` mnemonic into the matching labelled instruction.
#[test]
fn triton_instruction_macro() {
    let expected = LabelledInstruction::Instruction(Halt);
    let compiled = triton_instr!(halt);
    assert_eq!(expected, compiled);
}
}
4 changes: 2 additions & 2 deletions triton-vm/src/program.rs
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,7 @@ use crate::instruction::convert_all_labels_to_addresses;
use crate::instruction::Instruction;
use crate::instruction::LabelledInstruction;
use crate::parser::parse;
use crate::parser::to_labelled;
use crate::parser::to_labelled_instructions;
use crate::vm::VMState;

/// A `Program` is a `Vec<Instruction>` that contains duplicate elements for instructions with a
Expand Down Expand Up @@ -139,7 +139,7 @@ impl Program {
/// Create a `Program` by parsing source code.
pub fn from_code(code: &str) -> Result<Self> {
parse(code)
.map(|program| Program::new(&to_labelled(&program)))
.map(|program| Program::new(&to_labelled_instructions(&program)))
.map_err(|err| anyhow::anyhow!("{}", err))
}

Expand Down

0 comments on commit 648d1ed

Please sign in to comment.