From 648d1ed7e689062a83654ab6a0e22cf0ff58ff17 Mon Sep 17 00:00:00 2001
From: Alan Szepieniec <alan@neptune.cash>
Date: Wed, 12 Jul 2023 10:45:06 +0200
Subject: [PATCH] add macro triton_instr for compiling a single instruction

Co-authored-by: Ferdinand Sauer <ferdinand@neptune.cash>
---
 triton-vm/src/lib.rs     | 24 ++++++++++++----
 triton-vm/src/parser.rs  | 62 +++++++++++++++++++++++-----------------
 triton-vm/src/program.rs |  4 +--
 3 files changed, 56 insertions(+), 34 deletions(-)
diff --git a/triton-vm/src/lib.rs b/triton-vm/src/lib.rs
index 9a15c760..f236969b 100644
--- a/triton-vm/src/lib.rs
+++ b/triton-vm/src/lib.rs
@@ -34,7 +34,7 @@ pub mod stark;
 pub mod table;
 pub mod vm;
 
-/// Parse an entire program written in [Triton assembly][tasm].
+/// Compile an entire program written in [Triton assembly][tasm].
 /// The resulting [`Program`](crate::program::Program) can be
 /// [run](crate::program::Program::run).
 ///
@@ -56,7 +56,7 @@ pub mod vm;
 ///         return
 /// );
 /// let output = program.run(vec![3_u64.into()], vec![]).unwrap();
-/// assert_eq!(vec![1_u64.into()], output);
+/// assert_eq!(1, output[0].value());
 /// ```
 ///
 /// Any type with an appropriate [`Display`](std::fmt::Display) implementation can be
@@ -104,7 +104,7 @@ macro_rules! triton_program {
     }};
 }
 
-/// Parse [Triton assembly][tasm] into a list of labelled
+/// Compile [Triton assembly][tasm] into a list of labelled
 /// [`Instruction`](crate::instruction::LabelledInstruction)s.
 /// Similar to [`triton_program!`](crate::triton_program), it is possible to use string-like
 /// interpolation to insert instructions, arguments, labels, or other expressions.
@@ -123,7 +123,7 @@ macro_rules! triton_program {
 ///     push {push_argument}
 ///     some_other_label: skiz halt return
 /// );
-/// assert_eq!(6, instructions.len());
+/// assert_eq!(7, instructions.len());
 /// ```
 ///
 /// # Panics
@@ -161,8 +161,20 @@ macro_rules! triton_asm {
     };
     ($($source_code:tt)*) => {{
         let source_code = $crate::triton_asm!(@fmt "",; $($source_code)*);
-        let (_, instructions) = $crate::parser::program(&source_code).unwrap();
-        $crate::parser::to_labelled(&instructions)
+        let (_, instructions) = $crate::parser::tokenize(&source_code).unwrap();
+        $crate::parser::to_labelled_instructions(&instructions)
+    }};
+}
+
+/// Compile a single [Triton assembly][tasm] instruction, given as one identifier, into a
+/// [`LabelledInstruction`](crate::instruction::LabelledInstruction).
+///
+/// [tasm]: https://triton-vm.org/spec/instructions.html
+#[macro_export]
+macro_rules! triton_instr {
+    ($instr:ident) => {{
+        let (_, instructions) = $crate::parser::tokenize(stringify!($instr)).unwrap();
+        instructions[0].to_labelled_instruction()
     }};
 }
 
diff --git a/triton-vm/src/parser.rs b/triton-vm/src/parser.rs
index 73bb7473..817eeb86 100644
--- a/triton-vm/src/parser.rs
+++ b/triton-vm/src/parser.rs
@@ -35,18 +35,21 @@ pub struct ParseError<'a> {
     pub errors: VerboseError<&'a str>,
 }
 
-/// A `ParsedInstruction` has `call` addresses encoded as label names.
+/// An `InstructionToken` is either an instruction with a label, or a
+/// label itself. It is an intermediate object used in the middle
+/// of the compilation pipeline. You probably want
+/// [`LabelledInstruction`].
 #[derive(Debug, Clone, PartialEq, Eq, Hash)]
-pub enum ParsedInstruction<'a> {
+pub enum InstructionToken<'a> {
     Instruction(AnInstruction<String>, &'a str),
     Label(String, &'a str),
 }
 
-impl<'a> std::fmt::Display for ParsedInstruction<'a> {
+impl<'a> std::fmt::Display for InstructionToken<'a> {
     fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
         match self {
-            ParsedInstruction::Instruction(instr, _) => write!(f, "{instr}"),
-            ParsedInstruction::Label(label_name, _) => write!(f, "{label_name}:"),
+            InstructionToken::Instruction(instr, _) => write!(f, "{instr}"),
+            InstructionToken::Label(label_name, _) => write!(f, "{label_name}:"),
         }
     }
 }
@@ -59,16 +62,16 @@ impl<'a> std::fmt::Display for ParseError<'a> {
 
 impl<'a> Error for ParseError<'a> {}
 
-impl<'a> ParsedInstruction<'a> {
+impl<'a> InstructionToken<'a> {
     pub fn token_str(&self) -> &'a str {
         match self {
-            ParsedInstruction::Instruction(_, token_str) => token_str,
-            ParsedInstruction::Label(_, token_str) => token_str,
+            InstructionToken::Instruction(_, token_str) => token_str,
+            InstructionToken::Label(_, token_str) => token_str,
         }
     }
 
-    pub fn to_labelled(&self) -> LabelledInstruction {
-        use ParsedInstruction::*;
+    pub fn to_labelled_instruction(&self) -> LabelledInstruction {
+        use InstructionToken::*;
         match self {
             Instruction(instr, _) => LabelledInstruction::Instruction(instr.to_owned()),
             Label(label, _) => LabelledInstruction::Label(label.to_owned()),
@@ -76,10 +79,10 @@ impl<'a> ParsedInstruction<'a> {
     }
 }
 
-pub fn to_labelled(instructions: &[ParsedInstruction]) -> Vec<LabelledInstruction> {
+pub fn to_labelled_instructions(instructions: &[InstructionToken]) -> Vec<LabelledInstruction> {
     instructions
         .iter()
-        .map(|instruction| instruction.to_labelled())
+        .map(|instruction| instruction.to_labelled_instruction())
         .collect()
 }
 
@@ -104,8 +107,8 @@ pub fn pretty_print_error(s: &str, mut e: VerboseError<&str>) -> String {
 }
 
 /// Parse a program
-pub fn parse(input: &str) -> Result<Vec<ParsedInstruction>, ParseError> {
-    let instructions = match program(input).finish() {
+pub fn parse(input: &str) -> Result<Vec<InstructionToken>, ParseError> {
+    let instructions = match tokenize(input).finish() {
         Ok((_s, instructions)) => Ok(instructions),
         Err(errors) => Err(ParseError { input, errors }),
     }?;
@@ -117,15 +120,15 @@ pub fn parse(input: &str) -> Result<Vec<ParsedInstruction>, ParseError> {
 
 fn scan_missing_duplicate_labels<'a>(
     input: &'a str,
-    instructions: &[ParsedInstruction<'a>],
+    instructions: &[InstructionToken<'a>],
 ) -> Result<(), ParseError<'a>> {
-    let mut seen: HashMap<&str, ParsedInstruction> = HashMap::default();
-    let mut duplicates: HashSet<ParsedInstruction> = HashSet::default();
-    let mut missings: HashSet<ParsedInstruction> = HashSet::default();
+    let mut seen: HashMap<&str, InstructionToken> = HashMap::default();
+    let mut duplicates: HashSet<InstructionToken> = HashSet::default();
+    let mut missings: HashSet<InstructionToken> = HashSet::default();
 
     // Find duplicate labels, including the first occurrence of each duplicate.
     for instruction in instructions.iter() {
-        if let ParsedInstruction::Label(label, _token_s) = instruction {
+        if let InstructionToken::Label(label, _token_s) = instruction {
             if let Some(first_label) = seen.get(label.as_str()) {
                 duplicates.insert(first_label.to_owned());
                 duplicates.insert(instruction.to_owned());
@@ -137,7 +140,7 @@ fn scan_missing_duplicate_labels<'a>(
 
     // Find missing labels
     for instruction in instructions.iter() {
-        if let ParsedInstruction::Instruction(Call(addr), _token_s) = instruction {
+        if let InstructionToken::Instruction(Call(addr), _token_s) = instruction {
             if !seen.contains_key(addr.as_str()) {
                 missings.insert(instruction.to_owned());
             }
@@ -176,7 +179,8 @@ fn scan_missing_duplicate_labels<'a>(
 /// error type, but we want `nom::error::VerboseError` as it allows `context()`.
 type ParseResult<'input, Out> = IResult<&'input str, Out, VerboseError<&'input str>>;
 
-pub fn program(s: &str) -> ParseResult<Vec<ParsedInstruction>> {
+/// Tokenize Triton assembly source code into a list of [`InstructionToken`]s.
+pub fn tokenize(s: &str) -> ParseResult<Vec<InstructionToken>> {
     let (s, _) = comment_or_whitespace0(s)?;
     let (s, instructions) = many0(alt((label, labelled_instruction)))(s)?;
     let (s, _) = context("expecting label, instruction or eof", eof)(s)?;
@@ -184,12 +188,12 @@ pub fn program(s: &str) -> ParseResult<Vec<ParsedInstruction>> {
     Ok((s, instructions))
 }
 
-fn labelled_instruction(s_instr: &str) -> ParseResult<ParsedInstruction> {
+fn labelled_instruction(s_instr: &str) -> ParseResult<InstructionToken> {
     let (s, instr) = an_instruction(s_instr)?;
-    Ok((s, ParsedInstruction::Instruction(instr, s_instr)))
+    Ok((s, InstructionToken::Instruction(instr, s_instr)))
 }
 
-fn label(label_s: &str) -> ParseResult<ParsedInstruction> {
+fn label(label_s: &str) -> ParseResult<InstructionToken> {
     let (s, addr) = label_addr(label_s)?;
     let (s, _) = token0("")(s)?; // whitespace between label and ':' is allowed
     let (s, _) = token0(":")(s)?; // don't require space after ':'
@@ -201,7 +205,7 @@ fn label(label_s: &str) -> ParseResult<ParsedInstruction> {
         return cut(context("label cannot be named after instruction", fail))(label_s);
     }
 
-    Ok((s, ParsedInstruction::Label(addr, label_s)))
+    Ok((s, InstructionToken::Label(addr, label_s)))
 }
 
 fn an_instruction(s: &str) -> ParseResult<AnInstruction<String>> {
@@ -509,6 +513,7 @@ pub mod parser_tests {
 
     use crate::program::Program;
     use crate::triton_asm;
+    use crate::triton_instr;
     use crate::triton_program;
 
     use super::*;
@@ -530,7 +535,7 @@ pub mod parser_tests {
         match parse(test_case.input) {
             Ok(actual) => assert_eq!(
                 test_case.expected,
-                Program::new(&to_labelled(&actual)),
+                Program::new(&to_labelled_instructions(&actual)),
                 "{}",
                 test_case.message
             ),
@@ -1010,4 +1015,9 @@ pub mod parser_tests {
             .run(vec![], vec![])
             .unwrap();
     }
+
+    #[test]
+    fn triton_instruction_macro() {
+        assert_eq!(LabelledInstruction::Instruction(Halt), triton_instr!(halt));
+    }
 }
diff --git a/triton-vm/src/program.rs b/triton-vm/src/program.rs
index 5618be96..e99309e4 100644
--- a/triton-vm/src/program.rs
+++ b/triton-vm/src/program.rs
@@ -19,7 +19,7 @@ use crate::instruction::convert_all_labels_to_addresses;
 use crate::instruction::Instruction;
 use crate::instruction::LabelledInstruction;
 use crate::parser::parse;
-use crate::parser::to_labelled;
+use crate::parser::to_labelled_instructions;
 use crate::vm::VMState;
 
 /// A `Program` is a `Vec<Instruction>` that contains duplicate elements for instructions with a
@@ -139,7 +139,7 @@ impl Program {
     /// Create a `Program` by parsing source code.
     pub fn from_code(code: &str) -> Result<Self> {
         parse(code)
-            .map(|program| Program::new(&to_labelled(&program)))
+            .map(|program| Program::new(&to_labelled_instructions(&program)))
             .map_err(|err| anyhow::anyhow!("{}", err))
     }