diff --git a/.gitignore b/.gitignore index 6f429f5..165e8e1 100644 --- a/.gitignore +++ b/.gitignore @@ -3,4 +3,6 @@ node_modules .vscode-test /target Cargo.lock -assets \ No newline at end of file +assets +*.new +*.pending-snap \ No newline at end of file diff --git a/Cargo.toml b/Cargo.toml index 5dc248b..bfbc8e4 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -10,5 +10,6 @@ syntax = {path = './crates/syntax', version = "0.1.0"} circom-lsp = {path = './crates/lsp', version = "*"} common = { path = './crates/common', version = "*"} database = {path = "./crates/database", version = "*"} + [workspace.package] rust-version = "1.71" diff --git a/SNAPSHOT_TEST.md b/SNAPSHOT_TEST.md new file mode 100644 index 0000000..304166f --- /dev/null +++ b/SNAPSHOT_TEST.md @@ -0,0 +1,11 @@ +# Snapshot Test + +* Run all tests: + ``` + cargo test + ``` + +* Review snapshot changes + ``` + cargo insta review + ``` \ No newline at end of file diff --git a/crates/parser/Cargo.toml b/crates/parser/Cargo.toml index 5021d30..0173d54 100644 --- a/crates/parser/Cargo.toml +++ b/crates/parser/Cargo.toml @@ -11,7 +11,16 @@ lsp-types = {version = "0.94.1", features = ["proposed"]} rowan = "0.15.15" num-traits = "0.2" num-derive = "0.2" - +serde = "1.0.216" [profile.dev] -debug = 2 \ No newline at end of file +debug = 2 + +[dev-dependencies] +# for snapshot testing, yaml format +insta = { version = "1.41.1", features = ["yaml"] } + +[profile.dev.package] +# compile slightly slower once, but use less memory, have faster diffs +insta.opt-level = 3 +similar.opt-level = 3 \ No newline at end of file diff --git a/crates/parser/src/event.rs b/crates/parser/src/event.rs index 16996b9..bd9c2cc 100644 --- a/crates/parser/src/event.rs +++ b/crates/parser/src/event.rs @@ -1,8 +1,9 @@ use crate::token_kind::TokenKind; -#[derive(Debug, Clone, Copy)] +#[derive(Debug, Clone)] pub enum Event { Open { kind: TokenKind }, Close, TokenPosition(usize), + ErrorReport(String), } diff --git a/crates/parser/src/grammar/block.rs b/crates/parser/src/grammar/block.rs index 03f7ed2..c6317d5 100644 --- a/crates/parser/src/grammar/block.rs +++ b/crates/parser/src/grammar/block.rs @@ -1,5 +1,13 @@ use super::*; +/* +{ + / + / + .... + / +} +*/ pub fn block(p: &mut Parser) { p.inc_rcurly(); @@ -7,7 +15,7 @@ pub fn block(p: &mut Parser) { p.advance_with_error("Miss {"); } else { let m = p.open(); - p.eat(LCurly); + p.expect(LCurly); let stmt_marker = p.open(); while !p.at(RCurly) && !p.eof() { let kind = p.current(); diff --git a/crates/parser/src/grammar/declaration.rs b/crates/parser/src/grammar/declaration.rs index c37630c..c5dd125 100644 --- a/crates/parser/src/grammar/declaration.rs +++ b/crates/parser/src/grammar/declaration.rs @@ -128,9 +128,3 @@ pub(super) fn declaration(p: &mut Parser) { _ => unreachable!(), } } - -#[cfg(test)] -mod declar_tests { - #[test] - fn signal_with_tag() {} -} diff --git a/crates/parser/src/grammar/expression.rs b/crates/parser/src/grammar/expression.rs index 8c7fad6..4604da4 100644 --- a/crates/parser/src/grammar/expression.rs +++ b/crates/parser/src/grammar/expression.rs @@ -156,25 +156,3 @@ fn circom_expression(p: &mut Parser) { } } } -// #[cfg(test)] -// mod tests { - -// use rowan::SyntaxNode; - -// use crate::{syntax_node::CircomLang}; - -// use super::{entry::Scope, Parser}; - -// #[test] -// fn test_expression() { -// let source = r#" -// { -// a.tmp <== 100; -// b[1].c <== 10; -// } -// "#; -// let green = Parser::parse_scope(source, Scope::Block); -// let node = SyntaxNode::::new_root(green); -// println!("{:#?}", node); -// } -// } diff --git a/crates/parser/src/grammar/pragma.rs b/crates/parser/src/grammar/pragma.rs index bacf839..9ce3eaf 100644 --- a/crates/parser/src/grammar/pragma.rs +++ b/crates/parser/src/grammar/pragma.rs @@ -14,26 +14,3 @@ pub fn pragma(p: &mut Parser) { p.expect(Semicolon); p.close(m, Pragma); } - -// #[cfg(test)] -// mod tests { -// #[test] -// fn pragam_test() { -// use crate::{ -// ast::{AstNode, AstPragma}, -// syntax_node::SyntaxNode, -// token_kind::TokenKind, -// }; - -// use super::{entry::Scope, Parser}; - -// let source: String = r#"pragma circom 2.0.1;"#.to_string(); - -// let green_node = Parser::parse_scope(&source, Scope::Pragma); -// let node = SyntaxNode::new_root(green_node); - -// let pragma = AstPragma::cast(node.last_child().unwrap()).unwrap(); - -// assert!(pragma.version().unwrap().syntax().kind() == TokenKind::Version); -// } -// } diff --git a/crates/parser/src/grammar/statement.rs b/crates/parser/src/grammar/statement.rs index a4ee698..84ca0a6 100644 --- a/crates/parser/src/grammar/statement.rs +++ b/crates/parser/src/grammar/statement.rs @@ -9,6 +9,12 @@ pub(super) fn statement(p: &mut Parser) { p.close(m, Statement); } +/* +if (expr) + +else + +*/ fn if_statement(p: &mut Parser) { let m = p.open(); p.expect(IfKw); @@ -25,6 +31,7 @@ fn if_statement(p: &mut Parser) { /** * no if condition here. + * for/while/return/assert... */ fn statement_no_condition(p: &mut Parser) { match p.current() { @@ -50,6 +57,10 @@ fn statement_no_condition(p: &mut Parser) { } } +/* +for (/; ; ) + +*/ fn for_statement(p: &mut Parser) { let m = p.open(); p.expect(ForKw); @@ -70,6 +81,10 @@ fn for_statement(p: &mut Parser) { p.close(m, ForLoop); } +/* +while () + +*/ fn while_statement(p: &mut Parser) { p.expect(WhileKw); p.expect(LParen); @@ -78,6 +93,9 @@ fn while_statement(p: &mut Parser) { statement(p); } +/* +assert() +*/ fn assert_statement(p: &mut Parser) { let m = p.open(); p.expect(AssertKw); @@ -87,6 +105,9 @@ fn assert_statement(p: &mut Parser) { p.close(m, AssertKw); } +/* +log() +*/ fn log_statement(p: &mut Parser) { let m = p.open(); p.expect(LogKw); @@ -109,6 +130,9 @@ fn log_statement(p: &mut Parser) { p.close(m, LogKw); } +/* +return +*/ fn return_statement(p: &mut Parser) { let m = p.open(); p.expect(ReturnKw); @@ -116,6 +140,9 @@ fn return_statement(p: &mut Parser) { p.close(m, ReturnKw); } +/* + +*/ fn assignment_statement(p: &mut Parser) { let m = p.open(); @@ -155,20 +182,3 @@ fn assignment_statement(p: &mut Parser) { p.close(m, Error); } } - -#[cfg(test)] -mod tests { - - #[test] - fn if_statement_test() { - let _source = r#" - assert(1 == 2); - "#; - // let mut parser = Parser::new(source); - - // statement(&mut parser); - // let cst = parser.build_tree().ok().unwrap(); - - // println!("{:?}", cst); - } -} diff --git a/crates/parser/src/grammar/template.rs b/crates/parser/src/grammar/template.rs index 693fe68..a502365 100644 --- a/crates/parser/src/grammar/template.rs +++ b/crates/parser/src/grammar/template.rs @@ -23,42 +23,3 @@ pub fn template(p: &mut Parser) { p.close(m, TemplateDef); } - -// #[cfg(test)] -// mod tests { -// use crate::ast::AstTemplateDef; - -// #[test] -// fn template_parse_test() { -// use crate::{ast::AstNode, syntax_node::SyntaxNode}; - -// use super::{entry::Scope, Parser}; - -// let source: String = r#" -// template Multiplier2 (a, b, c) { - -// // Declaration of signals. -// signal input a; -// signal input b; -// signal output c; - -// // Constraints. -// c <== a * b; -// } - -// "# -// .to_string(); - -// let green_node = ::parse_scope(&source, Scope::Template); -// let node = SyntaxNode::new_root(green_node); - -// let ast_template = AstTemplateDef::cast(node); - -// if let Some(ast_internal) = ast_template { -// println!( -// "name {:?}", -// ast_internal.template_name().unwrap().syntax().text() -// ); -// } -// } -// } diff --git a/crates/parser/src/input.rs b/crates/parser/src/input.rs index 8ddf602..6278fd8 100644 --- a/crates/parser/src/input.rs +++ b/crates/parser/src/input.rs @@ -2,9 +2,11 @@ use std::ops::Range; use logos::Lexer; +use serde::Serialize; + use crate::token_kind::TokenKind; -#[derive(Debug, PartialEq)] +#[derive(Debug, PartialEq, Serialize)] pub struct Input<'a> { kind: Vec, source: &'a str, @@ -81,65 +83,12 @@ impl<'a> Input<'a> { #[cfg(test)] mod tests { - use crate::token_kind::TokenKind::{self, *}; - use super::Input; - fn test(source: &str, expected_input: Input) { + fn test(source: &str, snapshot_name: &str) { let input = Input::new(&source); - assert_eq!( - expected_input, input, - "Tokens extract from source code are not correct" - ); - - // test size method - let expected_size = input.kind.len(); - let size = input.size(); - assert_eq!(expected_size, size, "size method failed"); - - // test methods with index out of bound - let index = input.kind.len(); - - let expected_token_value = None; - let token_value = input.token_value(index); - assert_eq!( - expected_token_value, token_value, - "token_value failed (case: index out of bound)" - ); - - let expected_kind = TokenKind::EOF; - let kind = input.kind_of(index); - assert_eq!( - expected_kind, kind, - "kind_of failed (case: index out of bound)" - ); - - let expected_position = None; - let position = input.position_of(index); - assert_eq!( - expected_position, position, - "position_of failed (case: index out of bound)" - ); - - // test methods with index in bound - if input.size() == 0 { - return; - } - - let index = input.size() / 2; // a valid index if input size > 0 - - let expected_token_value = &input.source[input.position[index].clone()]; - let token_value = input.token_value(index).unwrap(); - assert_eq!(expected_token_value, token_value, "token_value failed"); - - let expected_kind = input.kind[index]; - let kind = input.kind_of(index); - assert_eq!(expected_kind, kind, "kind_of failed"); - - let expected_position = input.position[index].clone(); - let position = input.position_of(index).unwrap(); - assert_eq!(expected_position, position, "position_of failed"); + insta::assert_yaml_snapshot!(snapshot_name, input); } #[test] @@ -148,40 +97,7 @@ mod tests { /*a + b == 10*/ a + 10 "#; - - let expected_input = Input { - kind: vec![ - TokenKind::EndLine, - TokenKind::WhiteSpace, - TokenKind::BlockComment, - TokenKind::EndLine, - TokenKind::WhiteSpace, - TokenKind::Identifier, - TokenKind::WhiteSpace, - TokenKind::Add, - TokenKind::WhiteSpace, - TokenKind::Number, - TokenKind::EndLine, - TokenKind::WhiteSpace, - ], - source: &source, - position: vec![ - { 0..1 }, - { 1..9 }, - { 9..24 }, - { 24..25 }, - { 25..33 }, - { 33..34 }, - { 34..35 }, - { 35..36 }, - { 36..37 }, - { 37..39 }, - { 39..40 }, - { 40..44 }, - ], - }; - - test(source, expected_input); + test(source, "test_comment_block"); } #[test] @@ -194,34 +110,7 @@ mod tests { /* "#; - - let expected_input = Input { - kind: vec![ - TokenKind::EndLine, - TokenKind::WhiteSpace, - TokenKind::Pragma, - TokenKind::WhiteSpace, - TokenKind::Version, - TokenKind::Semicolon, - TokenKind::EndLine, - TokenKind::WhiteSpace, - TokenKind::Error, - ], - source: &source, - position: vec![ - 0..1, - 1..9, - 9..15, - 15..16, - 16..21, - 21..22, - 22..23, - 23..31, - 31..94, - ], - }; - - test(source, expected_input); + test(source, "test_comment_error"); } #[test] @@ -232,46 +121,7 @@ mod tests { pragma circom 2.0.0; "#; - - let expected_input = Input { - kind: vec![ - EndLine, - WhiteSpace, - BlockComment, - EndLine, - EndLine, - WhiteSpace, - Pragma, - WhiteSpace, - Circom, - WhiteSpace, - Version, - Semicolon, - EndLine, - EndLine, - WhiteSpace, - ], - source: &source, - position: vec![ - 0..1, - 1..9, - 9..38, - 38..39, - 39..40, - 40..44, - 44..50, - 50..51, - 51..57, - 57..58, - 58..63, - 63..64, - 64..65, - 65..66, - 66..70, - ], - }; - - test(source, expected_input); + test(source, "test_pragma"); } #[test] @@ -286,104 +136,37 @@ mod tests { } return r; }"#; - - let expected_input = Input { - kind: vec![ - EndLine, WhiteSpace, FunctionKw, WhiteSpace, Identifier, LParen, Identifier, - RParen, WhiteSpace, LCurly, EndLine, WhiteSpace, VarKw, WhiteSpace, Identifier, - WhiteSpace, Assign, WhiteSpace, Number, Semicolon, EndLine, WhiteSpace, VarKw, - WhiteSpace, Identifier, WhiteSpace, Assign, WhiteSpace, Number, Semicolon, EndLine, - WhiteSpace, WhileKw, WhiteSpace, LParen, Identifier, Sub, Number, LessThan, - Identifier, RParen, WhiteSpace, LCurly, EndLine, WhiteSpace, Identifier, Add, Add, - Semicolon, EndLine, WhiteSpace, Identifier, WhiteSpace, Mul, Assign, WhiteSpace, - Number, Semicolon, EndLine, WhiteSpace, RCurly, EndLine, WhiteSpace, ReturnKw, - WhiteSpace, Identifier, Semicolon, EndLine, WhiteSpace, RCurly, - ], - source: &source, - position: vec![ - 0..1, - 1..5, - 5..13, - 13..14, - 14..19, - 19..20, - 20..21, - 21..22, - 22..23, - 23..24, - 24..25, - 25..33, - 33..36, - 36..37, - 37..38, - 38..39, - 39..40, - 40..41, - 41..42, - 42..43, - 43..44, - 44..52, - 52..55, - 55..56, - 56..57, - 57..58, - 58..59, - 59..60, - 60..61, - 61..62, - 62..63, - 63..71, - 71..76, - 76..77, - 77..78, - 78..79, - 79..80, - 80..81, - 81..82, - 82..83, - 83..84, - 84..85, - 85..86, - 86..87, - 87..99, - 99..100, - 100..101, - 101..102, - 102..103, - 103..104, - 104..116, - 116..117, - 117..118, - 118..119, - 119..120, - 120..121, - 121..122, - 122..123, - 123..124, - 124..132, - 132..133, - 133..134, - 134..142, - 142..148, - 148..149, - 149..150, - 150..151, - 151..152, - 152..156, - 156..157, - ], - }; - - test(source, expected_input); + test(source, "test_function"); + test(source, "test_function"); } - // #[test] - // fn test_gen() { - // let source = r#" - // "#; - - // let input = Input::new(&source); - // println!("{:?}", input.kind); - // println!("{:?}", input.position); - // } + #[test] + fn test_operators() { + let source = r#" + ({[]}) + ;.,: + && & + || | + != ! + === == = + --> ==> + <-- <== + <= < + >= > + ++ += + + -- -= - + **= ** + * *= + / /= + \ \= + % %= + ^ ^= + ~ ~= + >> >>= + << <<= + & &= + | |= + }"#; + test(source, "test_operators"); + } } diff --git a/crates/parser/src/output.rs b/crates/parser/src/output.rs index 271c2c3..b594a3e 100644 --- a/crates/parser/src/output.rs +++ b/crates/parser/src/output.rs @@ -3,6 +3,7 @@ use crate::{event::Event, token_kind::TokenKind}; #[derive(Debug)] pub enum Child { Token(usize), // position of token, + Error(String), Tree(Tree), } @@ -58,6 +59,13 @@ impl From> for Output { .children .push(Child::Token(*token)); } + Event::ErrorReport(error) => { + stack + .last_mut() + .unwrap() + .children + .push(Child::Error(error.clone())); + } } } } diff --git a/crates/parser/src/parser.rs b/crates/parser/src/parser.rs index a4f0efe..34a2f64 100644 --- a/crates/parser/src/parser.rs +++ b/crates/parser/src/parser.rs @@ -28,7 +28,29 @@ pub enum ParserError { } impl<'a> Parser<'a> { + pub fn wrap_trivial_tokens(&mut self) -> TokenKind { + loop { + let kind = self.input.kind_of(self.pos); + + if kind.is_trivial() == false { + return kind; + } + + self.events.push(Event::Open { kind }); + + self.fuel.set(256); + self.events.push(Event::TokenPosition(self.pos)); + self.skip(); + + self.events.push(Event::Close); + } + } + pub fn open(&mut self) -> Marker { + if self.events.len() > 0 { + self.wrap_trivial_tokens(); + } + let marker = Marker::Open(self.events.len()); self.events.push(Event::Open { kind: TokenKind::Error, @@ -88,6 +110,15 @@ impl<'a> Parser<'a> { } self.close(m, TokenKind::Error); } + + pub fn error_report(&mut self, error: String) { + let m = self.open(); + + let token = Event::ErrorReport(error); + self.events.push(token); + + self.close(m, TokenKind::Error); + } } impl<'a> Parser<'a> { @@ -110,19 +141,7 @@ impl<'a> Parser<'a> { } pub fn current(&mut self) -> TokenKind { - let mut kind: TokenKind; - loop { - kind = self.input.kind_of(self.pos); - if !kind.is_trivial() { - break; - } - - let m = self.open(); - self.advance(); - self.close(m, kind); - } - - kind + self.wrap_trivial_tokens() } pub fn next(&mut self) -> TokenKind { @@ -162,6 +181,7 @@ impl<'a> Parser<'a> { self.advance(); return true; } + false } @@ -170,19 +190,17 @@ impl<'a> Parser<'a> { if kinds.contains(&kind) { self.advance(); } else { - // error report - // println!("expect {:?} but got {:?}", kinds, kind); + let error = format!("expect {:?} but got {:?}", kinds, kind); + self.error_report(error); } } pub fn expect(&mut self, kind: TokenKind) { - let _current = self.current(); - if self.at(kind) { self.advance(); } else { - // error report - // println!("expect {:?} but got {:?}", kind, current); + let error = format!("expect {:?} but got {:?}", kind, self.current()); + self.error_report(error); } } diff --git a/crates/parser/src/snapshots/parser__input__tests__test_comment_block.snap b/crates/parser/src/snapshots/parser__input__tests__test_comment_block.snap new file mode 100644 index 0000000..f6bdb27 --- /dev/null +++ b/crates/parser/src/snapshots/parser__input__tests__test_comment_block.snap @@ -0,0 +1,43 @@ +--- +source: crates/parser/src/input.rs +expression: input +--- +kind: + - EndLine + - WhiteSpace + - BlockComment + - EndLine + - WhiteSpace + - Identifier + - WhiteSpace + - Add + - WhiteSpace + - Number + - EndLine + - WhiteSpace +source: "\n /*a + b == 10*/\n a + 10\n " +position: + - start: 0 + end: 1 + - start: 1 + end: 9 + - start: 9 + end: 24 + - start: 24 + end: 25 + - start: 25 + end: 33 + - start: 33 + end: 34 + - start: 34 + end: 35 + - start: 35 + end: 36 + - start: 36 + end: 37 + - start: 37 + end: 39 + - start: 39 + end: 40 + - start: 40 + end: 44 diff --git a/crates/parser/src/snapshots/parser__input__tests__test_comment_error.snap b/crates/parser/src/snapshots/parser__input__tests__test_comment_error.snap new file mode 100644 index 0000000..f8307a5 --- /dev/null +++ b/crates/parser/src/snapshots/parser__input__tests__test_comment_error.snap @@ -0,0 +1,34 @@ +--- +source: crates/parser/src/input.rs +expression: input +--- +kind: + - EndLine + - WhiteSpace + - Pragma + - WhiteSpace + - Version + - Semicolon + - EndLine + - WhiteSpace + - Error +source: "\n pragma 2.1.1;\n /*a + b == 10*\n a + 10\n template\n\n /*\n " +position: + - start: 0 + end: 1 + - start: 1 + end: 9 + - start: 9 + end: 15 + - start: 15 + end: 16 + - start: 16 + end: 21 + - start: 21 + end: 22 + - start: 22 + end: 23 + - start: 23 + end: 31 + - start: 31 + end: 94 diff --git a/crates/parser/src/snapshots/parser__input__tests__test_function.snap b/crates/parser/src/snapshots/parser__input__tests__test_function.snap new file mode 100644 index 0000000..39cbb10 --- /dev/null +++ b/crates/parser/src/snapshots/parser__input__tests__test_function.snap @@ -0,0 +1,211 @@ +--- +source: crates/parser/src/input.rs +expression: input +--- +kind: + - EndLine + - WhiteSpace + - FunctionKw + - WhiteSpace + - Identifier + - LParen + - Identifier + - RParen + - WhiteSpace + - LCurly + - EndLine + - WhiteSpace + - VarKw + - WhiteSpace + - Identifier + - WhiteSpace + - Assign + - WhiteSpace + - Number + - Semicolon + - EndLine + - WhiteSpace + - VarKw + - WhiteSpace + - Identifier + - WhiteSpace + - Assign + - WhiteSpace + - Number + - Semicolon + - EndLine + - WhiteSpace + - WhileKw + - WhiteSpace + - LParen + - Identifier + - Sub + - Number + - LessThan + - Identifier + - RParen + - WhiteSpace + - LCurly + - EndLine + - WhiteSpace + - Identifier + - UnitInc + - Semicolon + - EndLine + - WhiteSpace + - Identifier + - WhiteSpace + - MulAssign + - WhiteSpace + - Number + - Semicolon + - EndLine + - WhiteSpace + - RCurly + - EndLine + - WhiteSpace + - ReturnKw + - WhiteSpace + - Identifier + - Semicolon + - EndLine + - WhiteSpace + - RCurly +source: "\n function nbits(a) {\n var n = 1;\n var r = 0;\n while (n-1 ==>\n <-- <==\n <= <\n >= >\n ++ += +\n -- -= -\n **= **\n * *=\n / /=\n \\ \\=\n % %=\n ^ ^=\n ~ ~=\n >> >>=\n << <<=\n & &=\n | |=\n }" +position: + - start: 0 + end: 1 + - start: 1 + end: 9 + - start: 9 + end: 10 + - start: 10 + end: 11 + - start: 11 + end: 12 + - start: 12 + end: 13 + - start: 13 + end: 14 + - start: 14 + end: 15 + - start: 15 + end: 16 + - start: 16 + end: 24 + - start: 24 + end: 25 + - start: 25 + end: 26 + - start: 26 + end: 27 + - start: 27 + end: 28 + - start: 28 + end: 29 + - start: 29 + end: 37 + - start: 37 + end: 39 + - start: 39 + end: 40 + - start: 40 + end: 41 + - start: 41 + end: 42 + - start: 42 + end: 50 + - start: 50 + end: 52 + - start: 52 + end: 53 + - start: 53 + end: 54 + - start: 54 + end: 55 + - start: 55 + end: 63 + - start: 63 + end: 65 + - start: 65 + end: 66 + - start: 66 + end: 67 + - start: 67 + end: 68 + - start: 68 + end: 76 + - start: 76 + end: 79 + - start: 79 + end: 80 + - start: 80 + end: 82 + - start: 82 + end: 83 + - start: 83 + end: 84 + - start: 84 + end: 85 + - start: 85 + end: 93 + - start: 93 + end: 96 + - start: 96 + end: 97 + - start: 97 + end: 100 + - start: 100 + end: 101 + - start: 101 + end: 109 + - start: 109 + end: 112 + - start: 112 + end: 113 + - start: 113 + end: 116 + - start: 116 + end: 117 + - start: 117 + end: 125 + - start: 125 + end: 127 + - start: 127 + end: 128 + - start: 128 + end: 129 + - start: 129 + end: 130 + - start: 130 + end: 138 + - start: 138 + end: 140 + - start: 140 + end: 141 + - start: 141 + end: 142 + - start: 142 + end: 143 + - start: 143 + end: 151 + - start: 151 + end: 153 + - start: 153 + end: 154 + - start: 154 + end: 156 + - start: 156 + end: 157 + - start: 157 + end: 158 + - start: 158 + end: 159 + - start: 159 + end: 167 + - start: 167 + end: 169 + - start: 169 + end: 170 + - start: 170 + end: 172 + - start: 172 + end: 173 + - start: 173 + end: 174 + - start: 174 + end: 175 + - start: 175 + end: 183 + - start: 183 + end: 186 + - start: 186 + end: 187 + - start: 187 + end: 189 + - start: 189 + end: 190 + - start: 190 + end: 198 + - start: 198 + end: 199 + - start: 199 + end: 200 + - start: 200 + end: 202 + - start: 202 + end: 203 + - start: 203 + end: 211 + - start: 211 + end: 212 + - start: 212 + end: 213 + - start: 213 + end: 215 + - start: 215 + end: 216 + - start: 216 + end: 224 + - start: 224 + end: 225 + - start: 225 + end: 226 + - start: 226 + end: 228 + - start: 228 + end: 229 + - start: 229 + end: 237 + - start: 237 + end: 238 + - start: 238 + end: 239 + - start: 239 + end: 241 + - start: 241 + end: 242 + - start: 242 + end: 250 + - start: 250 + end: 251 + - start: 251 + end: 252 + - start: 252 + end: 254 + - start: 254 + end: 255 + - start: 255 + end: 263 + - start: 263 + end: 264 + - start: 264 + end: 265 + - start: 265 + end: 267 + - start: 267 + end: 268 + - start: 268 + end: 276 + - start: 276 + end: 278 + - start: 278 + end: 279 + - start: 279 + end: 282 + - start: 282 + end: 283 + - start: 283 + end: 291 + - start: 291 + end: 293 + - start: 293 + end: 294 + - start: 294 + end: 297 + - start: 297 + end: 298 + - start: 298 + end: 306 + - start: 306 + end: 307 + - start: 307 + end: 308 + - start: 308 + end: 310 + - start: 310 + end: 311 + - start: 311 + end: 319 + - start: 319 + end: 320 + - start: 320 + end: 321 + - start: 321 + end: 323 + - start: 323 + end: 324 + - start: 324 + end: 328 + - start: 328 + end: 329 diff --git a/crates/parser/src/snapshots/parser__input__tests__test_pragma.snap b/crates/parser/src/snapshots/parser__input__tests__test_pragma.snap new file mode 100644 index 0000000..48b3f54 --- /dev/null +++ b/crates/parser/src/snapshots/parser__input__tests__test_pragma.snap @@ -0,0 +1,52 @@ +--- +source: crates/parser/src/input.rs +expression: input +--- +kind: + - EndLine + - WhiteSpace + - BlockComment + - EndLine + - EndLine + - WhiteSpace + - Pragma + - WhiteSpace + - Circom + - WhiteSpace + - Version + - Semicolon + - EndLine + - EndLine + - WhiteSpace +source: "\n /* test pragma token kinds */\n\n pragma circom 2.0.0;\n\n " +position: + - start: 0 + end: 1 + - start: 1 + end: 9 + - start: 9 + end: 38 + - start: 38 + end: 39 + - start: 39 + end: 40 + - start: 40 + end: 44 + - start: 44 + end: 50 + - start: 50 + end: 51 + - start: 51 + end: 57 + - start: 57 + end: 58 + - start: 58 + end: 63 + - start: 63 + end: 64 + - start: 64 + end: 65 + - start: 65 + end: 66 + - start: 66 + end: 70 diff --git a/crates/parser/src/token_kind.rs b/crates/parser/src/token_kind.rs index 9ebba15..2101ef9 100644 --- a/crates/parser/src/token_kind.rs +++ b/crates/parser/src/token_kind.rs @@ -1,260 +1,355 @@ -use logos::Logos; - -#[derive(Logos, Debug, PartialEq, Clone, Copy, Eq, PartialOrd, Ord, Hash)] -#[allow(non_camel_case_types)] -#[repr(u16)] -pub enum TokenKind { - #[error] - Error = 0, - #[regex(r"//[^\n]*")] - CommentLine, - #[token("/*")] - CommentBlockOpen, - #[token("*/")] - CommentBlockClose, - #[regex("[ \t]+")] - WhiteSpace, - #[regex("[\n]")] - EndLine, - #[token("pragma")] - Pragma, - #[token("circom")] - Circom, - #[regex("2.[0-9].[0-9]")] - Version, - #[regex("[0-9]+")] - Number, - #[regex("[$_]*[a-zA-Z][a-zA-Z0-9_$]*")] - Identifier, - #[regex(r#""[^"]*""#)] - CircomString, - #[token("template")] - TemplateKw, - #[token("function")] - FunctionKw, - #[token("component")] - ComponentKw, - #[token("main")] - MainKw, - #[token("public")] - PublicKw, - #[token("signal")] - SignalKw, - #[token("var")] - VarKw, - #[token("include")] - IncludeKw, - #[token("input")] - InputKw, - #[token("output")] - OutputKw, - #[token("log")] - LogKw, - #[token("(")] - LParen, - #[token(")")] - RParen, - #[token("{")] - LCurly, - #[token("}")] - RCurly, - #[token("[")] - LBracket, - #[token("]")] - RBracket, - #[token(";")] - Semicolon, - #[token(",")] - Comma, - #[token("=")] - Assign, - #[token("===")] - EqualSignal, - #[token("-->")] - LAssignSignal, - #[token("==>")] - LAssignContraintSignal, - #[token("<--")] - RAssignSignal, - #[token("<==")] - RAssignConstraintSignal, - #[token("+")] - Add, - #[token("-")] - Sub, - #[token("/")] - Div, - #[token("*")] - Mul, - #[token("!")] - Not, - #[token("~")] - BitNot, - #[token("**")] - Power, - #[token("\\")] - IntDiv, - #[token("%")] - Mod, - #[token("<<")] - ShiftL, - #[token(">>")] - ShiftR, - #[token("&")] - BitAnd, - #[token("|")] - BitOr, - #[token("^")] - BitXor, - #[token("==")] - Equal, - #[token("!=")] - NotEqual, - #[token("<")] - LessThan, - #[token(">")] - GreaterThan, - #[token("<=")] - LessThanAndEqual, - #[token(">=")] - GreaterThanAndEqual, - #[token("&&")] - BoolAnd, - #[token("||")] - BoolOr, - #[token("?")] - MarkQuestion, - #[token(":")] - Colon, - #[token(".")] - Dot, - #[token("if")] - IfKw, - #[token("else")] - ElseKw, - #[token("for")] - ForKw, - #[token("while")] - WhileKw, - #[token("return")] - ReturnKw, - #[token("assert")] - AssertKw, - ForLoop, - AssignStatement, - CircomProgram, - SignalOfComponent, - SignalHeader, - Block, - Tuple, - TupleInit, - Call, - TenaryConditional, - Condition, - Expression, - FunctionDef, - Statement, - StatementList, - ComponentDecl, - TemplateDef, - TemplateName, - FunctionName, - ParameterList, - SignalDecl, - VarDecl, - InputSignalDecl, - OutputSignalDecl, - ComponentCall, - ComponentIdentifier, - SignalIdentifier, - ArrayQuery, - ParserError, - BlockComment, - EOF, - ROOT, - __LAST, -} - -impl From for TokenKind { - #[inline] - fn from(d: u16) -> TokenKind { - assert!(d <= (TokenKind::__LAST as u16)); - unsafe { std::mem::transmute::(d) } - } -} - -impl From for TokenKind { - fn from(value: rowan::SyntaxKind) -> Self { - match value { - rowan::SyntaxKind(id) => TokenKind::from(id), - } - } -} - -impl From for u16 { - #[inline] - fn from(k: TokenKind) -> u16 { - k as u16 - } -} - -impl From for rowan::SyntaxKind { - fn from(kind: TokenKind) -> Self { - Self(kind as u16) - } -} - -impl TokenKind { - pub fn is_literal(self) -> bool { - matches!(self, Self::Number | Self::Identifier) - } - - pub fn infix(self) -> Option<(u16, u16)> { - match self { - Self::BoolOr => Some((78, 79)), - Self::BoolAnd => Some((80, 81)), - Self::Equal - | Self::NotEqual - | Self::LessThan - | Self::GreaterThan - | Self::LessThanAndEqual - | Self::GreaterThanAndEqual => Some((82, 83)), - Self::BitOr => Some((84, 85)), - Self::BitXor => Some((86, 87)), - Self::BitAnd => Some((88, 89)), - Self::ShiftL | Self::ShiftR => Some((90, 91)), - Self::Add | Self::Sub => Some((92, 93)), - Self::Mul | Self::Div | Self::IntDiv | Self::Mod => Some((94, 95)), - Self::Power => Some((96, 97)), - _ => None, - } - } - - pub fn prefix(self) -> Option { - match self { - Self::Sub => Some(100), - Self::Not => Some(99), - Self::BitNot => Some(98), - _ => None, - } - } - - pub fn postfix(self) -> Option { - match self { - Self::Dot => Some(200), - Self::LBracket => Some(201), - _ => None, - } - } - - pub fn is_declaration_kw(self) -> bool { - matches!(self, Self::VarKw | Self::ComponentKw | Self::SignalKw) - } - - pub fn is_trivial(self) -> bool { - matches!( - self, - Self::WhiteSpace | Self::EndLine | Self::CommentLine | Self::BlockComment | Self::Error - ) - } -} +use logos::Logos; +use serde::Serialize; + +#[derive(Logos, Debug, PartialEq, Clone, Copy, Eq, PartialOrd, Ord, Hash, Serialize)] +#[allow(non_camel_case_types)] +#[repr(u16)] +pub enum TokenKind { + // Error + #[error] + Error = 0, + // Comments + #[regex(r"//[^\n]*")] + CommentLine, + #[token("/*")] + CommentBlockOpen, + #[token("*/")] + CommentBlockClose, + // Trivial + #[regex("[ \t]+")] + WhiteSpace, + #[regex("[\n]")] + EndLine, + // Circom + #[token("pragma")] + Pragma, + #[token("circom")] + Circom, + #[regex("2.[0-9].[0-9]")] + Version, + // Literals + #[regex("[0-9]+")] + Number, + #[regex("[$_]*[a-zA-Z][a-zA-Z0-9_$]*")] + Identifier, + #[regex(r#""[^"]*""#)] + CircomString, + // Brackets + #[token("(")] + LParen, + #[token(")")] + RParen, + #[token("{")] + LCurly, + #[token("}")] + RCurly, + #[token("[")] + LBracket, + #[token("]")] + RBracket, + // Punctuation + // Punctuation + #[token(";")] + Semicolon, + #[token(",")] + Comma, + #[token(".")] + Dot, + // Boolean operators + #[token("&&")] + BoolAnd, + #[token("||")] + BoolOr, + #[token("!")] + Not, + // Relational operators + #[token("==")] + Equal, + #[token("!=")] + NotEqual, + #[token("<")] + LessThan, + #[token(">")] + GreaterThan, + #[token("<=")] + LessThanAndEqual, + #[token(">=")] + GreaterThanAndEqual, + // Arithmetic operators + #[token("+")] + Add, + #[token("-")] + Sub, + #[token("*")] + Mul, + #[token("**")] + Power, + #[token("/")] + Div, + #[token("\\")] + IntDiv, + #[token("%")] + Mod, + // Combined arithmetic assignment + #[token("+=")] + AddAssign, + #[token("-=")] + SubAssign, + #[token("*=")] + MulAssign, + #[token("**=")] + PowerAssign, + #[token("/=")] + DivAssign, + #[token(r"\=")] + IntDivAssign, + #[token("%=")] + ModAssign, + #[token("++")] + UnitInc, + #[token("--")] + UnitDec, + // Bitwise operators + #[token("&")] + BitAnd, + #[token("|")] + BitOr, + #[token("~")] + BitNot, + #[token("^")] + BitXor, + #[token(">>")] + ShiftR, + #[token("<<")] + ShiftL, + // Combined bitwise assignments + // Combined bitwise assignments + #[token("&=")] + BitAndAssign, + #[token("|=")] + BitOrAssign, + #[token("~=")] + BitNotAssign, + #[token("^=")] + BitXorAssign, + #[token(">>=")] + ShiftRAssign, + #[token("<<=")] + ShiftLAssign, + // Assign + #[token("=")] + Assign, + #[token("===")] + EqualSignal, + #[token("-->")] + LAssignSignal, + #[token("==>")] + LAssignContraintSignal, + #[token("<--")] + RAssignSignal, + #[token("<==")] + RAssignConstraintSignal, + // Conditional expressions + #[token("?")] + MarkQuestion, + #[token(":")] + Colon, + // Keywords + #[token("template")] + TemplateKw, + #[token("function")] + FunctionKw, + #[token("component")] + ComponentKw, + #[token("main")] + MainKw, + #[token("public")] + PublicKw, + #[token("signal")] + SignalKw, + #[token("var")] + VarKw, + #[token("include")] + IncludeKw, + #[token("input")] + InputKw, + #[token("output")] + OutputKw, + #[token("log")] + LogKw, + // Statement keywords + #[token("if")] + IfKw, + #[token("else")] + ElseKw, + #[token("for")] + ForKw, + #[token("while")] + WhileKw, + #[token("return")] + ReturnKw, + #[token("assert")] + AssertKw, + // Complex token kind + ForLoop, + AssignStatement, + CircomProgram, + SignalOfComponent, + SignalHeader, + Block, + Tuple, + TupleInit, + Call, + TenaryConditional, + Condition, + Expression, + FunctionDef, + Statement, + StatementList, + ComponentDecl, + TemplateDef, + TemplateName, + FunctionName, + ParameterList, + SignalDecl, + VarDecl, + InputSignalDecl, + OutputSignalDecl, + ComponentCall, + ComponentIdentifier, + SignalIdentifier, + ArrayQuery, + ParserError, + BlockComment, + EOF, + ROOT, + __LAST, +} + +impl From for TokenKind { + #[inline] + fn from(d: u16) -> TokenKind { + assert!(d <= (TokenKind::__LAST as u16)); + unsafe { std::mem::transmute::(d) } + } +} + +impl From for TokenKind { + fn from(value: rowan::SyntaxKind) -> Self { + match value { + rowan::SyntaxKind(id) => TokenKind::from(id), + } + } +} + +impl From for u16 { + #[inline] + fn from(k: TokenKind) -> u16 { + k as u16 + } +} + +impl From for rowan::SyntaxKind { + fn from(kind: TokenKind) -> Self { + Self(kind as u16) + } +} + +impl TokenKind { + // a + 10 --> a and 10 are literals + pub fn is_literal(self) -> bool { + matches!(self, Self::Number | Self::Identifier) + } + + // these tokens have the lowest priority + // infix_operator + // eg: a + b --> + is an infix token + pub fn infix(self) -> Option<(u16, u16)> { + match self { + // arithmetic operators + Self::Power => Some((99, 100)), + Self::Mul | Self::Div | Self::IntDiv | Self::Mod => Some((94, 95)), + Self::Add | Self::Sub => Some((89, 90)), + // shift bitwise operators + Self::ShiftL | Self::ShiftR => Some((84, 85)), + // relational operators + Self::LessThan + | Self::GreaterThan + | Self::LessThanAndEqual + | Self::GreaterThanAndEqual => Some((79, 80)), + Self::Equal + | Self::NotEqual => Some((74, 75)), + // other bitwise operators + Self::BitAnd => Some((69, 70)), + Self::BitXor => Some((64, 65)), // exclusive or + Self::BitOr => Some((59, 60)), + // boolean operators + Self::BoolAnd => Some((54, 55)), + Self::BoolOr => Some((49, 50)), + // ---------- + // TODO: how about conditional operation ( ? : ) + // associativity: right to left [ a ? b : c --> ??? ] + // ---------- + // associativity: right to left [ a = b = c --> a = (b = c) ] + // assignment operators + Self::Assign + // bitwise asignment operators + | Self::BitOrAssign + | Self::BitXorAssign + | Self::BitAndAssign + | Self::ShiftLAssign + | Self::ShiftRAssign + // arithmetic asignament operators + | Self::AddAssign + | Self::SubAssign + | Self::MulAssign + | Self::DivAssign + | Self::IntDivAssign + | Self::ModAssign + | Self::PowerAssign => Some((44, 45)), + // TODO: how about comma (expression separator) + Self::Comma => Some((39, 40)), + // not an infix operator + _ => None, + } + } + + // priority: post > pre > in + // associativity: right to left [ --!a --> --(!a) ] + // prefix_operator + // eg: -10, !a, ++a, --a + pub fn prefix(self) -> Option { + match self { + Self::UnitDec | Self::UnitInc | Self::Sub | Self::Add | Self::Not | Self::BitNot => { + Some(200) + } + + _ => None, + } + } + + // these tokens have the highest priority + // postfix_operator + // eg: a[10], b++, c.att1 + pub fn postfix(self) -> Option { + match self { + Self::LParen // function call + | Self::LBracket // array subscript + | Self::Dot // attribute access + | Self::UnitDec | Self::UnitInc => Some(300), + + _ => None, + } + } + + pub fn is_declaration_kw(self) -> bool { + matches!(self, Self::VarKw | Self::ComponentKw | Self::SignalKw) + } + + pub fn is_trivial(self) -> bool { + matches!( + self, + Self::WhiteSpace | Self::EndLine | Self::CommentLine | Self::BlockComment | Self::Error + ) + } +} diff --git a/crates/syntax/Cargo.toml b/crates/syntax/Cargo.toml index d6a3b47..795f0b1 100644 --- a/crates/syntax/Cargo.toml +++ b/crates/syntax/Cargo.toml @@ -9,5 +9,13 @@ rust-version.workspace = true [dependencies] rowan = "0.15.13" parser.workspace = true - lsp-types = {version = "0.94.1", features = ["proposed"]} + +[dev-dependencies] +# for snapshot testing, yaml format +insta = { version = "1.41.1", features = ["yaml"] } + +[profile.dev.package] +# compile slightly slower once, but use less memory, have faster diffs +insta.opt-level = 3 +similar.opt-level = 3 \ No newline at end of file diff --git a/crates/syntax/src/abstract_syntax_tree/template.rs b/crates/syntax/src/abstract_syntax_tree/template.rs index ab2f354..7253b12 100644 --- a/crates/syntax/src/abstract_syntax_tree/template.rs +++ b/crates/syntax/src/abstract_syntax_tree/template.rs @@ -1,5 +1,4 @@ use parser::token_kind::TokenKind::*; -use rowan::SyntaxText; use crate::syntax_node::CircomLanguage; use crate::syntax_node::SyntaxNode; @@ -45,11 +44,11 @@ impl AstTemplateDef { None } - pub fn find_input_signal(&self, name: &SyntaxText) -> Option { + pub fn find_input_signal(&self, name: &str) -> Option { if let Some(statements) = self.statements() { for input_signal in statements.find_children::() { if let Some(signal_name) = input_signal.name() { - if signal_name.equal(name) { + if signal_name.syntax().text() == name { return Some(input_signal); } } @@ -58,11 +57,11 @@ impl AstTemplateDef { None } - pub fn find_output_signal(&self, name: &SyntaxText) -> Option { + pub fn find_output_signal(&self, name: &str) -> Option { if let Some(statements) = self.statements() { for input_signal in statements.find_children::() { if let Some(signal_name) = input_signal.name() { - if signal_name.equal(name) { + if signal_name.syntax().text() == name { return Some(input_signal); } } @@ -71,11 +70,11 @@ impl AstTemplateDef { None } - pub fn find_internal_signal(&self, name: &SyntaxText) -> Option { + pub fn find_internal_signal(&self, name: &str) -> Option { if let Some(statements) = self.statements() { for signal in statements.find_children::() { if let Some(signal_name) = signal.name() { - if signal_name.equal(name) { + if signal_name.syntax().text() == name { return Some(signal); } } diff --git a/crates/syntax/src/snapshots/syntax__syntax__grammar_tests__block_happy_test_statements.snap b/crates/syntax/src/snapshots/syntax__syntax__grammar_tests__block_happy_test_statements.snap new file mode 100644 index 0000000..5db6859 --- /dev/null +++ b/crates/syntax/src/snapshots/syntax__syntax__grammar_tests__block_happy_test_statements.snap @@ -0,0 +1,12 @@ +--- +source: crates/syntax/src/syntax.rs +expression: statements +--- +- "signal input in[N];" +- "signal output out;" +- "component comp[N-1];" +- "for(var i = 0; i < N-1; i++){\n comp[i] = Multiplier2();\n }" +- "comp[0].in1 <== in[0];" +- "comp[0].in2 <== in[1];" +- "for(var i = 0; i < N-2; i++){\n comp[i+1].in1 <== comp[i].out;\n comp[i+1].in2 <== in[i+2];\n\n }" +- "out <== comp[N-2].out;" \ No newline at end of file diff --git a/crates/syntax/src/snapshots/syntax__syntax__grammar_tests__template_happy_test_statements.snap b/crates/syntax/src/snapshots/syntax__syntax__grammar_tests__template_happy_test_statements.snap new file mode 100644 index 0000000..74cf89f --- /dev/null +++ b/crates/syntax/src/snapshots/syntax__syntax__grammar_tests__template_happy_test_statements.snap @@ -0,0 +1,8 @@ +--- +source: crates/syntax/src/syntax.rs +expression: statements +--- +- "signal input in[N];" +- "signal output out;" +- "component comp[N-1];" +- "for(var i = 0; i < N-1; i++){\n comp[i] = Multiplier2();\n }" diff --git a/crates/syntax/src/snapshots/syntax__syntax__tests__syntax_test_1_children.snap b/crates/syntax/src/snapshots/syntax__syntax__tests__syntax_test_1_children.snap new file mode 100644 index 0000000..b1feddb --- /dev/null +++ b/crates/syntax/src/snapshots/syntax__syntax__tests__syntax_test_1_children.snap @@ -0,0 +1,17 @@ +--- +source: crates/syntax/src/syntax.rs +expression: children_string +--- +- pragma circom 2.0.0; +- "\n" +- "\n" +- " " +- "\n" +- " " +- "template Multiplier2 () {}" +- "\n" +- " " +- "template Multiplier2 () {}" +- " " +- "\n" +- " " diff --git a/crates/syntax/src/snapshots/syntax__syntax__tests__syntax_test_2_functions.snap b/crates/syntax/src/snapshots/syntax__syntax__tests__syntax_test_2_functions.snap new file mode 100644 index 0000000..0913804 --- /dev/null +++ b/crates/syntax/src/snapshots/syntax__syntax__tests__syntax_test_2_functions.snap @@ -0,0 +1,5 @@ +--- +source: crates/syntax/src/syntax.rs +expression: function_names +--- +- nbits diff --git a/crates/syntax/src/snapshots/syntax__syntax__tests__syntax_test_2_templates.snap b/crates/syntax/src/snapshots/syntax__syntax__tests__syntax_test_2_templates.snap new file mode 100644 index 0000000..db61c87 --- /dev/null +++ b/crates/syntax/src/snapshots/syntax__syntax__tests__syntax_test_2_templates.snap @@ -0,0 +1,5 @@ +--- +source: crates/syntax/src/syntax.rs +expression: template_names +--- +- BinSum diff --git a/crates/syntax/src/snapshots/syntax__syntax__tests__syntax_test_5_templates.snap b/crates/syntax/src/snapshots/syntax__syntax__tests__syntax_test_5_templates.snap new file mode 100644 index 0000000..8050901 --- /dev/null +++ b/crates/syntax/src/snapshots/syntax__syntax__tests__syntax_test_5_templates.snap @@ -0,0 +1,5 @@ +--- +source: crates/syntax/src/syntax.rs +expression: template_names +--- +- Multiplier2 diff --git a/crates/syntax/src/snapshots/syntax__syntax__tests__syntax_test_6_templates.snap b/crates/syntax/src/snapshots/syntax__syntax__tests__syntax_test_6_templates.snap new file mode 100644 index 0000000..8050901 --- /dev/null +++ b/crates/syntax/src/snapshots/syntax__syntax__tests__syntax_test_6_templates.snap @@ -0,0 +1,5 @@ +--- +source: crates/syntax/src/syntax.rs +expression: template_names +--- +- Multiplier2 diff --git a/crates/syntax/src/syntax.rs b/crates/syntax/src/syntax.rs index 9b9202f..ded3cc3 100644 --- a/crates/syntax/src/syntax.rs +++ b/crates/syntax/src/syntax.rs @@ -1,6 +1,7 @@ use parser::input::Input; use parser::output::{Child, Output}; use parser::parser::Parser; +use parser::token_kind::TokenKind; use rowan::{GreenNode, GreenNodeBuilder}; use crate::syntax_node::SyntaxNode; @@ -30,6 +31,14 @@ impl<'a> SyntaxTreeBuilder<'a> { self.builder.finish_node(); } Child::Tree(child_tree) => self.build_rec(child_tree), + Child::Error(error) => { + let token_kind = TokenKind::Error; + let token_value = error.as_str(); + + self.builder.start_node(token_kind.into()); + self.builder.token(token_kind.into(), token_value); + self.builder.finish_node(); + } } } @@ -58,234 +67,353 @@ impl<'a> SyntaxTreeBuilder<'a> { #[cfg(test)] mod tests { - use parser::token_kind::TokenKind::{self, *}; use std::hash::{DefaultHasher, Hash, Hasher}; - use rowan::{ast::AstNode, TextRange}; + use rowan::ast::AstNode; use crate::{abstract_syntax_tree::AstCircomProgram, test_programs}; use super::SyntaxTreeBuilder; - fn generate_expected_token_kind(ast: &AstCircomProgram) { + fn ast_from_source(source: &str) -> AstCircomProgram { + let syntax = SyntaxTreeBuilder::syntax_tree(source); + AstCircomProgram::cast(syntax).unwrap() + } + + fn children_from_ast(ast: &AstCircomProgram) -> Vec { let children = ast .syntax() .first_child() .unwrap() - .siblings(rowan::Direction::Next); + .siblings(rowan::Direction::Next) + .into_iter() + .map(|child| child.text().to_string()) + .collect(); - println!("vec!["); - for child in children { - println!("{:?},", child.kind()); - } - println!("];"); + children } - fn generate_expected_token_range(ast: &AstCircomProgram) { - let children = ast - .syntax() - .first_child() - .unwrap() - .siblings(rowan::Direction::Next); - - println!("vec!["); - for child in children { - println!( - "TextRange::new({:?}.into(), {:?}.into()), ", - child.text_range().start(), - child.text_range().end() - ); - } - println!("];"); + fn pragma_string_from_ast(ast: &AstCircomProgram) -> String { + ast.pragma().unwrap().syntax().text().to_string() } - fn check_ast_children( - ast: &AstCircomProgram, - expected_kinds: &Vec, - expected_ranges: &Vec, - ) { - let children = ast - .syntax() - .first_child() + fn pragma_version_from_ast(ast: &AstCircomProgram) -> String { + ast.pragma() .unwrap() - .siblings(rowan::Direction::Next); - - let mut kind_iterator = expected_kinds.iter(); - let mut range_iterator = expected_ranges.iter(); - - for child in children { - if let (Some(expected_kind), Some(expected_range)) = - (kind_iterator.next(), range_iterator.next()) - { - assert_eq!(child.kind(), *expected_kind); - assert_eq!(child.text_range(), *expected_range); - } else { - panic!("Mismatched number of children and expected values"); - } - } - println!(); + .version() + .unwrap() + .syntax() + .text() + .to_string() } - #[test] - fn syntax_test_1() { - let source: &str = test_programs::PARSER_TEST_1; - - let expected_pragma = "pragma circom 2.0.0;".to_string(); - let expected_kinds = vec![ - Pragma, - EndLine, - EndLine, - WhiteSpace, - EndLine, - WhiteSpace, - TemplateDef, - EndLine, - WhiteSpace, - TemplateDef, - WhiteSpace, - EndLine, - WhiteSpace, - ]; - let expected_ranges = vec![ - TextRange::new(0.into(), 20.into()), - TextRange::new(20.into(), 21.into()), - TextRange::new(21.into(), 22.into()), - TextRange::new(22.into(), 26.into()), - TextRange::new(26.into(), 27.into()), - TextRange::new(27.into(), 31.into()), - TextRange::new(31.into(), 57.into()), - TextRange::new(57.into(), 58.into()), - TextRange::new(58.into(), 62.into()), - TextRange::new(62.into(), 88.into()), - TextRange::new(88.into(), 89.into()), - TextRange::new(89.into(), 90.into()), - TextRange::new(90.into(), 94.into()), - ]; + fn template_names_from_ast(ast: &AstCircomProgram) -> Vec { + let templates = ast + .template_list() + .iter() + .map(|template| template.name().unwrap().syntax().text().to_string()) + .collect(); - let syntax = SyntaxTreeBuilder::syntax_tree(source); + templates + } - if let Some(ast) = AstCircomProgram::cast(syntax) { - check_ast_children(&ast, &expected_kinds, &expected_ranges); - - // check pragma - let pragma = ast.pragma().unwrap().syntax().text().to_string(); - assert_eq!(pragma, expected_pragma, "Pragma is not correct!"); - - // check ast hash - let mut hasher = DefaultHasher::default(); - ast.syntax().hash(&mut hasher); - let _ast_hash = hasher.finish(); - - // check template hash - let mut h1 = DefaultHasher::default(); - let mut h2 = DefaultHasher::default(); - - let template = ast.template_list(); - - template[0].syntax().hash(&mut h1); - template[1].syntax().hash(&mut h2); - - assert_ne!( - h1.finish(), - h2.finish(), - "Templates with same syntax should have different hashes!" - ); - - // check template syntax (text & green node) - assert_eq!( - template[0].syntax().text(), - template[1].syntax().text(), - "The syntax (as text) of template 1 and 2 must be the same!" - ); - assert_eq!( - template[0].syntax().green(), - template[1].syntax().green(), - "The syntax (as green node) of template 1 and 2 must be the same!!" - ); - } + fn function_names_from_ast(ast: &AstCircomProgram) -> Vec { + let functions = ast + .function_list() + .iter() + .map(|function| { + function + .function_name() + .unwrap() + .syntax() + .text() + .to_string() + }) + .collect(); + + functions + } + + #[test] + fn syntax_test_1() { + let ast = ast_from_source(test_programs::PARSER_TEST_1); + + // check_ast_children + let children = children_from_ast(&ast); + insta::assert_yaml_snapshot!("syntax_test_1_children", children); + + // check pragma + let pragma = pragma_string_from_ast(&ast); + insta::assert_yaml_snapshot!(pragma, @"pragma circom 2.0.0;"); + + // check ast hash + let mut hasher = DefaultHasher::default(); + ast.syntax().hash(&mut hasher); + let _ast_hash = hasher.finish(); + + // check template hash + let mut h1 = DefaultHasher::default(); + let mut h2 = DefaultHasher::default(); + + let template = ast.template_list(); + + template[0].syntax().hash(&mut h1); + template[1].syntax().hash(&mut h2); + + assert_ne!( + h1.finish(), + h2.finish(), + "Templates with same syntax should have different hashes!" + ); + + // check template syntax (text & green node) + assert_eq!( + template[0].syntax().text(), + template[1].syntax().text(), + "The syntax (as text) of template 1 and 2 must be the same!" + ); + assert_eq!( + template[0].syntax().green(), + template[1].syntax().green(), + "The syntax (as green node) of template 1 and 2 must be the same!!" + ); } #[test] fn syntax_test_2() { - let source = test_programs::PARSER_TEST_2; + let ast = ast_from_source(test_programs::PARSER_TEST_2); - let syntax = SyntaxTreeBuilder::syntax_tree(source); + let pragma = pragma_string_from_ast(&ast); + insta::assert_yaml_snapshot!(pragma, @"pragma circom 2.0.0;"); - if let Some(ast) = AstCircomProgram::cast(syntax) { - println!("Pragma: {:?}", ast.pragma().unwrap().syntax().text()); + let function_names = function_names_from_ast(&ast); + insta::assert_yaml_snapshot!("syntax_test_2_functions", function_names); - print!("Templates: "); - let templates = ast.template_list(); - for template in templates.iter() { - print!("{:?} ", template.name().unwrap().syntax().text()); // leading whitespaces - // print!("{:?} ", template.syntax().text()); // leading whitespaces - } - println!(); - - print!("Functions: "); - let functions = ast.function_list(); - for function in functions.iter() { - print!("{:?} ", function.function_name().unwrap().syntax().text()); - // leading whitespaces - // print!("{:?} ", function.syntax().text()); // leading whitespaces - } - println!(); - } + let template_names = template_names_from_ast(&ast); + insta::assert_yaml_snapshot!("syntax_test_2_templates", template_names); } #[test] fn syntax_test_3() { - let source = test_programs::PARSER_TEST_3; - - let syntax = SyntaxTreeBuilder::syntax_tree(source); + let ast = ast_from_source(test_programs::PARSER_TEST_3); + let pragma = pragma_string_from_ast(&ast); + insta::assert_yaml_snapshot!(pragma, @"pragma circom 2.0.0;"); - if let Some(ast) = AstCircomProgram::cast(syntax) { - println!("Pragma: {:?}", ast.pragma().unwrap().syntax().text()); - println!( - "Pragma version: {:?}", - ast.pragma().unwrap().version().unwrap().syntax().text() - ); - } + let pragma_version = pragma_version_from_ast(&ast); + insta::assert_yaml_snapshot!(pragma_version, @"2.0.0"); } #[test] fn syntax_test_4() { - let source = test_programs::PARSER_TEST_4; - - let syntax = SyntaxTreeBuilder::syntax_tree(source); + let ast = ast_from_source(test_programs::PARSER_TEST_4); + let pragma = pragma_string_from_ast(&ast); + insta::assert_yaml_snapshot!(pragma, @"pragma circom 2.0.0;"); - if let Some(ast) = AstCircomProgram::cast(syntax) { - println!("Pragma: {:?}", ast.pragma().unwrap().syntax().text()); - println!( - "Pragma version: {:?}", - ast.pragma().unwrap().version().unwrap().syntax().text() - ); - } + let pragma_version = pragma_version_from_ast(&ast); + insta::assert_yaml_snapshot!(pragma_version, @"2.0.0"); } #[test] fn syntax_test_5() { - let source = test_programs::PARSER_TEST_5; - - let syntax = SyntaxTreeBuilder::syntax_tree(source); + let ast = ast_from_source(test_programs::PARSER_TEST_5); + let pragma = ast.pragma().is_none(); + insta::assert_yaml_snapshot!(pragma, @"true"); - if let Some(ast) = AstCircomProgram::cast(syntax) { - println!("pragma: {:?}", ast.pragma()); - println!("template list: {:?}", ast.template_list()); - // assert!(ast.pragma().is_none(), "No pragma in source code"); - } + let template_names = template_names_from_ast(&ast); + insta::assert_yaml_snapshot!("syntax_test_5_templates", template_names); } #[test] fn syntax_test_6() { - let source = test_programs::PARSER_TEST_6; + let ast = ast_from_source(test_programs::PARSER_TEST_6); + let pragma = ast.pragma().is_none(); + insta::assert_yaml_snapshot!(pragma, @"true"); - let syntax = SyntaxTreeBuilder::syntax_tree(source); + let template_names = template_names_from_ast(&ast); + insta::assert_yaml_snapshot!("syntax_test_6_templates", template_names); + } +} - if let Some(ast) = AstCircomProgram::cast(syntax) { - println!("{:?}", ast.pragma()); - println!("template list: {:?}", ast.template_list()); - // assert!(ast.pragma().is_none(), "No pragma in source code"); - } +#[cfg(test)] +mod grammar_tests { + + use crate::{ + abstract_syntax_tree::{AstBlock, AstOutputSignalDecl, AstPragma, AstTemplateDef}, + syntax::SyntaxTreeBuilder, + syntax_node::CircomLanguage, + }; + use parser::{grammar::entry::Scope, input::Input, parser::Parser}; + use rowan::{ast::AstNode, SyntaxNode}; + + fn syntax_node_from_source(source: &str, scope: Scope) -> SyntaxNode { + let input = Input::new(&source); + let output = Parser::parsing_with_scope(&input, scope); + + // output is a tree whose node is index of token, no content of token + // convert output into green node + let mut builder = SyntaxTreeBuilder::new(&input); + builder.build(output); + let green = builder.finish(); + + // then cast green node into syntax node + let syntax = SyntaxNode::new_root(green); + + syntax + } + + #[test] + fn pragma_happy_test() { + // parse source (string) into output tree + let version = r#"2.0.1"#; + let source = format!(r#"pragma circom {};"#, version); + + let syntax = syntax_node_from_source(&source, Scope::Pragma); + + // cast syntax node into ast node to retrieve more information + let pragma = AstPragma::cast(syntax).expect("Can not cast syntax node into ast pragma"); + + // finally, assert with expect value + let pragma_versison_kind = pragma.version().unwrap().syntax().kind(); + insta::assert_yaml_snapshot!(pragma_versison_kind, @"Version"); + + let pragma_versison_text = pragma.version().unwrap().syntax().text().to_string(); + insta::assert_yaml_snapshot!(pragma_versison_text, @"2.0.1"); + } + + #[test] + fn template_happy_test() { + // SOURCE & EXPECTED RESULT + const SOURCE: &str = r#"template MultiplierN (N, P, QQ) { + //Declaration of signals and components. + signal input in[N]; + signal output out; + component comp[N-1]; + + //Statements. + for(var i = 0; i < N-1; i++){ + comp[i] = Multiplier2(); + } + + // ... some more code (see below) + + }"#; + + let syntax = syntax_node_from_source(&SOURCE, Scope::Template); + + // cast syntax node into ast node to retrieve more information + let template = + AstTemplateDef::cast(syntax).expect("Can not cast syntax node into ast template"); + + // finally, assert with expect value + + // name + let name = template + .name() + .expect("Can not extract template name") + .syntax() + .text() + .to_string(); + insta::assert_yaml_snapshot!(name, @"MultiplierN"); + + // parameter list + let first_param = template + .parameter_list() + .expect("Can not detect parameter list") + .syntax() + .first_child() + .unwrap() + .text() + .to_string(); + insta::assert_yaml_snapshot!(first_param, @"N"); + + let last_param = template + .parameter_list() + .expect("Can not detect parameter list") + .syntax() + .last_child() + .unwrap() + .text() + .to_string(); + insta::assert_yaml_snapshot!(last_param, @"QQ"); + + // statements + let statements = template.statements().unwrap(); + let output_signal = statements.find_children::(); + println!("{:?}", output_signal); + + let statements: Vec = statements + .statement_list() + .into_iter() + .map(|statement| statement.syntax().text().to_string()) + .collect(); + insta::assert_yaml_snapshot!("template_happy_test_statements", statements); + + // input signal + let input_signal = template + .find_input_signal("in") + .unwrap() + .syntax() + .text() + .to_string(); + insta::assert_yaml_snapshot!(input_signal, @"signal input in[N];"); + + // output signal + let output_signal = template + .find_output_signal("out") + .unwrap() + .syntax() + .text() + .to_string(); + insta::assert_yaml_snapshot!(output_signal, @"signal output out;"); + + // internal signal + let internal_signal = template.find_internal_signal("in").is_none(); + insta::assert_yaml_snapshot!(internal_signal, @"true"); + + // component + let component = template + .find_component("comp") + .unwrap() + .syntax() + .text() + .to_string(); + insta::assert_yaml_snapshot!(component, @"component comp[N-1];"); + } + + #[test] + fn block_happy_test() { + // SOURCE & EXPECTED RESULT + let source = r#"{ + //Declaration of signals. + signal input in[N]; + signal output out; + component comp[N-1]; + + //Statements. + for(var i = 0; i < N-1; i++){ + comp[i] = Multiplier2(); + } + comp[0].in1 <== in[0]; + comp[0].in2 <== in[1]; + for(var i = 0; i < N-2; i++){ + comp[i+1].in1 <== comp[i].out; + comp[i+1].in2 <== in[i+2]; + + } + out <== comp[N-2].out; + }"#; + + let syntax = syntax_node_from_source(&source, Scope::Block); + + // cast syntax node into ast node to retrieve more information + let block = AstBlock::cast(syntax).expect("Can not cast syntax node into ast block"); + + // finally, assert with expect statements + let statements = block.statement_list().unwrap().statement_list(); + let statements: Vec = statements + .into_iter() + .map(|statement| statement.syntax().text().to_string()) + .collect(); + insta::assert_yaml_snapshot!("block_happy_test_statements", statements); } }