From 6708dac4a65fa8ae8586c394ce17e7b18e1dbfc9 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Gustavo=20Gir=C3=A1ldez?= Date: Wed, 3 Jul 2024 16:39:19 -0400 Subject: [PATCH] Add anchoring to user documentation and examples --- crates/metaslang/cst/src/query/engine.rs | 5 + .../src/doc_examples/tree_query_language.rs | 106 +++++++++++++++++- .../cargo/tests/src/query/parser_tests.rs | 5 +- .../public/user-guide/tree-query-language.md | 31 ++++- 4 files changed, 140 insertions(+), 7 deletions(-) diff --git a/crates/metaslang/cst/src/query/engine.rs b/crates/metaslang/cst/src/query/engine.rs index aaa100d420..e65608189a 100644 --- a/crates/metaslang/cst/src/query/engine.rs +++ b/crates/metaslang/cst/src/query/engine.rs @@ -340,6 +340,9 @@ struct SequenceMatcher { impl SequenceMatcher { fn new(matcher: Rc>, cursor: Cursor) -> Self { + // Produce a template of instructions to create the matchers for the + // sequence by inserting ellipsis matchers in between each of the child + // matchers, unless it's explicitly disabled by an anchor token. let (mut template, last_anchor) = matcher.children.iter().enumerate().fold( (Vec::new(), false), |(mut acc, last_anchor), (index, child)| { @@ -553,6 +556,8 @@ impl Matcher for OneOrMoreMatcher { } } +/// Matches any number of sibling nodes and is used in between other matchers +/// when matching sequences, unless an explicit anchor is added. 
struct EllipsisMatcher { cursor: Cursor, has_returned_initial_empty_value: bool, diff --git a/crates/solidity/outputs/cargo/tests/src/doc_examples/tree_query_language.rs b/crates/solidity/outputs/cargo/tests/src/doc_examples/tree_query_language.rs index ce69b9b784..119b2743a4 100644 --- a/crates/solidity/outputs/cargo/tests/src/doc_examples/tree_query_language.rs +++ b/crates/solidity/outputs/cargo/tests/src/doc_examples/tree_query_language.rs @@ -123,9 +123,7 @@ fn capturing_nodes() { @contract_name name:[Identifier] members:[ContractMembers [ContractMember - [EventDefinition - @event_name name:[Identifier] - ] + [EventDefinition @event_name name:[Identifier]] ] ] ] @@ -275,3 +273,105 @@ fn alternations() { "break" ); } + +#[test] +fn anchoring() { + let query = Query::parse( + &r#" + // --8<-- [start:anchoring-1] + [FunctionDefinition + [ParametersDeclaration + [Parameters . @first_param [Parameter]] + ] + ] + // --8<-- [end:anchoring-1] + "# + .remove_mkdoc_snippet_markers(), + ) + .unwrap(); + + let iter = assert_matches( + &query, + NonterminalKind::FunctionDefinition, + "function test(int x, int y);", + ); + + let matches: Vec<_> = iter.collect(); + assert_eq!(matches.len(), 1); + assert_eq!( + matches[0].captures.get("first_param").unwrap()[0] + .node() + .unparse(), + "int x" + ); + + let query = Query::parse( + &r#" + // --8<-- [start:anchoring-2] + [FunctionDefinition + [ParametersDeclaration + [Parameters @last_param [Parameter] .] + ] + ] + // --8<-- [end:anchoring-2] + "# + .remove_mkdoc_snippet_markers(), + ) + .unwrap(); + + let iter = assert_matches( + &query, + NonterminalKind::FunctionDefinition, + "function test(int x, int y);", + ); + + let matches: Vec<_> = iter.collect(); + assert_eq!(matches.len(), 1); + assert_eq!( + matches[0].captures.get("last_param").unwrap()[0] + .node() + .unparse(), + " int y" + ); + + let query = Query::parse( + &r#" + // --8<-- [start:anchoring-3] + [Statements + @stmt1 [Statement] . 
@stmt2 [Statement] + ] + // --8<-- [end:anchoring-3] + "# + .remove_mkdoc_snippet_markers(), + ) + .unwrap(); + + let iter = assert_matches(&query, NonterminalKind::Statements, "int x; int y; x + y;"); + + let matches: Vec<_> = iter.collect(); + assert_eq!(matches.len(), 2); + assert_eq!( + matches[0].captures.get("stmt1").unwrap()[0] + .node() + .unparse(), + "int x;" + ); + assert_eq!( + matches[0].captures.get("stmt2").unwrap()[0] + .node() + .unparse(), + " int y;" + ); + assert_eq!( + matches[1].captures.get("stmt1").unwrap()[0] + .node() + .unparse(), + " int y;" + ); + assert_eq!( + matches[1].captures.get("stmt2").unwrap()[0] + .node() + .unparse(), + " x + y;" + ); +} diff --git a/crates/testlang/outputs/cargo/tests/src/query/parser_tests.rs b/crates/testlang/outputs/cargo/tests/src/query/parser_tests.rs index 6010aa0caa..4ae6163869 100644 --- a/crates/testlang/outputs/cargo/tests/src/query/parser_tests.rs +++ b/crates/testlang/outputs/cargo/tests/src/query/parser_tests.rs @@ -82,6 +82,9 @@ fn test_fails_single_anchor() { let result = Query::parse(r#"[_ .]"#); match result { Ok(_) => panic!("Expected parse failure"), - Err(e) => assert_eq!(e.message, "Parse error:\nexpected ']' at: .]\nAlt at: [_ .]\n"), + Err(e) => assert_eq!( + e.message, + "Parse error:\nexpected ']' at: .]\nAlt at: [_ .]\n" + ), } } diff --git a/documentation/public/user-guide/tree-query-language.md b/documentation/public/user-guide/tree-query-language.md index bfca807e70..d203b22555 100644 --- a/documentation/public/user-guide/tree-query-language.md +++ b/documentation/public/user-guide/tree-query-language.md @@ -6,8 +6,8 @@ A _query_ is a pattern that matches a certain set of nodes in a tree. The expression to match a given node consists of a pair of brackets (`[]`) containing two things: the node's kind, and optionally, a series of other patterns that match the node's children. 
For -example, this pattern would match any `MultiplicativeExpression` node whose children -are exactly two `Expression` nodes, with an `Asterisk` node in between (no whitespace): +example, this pattern would match any `MultiplicativeExpression` node that has +two child `Expression` nodes, with an `Asterisk` node in between: ```{ .scheme } --8<-- "crates/solidity/outputs/cargo/tests/src/doc_examples/tree_query_language.rs:query-syntax-1" @@ -36,7 +36,7 @@ node with two children, one of any kind labeled `left_operand` and one of any ki --8<-- "crates/solidity/outputs/cargo/tests/src/doc_examples/tree_query_language.rs:query-syntax-4" ``` -Children can also be elided. For example, this would produce multiple matches for a +Children can be elided. For example, this would produce multiple matches for a `MultiplicativeExpression` where at least _one_ of the children is an expression of a `StringExpression` variant, where each match is associated with each of the `StringExpression` children: @@ -107,3 +107,28 @@ This pattern would match a set of possible keyword terminals, capturing them as ```{ .scheme } --8<-- "crates/solidity/outputs/cargo/tests/src/doc_examples/tree_query_language.rs:alternations-2" ``` + +### Anchoring + +By using anchors (`.`), you can constrain a pattern to only match the first or the last child node. + +For example, the following pattern would match only the first parameter +declaration in a function definition: + +```{ .scheme } +--8<-- "crates/solidity/outputs/cargo/tests/src/doc_examples/tree_query_language.rs:anchoring-1" +``` + +And conversely the following will match only the last parameter: + +```{ .scheme } +--8<-- "crates/solidity/outputs/cargo/tests/src/doc_examples/tree_query_language.rs:anchoring-2" +``` + +If the anchor is used in between two patterns it constrains matches on both +patterns to occur consecutively, i.e. without any other sibling node in between. 
For +example, this pattern matches pairs of consecutive statements: + +```{ .scheme } +--8<-- "crates/solidity/outputs/cargo/tests/src/doc_examples/tree_query_language.rs:anchoring-3" +```