From a01f2eb22f2c9dd45d9f1c0e84559555a3101f7e Mon Sep 17 00:00:00 2001 From: apakhomov Date: Fri, 11 Oct 2024 22:44:41 +0300 Subject: [PATCH] Select, from support --- agenda.md | 26 ++++++- src/parser/mod.rs | 192 +++++++++++++++++++++++++++++++++++++++++++--- 2 files changed, 203 insertions(+), 15 deletions(-) diff --git a/agenda.md b/agenda.md index 79d34ce..99169a8 100644 --- a/agenda.md +++ b/agenda.md @@ -7,8 +7,26 @@ Today we are going to: -- [x] review the code -- [x] support parens in the parser +- [x] SELECT, FROM, + +- [ ] WHERE statement support + +select c1, c2 from t where c1 = 1 and c2 = 2; + + Node(type = SELECT) + / \ + select_col_list Node(type = WHERE) from(t) + (c1, c2) + / \ + Node(type = AND) Node(type = AND) + / \ / \ + Node(c1=1) Node(c2=2) Node(c1=1) Node(c2=2) + + + + + + + + -Next: -- [ ] statement support diff --git a/src/parser/mod.rs b/src/parser/mod.rs index 4f2bbac..6f7e2a2 100644 --- a/src/parser/mod.rs +++ b/src/parser/mod.rs @@ -199,6 +199,11 @@ struct ColumnIdentifier { pub enum Literal { Numeric(i32), String(String), + Identifier { + first_name: String, + second_name: Option<String>, + third_name: Option<String>, + }, Float(f32), Boolean(bool), } @@ -207,6 +212,44 @@ impl Literal { fn numeric(i: String) -> Literal { Literal::Numeric(i.parse().unwrap()) } + + fn identifier(identifier: &str) -> Literal { + let parts: Vec<&str> = identifier.split('.').collect(); + + match parts.len() { + 1 => Literal::Identifier { + first_name: parts[0].to_string(), + second_name: None, + third_name: None, + }, + 2 => Literal::Identifier { + first_name: parts[0].to_string(), + second_name: Some(parts[1].to_string()), + third_name: None, + }, + 3 => Literal::Identifier { + first_name: parts[0].to_string(), + second_name: Some(parts[1].to_string()), + third_name: Some(parts[2].to_string()), + }, + _ => panic!("Invalid identifier: {}", identifier), + } + } +} + +#[derive(Debug, PartialEq)] +pub struct ColumnList { + columns: Vec, +} + +#[derive(Debug, PartialEq)] +pub
struct TableList { + tables: Vec, +} + +#[derive(Debug, PartialEq)] +pub struct Where { + conditions: Vec, } #[derive(Debug, PartialEq)] @@ -218,9 +261,13 @@ struct Function { #[derive(Debug, PartialEq)] pub enum Node { Leaf(Literal), + Inner(Op, Box<Node>, Box<Node>), Prefix(Op, Box<Node>), Postfix(Op, Box<Node>), + + Select(Box<Node>, Option<Box<Node>>), + From(Box<Node>), } pub struct Parser<'a> { @@ -243,6 +290,20 @@ impl Node { fn leaf(literal: Literal) -> Node { Node::Leaf(literal) } + + fn select(column_nodes: Node, from_node: Option<Node>) -> Node { + match from_node { + Some(Node::From(_)) => {} + None => {} + _ => panic!("Expected From node"), + } + + Node::Select(Box::new(column_nodes), from_node.map(Box::new)) + } + + fn from(node: Node) -> Node { + Node::From(Box::new(node)) + } } #[derive(Debug, PartialEq)] @@ -263,6 +324,7 @@ pub enum Op { GreaterThanOrEquals, Not, CloseParen, + Comma, } impl<'a> Parser<'a> { @@ -271,29 +333,45 @@ impl<'a> Parser<'a> { } pub fn parse(&mut self) -> Node { - dbg!(self.parse_bp(0)) + self.parse_bp(0) } - // (1 + 2) * 3 + // select col1, fn parse_bp(&mut self, min_bp: u8) -> Node { - dbg!("REQ"); - dbg!(min_bp, self.lexer.peek()); let mut lhs = match self.lexer.next() { Some(Ok(Token::NumericLiteral(i))) => Node::leaf(Literal::numeric(i)), + Some(Ok(Token::Identifier { + first_name, + second_name: None, + third_name: None, + })) => Node::leaf(Literal::Identifier { + first_name: first_name.to_string(), + second_name: None, + third_name: None, + }), Some(Ok(Token::Not)) => { let ((), r_bp) = Self::prefix_operator_bp(&Op::Not); let rhs = self.parse_bp(r_bp); Node::prefix(Op::Not, rhs) } Some(Ok(Token::OpenParen)) => { - dbg!("OpenParen found"); let lhs = self.parse_bp(0); match self.lexer.next() { - Some(Ok(Token::CloseParen)) => { - dbg!("CloseParen found"); - lhs + Some(Ok(Token::CloseParen)) => lhs, + s => panic!("Unexpected token: {:?}", s), + } + } + Some(Ok(Token::Select)) => { + let rhs = self.parse_bp(0); + + match self.lexer.next() { + Some(Ok(Token::From)) => { +
let rhs1 = self.parse_bp(0); + + Node::select(rhs, Some(Node::from(rhs1))) } + None => Node::select(rhs, None), s => panic!("Unexpected token: {:?}", s), } } @@ -315,11 +393,10 @@ impl<'a> Parser<'a> { Some(Ok(Token::LessThanOrEquals)) => Op::LessThanOrEquals, Some(Ok(Token::GreaterThanOrEquals)) => Op::GreaterThanOrEquals, Some(Ok(Token::CloseParen)) => Op::CloseParen, + Some(Ok(Token::Comma)) => Op::Comma, _ => break, }; - dbg!(&op); - // postfix bp if let Some((l_bp, ())) = Self::postfix_operator_bp(&op) { // operate with postfix @@ -357,6 +434,7 @@ impl<'a> Parser<'a> { _ => None, } } + fn infix_operator_bp(op: &Op) -> Option<(u8, u8)> { match op { Op::Or => Some((1, 2)), @@ -369,6 +447,7 @@ impl<'a> Parser<'a> { Op::LessThanOrEquals => Some((4, 5)), Op::GreaterThanOrEquals => Some((4, 5)), + Op::Comma => Some((4, 5)), Op::Plus => Some((6, 7)), Op::Minus => Some((6, 7)), @@ -384,7 +463,7 @@ impl<'a> Parser<'a> { #[cfg(test)] mod tests { - use crate::parser::{Literal, Node, Op, Parser}; + use crate::parser::{ColumnStatement, Literal, Node, Op, Parser}; use super::lexer::Lexer; use pretty_assertions::assert_eq; @@ -653,4 +732,95 @@ mod tests { ) ); } + + #[test] + fn select_query_without_from() { + let input = "select 1"; + let lexer = Lexer::new(input); + + let mut parser = Parser::new(lexer); + + let parse_tree = parser.parse(); + + assert_eq!( + parse_tree, + Node::select(Node::leaf(Literal::Numeric(1)), None) + ); + } + + #[test] + fn select_query_with_from() { + let input = "select 1 from table1"; + let lexer = Lexer::new(input); + + let mut parser = Parser::new(lexer); + + let parse_tree = parser.parse(); + + assert_eq!( + parse_tree, + Node::select( + Node::leaf(Literal::Numeric(1)), + Some(Node::from(Node::leaf(Literal::identifier("table1")))) + ) + ); + } + + #[test] + fn select_query_many_columns() { + let input = "select col1, col2 from table1"; + + let lexer = Lexer::new(input); + + let mut parser = Parser::new(lexer); + + let parse_tree = 
parser.parse(); + + assert_eq!( + parse_tree, + Node::select( + Node::inner( + Op::Comma, + Node::leaf(Literal::identifier("col1")), + Node::leaf(Literal::identifier("col2")) + ), + Some(Node::from(Node::leaf(Literal::identifier("table1")))) + ) + ); + } + + #[test] + fn select_query_many_commas() { + let input = "select col1, col2, 1 + 1 from table1, table2"; + + let lexer = Lexer::new(input); + + let mut parser = Parser::new(lexer); + + let parse_tree = parser.parse(); + + assert_eq!( + parse_tree, + Node::select( + Node::inner( + Op::Comma, + Node::inner( + Op::Comma, + Node::leaf(Literal::identifier("col1")), + Node::leaf(Literal::identifier("col2")) + ), + Node::inner( + Op::Plus, + Node::leaf(Literal::Numeric(1)), + Node::leaf(Literal::Numeric(1)) + ) + ), + Some(Node::from(Node::inner( + Op::Comma, + Node::leaf(Literal::identifier("table1")), + Node::leaf(Literal::identifier("table2")) + ))) + ) + ); + } }