From c1280f4944517ec6b7306dab6965321080eaffbc Mon Sep 17 00:00:00 2001 From: apakhomov Date: Wed, 22 Jan 2025 19:02:48 +0300 Subject: [PATCH] Support CREATE TABLE in optimizer --- db/src/analyzer/tree.rs | 4 +- db/src/catalog/catalog.rs | 20 +++++----- db/src/catalog/types.rs | 12 ++---- db/src/embedded/mod.rs | 26 +++++++------ db/src/optimizer/optimizer.rs | 69 +++++++++++++++++++++++++++-------- db/src/optimizer/types.rs | 42 ++++++++++++++------- 6 files changed, 114 insertions(+), 59 deletions(-) diff --git a/db/src/analyzer/tree.rs b/db/src/analyzer/tree.rs index d7d04e9..701d341 100644 --- a/db/src/analyzer/tree.rs +++ b/db/src/analyzer/tree.rs @@ -1,10 +1,12 @@ use std::fmt::Display; +use strum::Display; + use crate::types::ColType; //#[allow(clippy::unused)] #[allow(dead_code)] -#[derive(Debug, PartialEq, Clone)] +#[derive(Debug, PartialEq, Clone, Display)] pub enum Operator { Project { columns: Vec, diff --git a/db/src/catalog/catalog.rs b/db/src/catalog/catalog.rs index 7ea0038..b8d040f 100644 --- a/db/src/catalog/catalog.rs +++ b/db/src/catalog/catalog.rs @@ -1,6 +1,8 @@ use std::collections::HashMap; -use super::types::{ColumnId, ColumnSchema, DataType, TableId, TableSchema}; +use crate::types::ColType; + +use super::types::{ColumnId, ColumnSchema, TableId, TableSchema}; pub struct Catalog { store: MemoryCatalogStore, @@ -91,7 +93,7 @@ impl TableSchemaBuilder { self } - pub fn col(&mut self, col_name: &str, data_type: DataType) -> &mut Self { + pub fn col(&mut self, col_name: &str, data_type: ColType) -> &mut Self { self.columns.push(ColumnSchema::new( &ColumnId::new(self.id.as_ref().unwrap(), col_name), data_type, @@ -107,8 +109,6 @@ impl TableSchemaBuilder { #[cfg(test)] mod tests { - use crate::catalog::types::DataType; - use super::*; fn samle_schema() -> TableSchema { @@ -117,11 +117,11 @@ mod tests { vec![ ColumnSchema::new( &ColumnId::new(&TableId::public("table1"), "col1"), - DataType::Int, + ColType::Int, ), ColumnSchema::new( &ColumnId::new(&TableId::public("table1"), "col2"), - DataType::String, + ColType::Text, ), ], ) @@ -214,8 +214,8 @@ mod tests { fn builder() { let table = TableSchemaBuilder::public() .table("table1") - .col("col1", DataType::Int) - .col("col2", DataType::String) + .col("col1", ColType::Int) + .col("col2", ColType::Text) .build(); assert_eq!( @@ -225,11 +225,11 @@ mod tests { vec![ ColumnSchema::new( &ColumnId::new(&TableId::public("table1"), "col1"), - DataType::Int, + ColType::Int, ), ColumnSchema::new( &ColumnId::new(&TableId::public("table1"), "col2"), - DataType::String, + ColType::Text, ), ], ) diff --git a/db/src/catalog/types.rs b/db/src/catalog/types.rs index 280948a..dac8d89 100644 --- a/db/src/catalog/types.rs +++ b/db/src/catalog/types.rs @@ -1,3 +1,5 @@ +use crate::types::ColType; + #[derive(Debug, PartialEq, Hash, Eq, Clone)] pub struct TableId { schema_name: String, @@ -90,20 +92,14 @@ impl TableSchema { #[derive(Debug, PartialEq, Clone)] pub struct ColumnSchema { pub id: ColumnId, - pub data_type: DataType, + pub data_type: ColType, } impl ColumnSchema { - pub fn new(id: &ColumnId, data_type: DataType) -> ColumnSchema { + pub fn new(id: &ColumnId, data_type: ColType) -> ColumnSchema { ColumnSchema { id: id.clone(), data_type, } } } - -#[derive(Debug, PartialEq, Clone)] -pub enum DataType { - Int, - String, -} diff --git a/db/src/embedded/mod.rs b/db/src/embedded/mod.rs index bbf2790..a32bf54 100644 --- a/db/src/embedded/mod.rs +++ b/db/src/embedded/mod.rs @@ -1,15 +1,19 @@ -use std::rc::Rc; +use std::{cell::RefCell, rc::Rc}; use crate::{ analyzer::Analyzer, - catalog::{types::DataType, Catalog, TableSchemaBuilder}, - optimizer::{types::StorageEngine, types::Tuple, types::Val, Optimizer}, + catalog::{Catalog, TableSchemaBuilder}, + optimizer::{ + types::{StorageEngine, Tuple, Val}, + Optimizer, + }, parser::{Lexer, Parser}, + types::ColType, }; pub struct Db { - catalog_rc: Rc, - storage_rc: Rc, + catalog_rc: Rc>, + storage_rc: Rc>, analyzer: Analyzer, optimizer: Optimizer, } @@ -27,9 +31,9 @@ impl Db { let ts = TableSchemaBuilder::public() .table("table1") - .col("name", DataType::Int) - .col("address", DataType::Int) - .col("email", DataType::Int) + .col("name", ColType::Int) + .col("address", ColType::Int) + .col("email", ColType::Int) .build(); let _ = catalog.register_table(&ts); @@ -59,8 +63,8 @@ impl Db { ], ); - let catalog_rc = Rc::new(catalog); - let storage_rc = Rc::new(storage); + let catalog_rc = Rc::new(RefCell::new(catalog)); + let storage_rc = Rc::new(RefCell::new(storage)); let optimizer = Optimizer::new(Rc::clone(&catalog_rc)); @@ -81,6 +85,6 @@ impl Db { let mut p_plan = self.optimizer.optimize(l_plan); - p_plan.execute_all(Rc::clone(&self.storage_rc)) + p_plan.execute_all(Rc::clone(&self.storage_rc), Rc::clone(&self.catalog_rc)) } } diff --git a/db/src/optimizer/optimizer.rs b/db/src/optimizer/optimizer.rs index a538f8c..fd3eeee 100644 --- a/db/src/optimizer/optimizer.rs +++ b/db/src/optimizer/optimizer.rs @@ -1,17 +1,16 @@ use core::panic; -use std::rc::Rc; - +use std::{cell::RefCell, rc::Rc}; use crate::{ analyzer::{LogicalNode, LogicalPlan, Operator}, - catalog::{types::TableId, Catalog}, + catalog::{types::TableId, Catalog, TableSchemaBuilder}, optimizer::types::{Column, FullScanState, PhysicalPlan}, }; use super::types::Op; pub struct Optimizer { - catalog: Rc, + catalog: Rc>, } impl Optimizer { @@ -30,13 +29,13 @@ impl Optimizer { } } - pub fn new(catalog: Rc) -> Self { + pub fn new(catalog: Rc>) -> Self { Optimizer { catalog } } } struct PhysicalPlanBuilder { - catalog: Rc, + catalog: Rc>, } impl PhysicalPlanBuilder { @@ -46,7 +45,7 @@ impl PhysicalPlanBuilder { let mut cols = Vec::new(); // FIXME: table is hardcoded! let table_id = TableId::public("table1"); - let table_schema = self.catalog.get_table(&table_id).unwrap(); + let table_schema = self.catalog.as_ref().borrow().get_table(&table_id).unwrap(); for c in columns { cols.push(Column::new( @@ -79,8 +78,20 @@ impl PhysicalPlanBuilder { children: self.walk(&node.children[0]), }] } + Operator::CreateTable { + table_name, + columns, + } => { + let mut ts = TableSchemaBuilder::public(); + ts.table(table_name); + for c in columns { + ts.col(&c.column_name, c.column_type); + } + + vec![Op::create_table(ts.build())] + } _ => { - panic!("Unsopported node") + panic!("Unsopported node {:?}", node) } } } @@ -88,16 +99,17 @@ impl PhysicalPlanBuilder { #[cfg(test)] mod tests { - use std::{rc::Rc, vec}; + use std::{cell::RefCell, rc::Rc, vec}; use crate::{ analyzer::{Analyzer, LogicalPlan}, - catalog::{types::DataType, Catalog, TableSchemaBuilder}, + catalog::{Catalog, TableSchemaBuilder}, optimizer::{ types::{Column, Op, PhysicalPlan, StorageEngine, Tuple, Val}, Optimizer, }, parser::{Lexer, Parser}, + types::ColType, }; fn analyze(input: &str) -> LogicalPlan { @@ -116,12 +128,12 @@ mod tests { let ts = TableSchemaBuilder::public() .table("table1") - .col("col1", DataType::Int) + .col("col1", ColType::Int) .build(); let _ = catalog.register_table(&ts); let cs = ts.get_column("col1").unwrap(); - let optimizer = Optimizer::new(Rc::new(catalog)); + let optimizer = Optimizer::new(Rc::new(RefCell::new(catalog))); let p_plan = optimizer.optimize(l_plan); @@ -142,7 +154,7 @@ mod tests { let ts = TableSchemaBuilder::public() .table("table1") - .col("col1", DataType::Int) + .col("col1", ColType::Int) .build(); let _ = catalog.register_table(&ts); @@ -156,15 +168,40 @@ mod tests { ], ); - let catalog_rc = Rc::new(catalog); - let storage_rc = Rc::new(storage); + let catalog_rc = Rc::new(RefCell::new(catalog)); + let storage_rc = Rc::new(RefCell::new(storage)); let optimizer = Optimizer::new(Rc::clone(&catalog_rc)); let mut p_plan = optimizer.optimize(l_plan); - let tuples = p_plan.execute_all(Rc::clone(&storage_rc)); + let tuples = p_plan.execute_all(Rc::clone(&storage_rc), Rc::clone(&catalog_rc)); assert_eq!(tuples.len(), 4); } + + #[test] + fn create_table() { + let l_plan = analyze("CREATE TABLE table1 (col1 INT, col2 INT, col3 INT)"); + + let catalog = Catalog::mem(); + + let optimizer = Optimizer::new(Rc::new(RefCell::new(catalog))); + + let p_plan = optimizer.optimize(l_plan); + + assert_eq!( + p_plan, + PhysicalPlan { + root: Op::create_table( + TableSchemaBuilder::public() + .table("table1") + .col("col1", ColType::Int) + .col("col2", ColType::Int) + .col("col3", ColType::Int) + .build() + ) + } + ); + } } diff --git a/db/src/optimizer/types.rs b/db/src/optimizer/types.rs index 8e64a53..b4a01a0 100644 --- a/db/src/optimizer/types.rs +++ b/db/src/optimizer/types.rs @@ -1,9 +1,12 @@ -use std::{collections::HashMap, rc::Rc}; +use std::{cell::RefCell, collections::HashMap, rc::Rc}; use serde::{Deserialize, Serialize}; use strum::Display; -use crate::catalog::types::ColumnSchema; +use crate::catalog::{ + types::{ColumnSchema, TableSchema}, + Catalog, +}; #[derive(Debug, PartialEq)] pub struct PhysicalPlan { @@ -11,9 +14,13 @@ pub struct PhysicalPlan { } impl PhysicalPlan { - pub fn execute_all(&mut self, engine: Rc) -> Vec { + pub fn execute_all( + &mut self, + engine: Rc>, + catalog: Rc>, + ) -> Vec { let mut tuples = Vec::new(); - self.root.open(Rc::clone(&engine)); + self.root.open(Rc::clone(&engine), Rc::clone(&catalog)); while let Some(t) = self.root.next() { tuples.push(t); } @@ -98,6 +105,10 @@ pub enum Op { state: FullScanState, children: Vec, }, + + CreateTable { + ts: TableSchema, + }, } #[derive(Debug, PartialEq, Clone)] @@ -127,10 +138,14 @@ impl Op { Op::Filter { children } } - pub fn open(&mut self, engine: Rc) { + pub fn create_table(ts: TableSchema) -> Op { + Op::CreateTable { ts } + } + + pub fn open(&mut self, engine: Rc>, catalog: Rc>) { match self { Op::FullScan { name, state, .. } => { - let tuples = engine.scan(name); + let tuples = engine.borrow_mut().scan(name); let iter = FullScanIterator { curr_pos: 0, tuples, @@ -140,24 +155,23 @@ impl Op { } Op::Project { children, .. } => { for c in children { - c.open(Rc::clone(&engine)); + c.open(Rc::clone(&engine), Rc::clone(&catalog)); } } Op::Filter { children } => { for c in children { - c.open(Rc::clone(&engine)); + c.open(Rc::clone(&engine), Rc::clone(&catalog)); } } + Op::CreateTable { ts } => { + let _ = catalog.as_ref().borrow_mut().register_table(ts); + } } } fn next(&mut self) -> Option { match self { - Op::FullScan { - name, - state, - children, - } => { + Op::FullScan { state, .. } => { let iter = state.iterator.as_mut().unwrap(); if iter.curr_pos < iter.tuples.len() { let t = iter.tuples[iter.curr_pos].clone(); @@ -175,6 +189,7 @@ impl Op { let t = children[0].next().unwrap(); Some(t) } + Op::CreateTable { .. } => None, } } @@ -193,6 +208,7 @@ impl Op { c.close(); } } + Op::CreateTable { .. } => {} } } }