-
Notifications
You must be signed in to change notification settings - Fork 68
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
stringify test cases and some comments #122
base: main
Are you sure you want to change the base?
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -149,16 +149,17 @@ impl Lexer { | |
|
||
/// Checks the next token without advancing position | ||
/// See also [advance_token](Self::advance_token) | ||
/// TODO: is there not a way to do this without mutating `self`? | ||
pub fn peek_token(&mut self) -> TokenType { | ||
let position = self.position; | ||
let orig_position = self.position; | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Please try not to rename variables unless there is a need for it or a previous discussion. |
||
let tk = self.next_token(); | ||
self.next_token_position = Some(self.position); | ||
self.position = position; | ||
self.position = orig_position; | ||
tk | ||
} | ||
|
||
/// Advances position. This is used in conjunction with [`peek_token`](Self::peek_token) | ||
/// It is a noop if the has not been a previous peek_token | ||
/// It is a noop if there has not been a previous peek_token | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Thanks! |
||
pub fn advance_token(&mut self) { | ||
if let Some(position) = self.next_token_position { | ||
self.position = position; | ||
|
@@ -446,6 +447,7 @@ impl Lexer { | |
|
||
// Private methods | ||
|
||
// Move position to the end of input and return a `LexerError` value. | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Let's discuss how to best comment code first. This is an awesome contribution but a bit more difficult than it looks like. |
||
fn set_error(&mut self, message: &str, position: usize) -> LexerError { | ||
self.position = self.len; | ||
LexerError { | ||
|
@@ -454,6 +456,7 @@ impl Lexer { | |
} | ||
} | ||
|
||
// Show the next char without advancing, if there is a next char. | ||
fn peek_char(&mut self) -> Option<char> { | ||
let position = self.position; | ||
if position < self.len { | ||
|
@@ -463,6 +466,7 @@ impl Lexer { | |
} | ||
} | ||
|
||
// Returns an error unless `ch_expected` matches the char at the current position. | ||
fn expect_char(&mut self, ch_expected: char) -> Result<()> { | ||
let position = self.position; | ||
if position >= self.len { | ||
|
@@ -483,17 +487,18 @@ impl Lexer { | |
Ok(()) | ||
} | ||
|
||
// Consume (read) the character at the curent position. | ||
fn read_next_char(&mut self) -> Option<char> { | ||
let position = self.position; | ||
if position < self.len { | ||
self.position = position + 1; | ||
Some(self.chars[position]) | ||
let orig_position = self.position; | ||
if orig_position < self.len { | ||
self.position = orig_position + 1; | ||
Some(self.chars[orig_position]) | ||
} else { | ||
None | ||
} | ||
} | ||
|
||
// Consumes an integer from the input stream | ||
// Consumes an integer from the input stream. Does not support thousands separator or sign. | ||
fn consume_integer(&mut self, first: char) -> Result<i32> { | ||
let mut position = self.position; | ||
let len = self.len; | ||
|
@@ -581,6 +586,7 @@ impl Lexer { | |
// Consumes an identifier from the input stream | ||
fn consume_identifier(&mut self) -> String { | ||
let mut position = self.position; | ||
// Advance to the final character in the identifier. | ||
while position < self.len { | ||
let next_char = self.chars[position]; | ||
if next_char.is_alphanumeric() || next_char == '_' || next_char == '.' { | ||
|
@@ -594,6 +600,7 @@ impl Lexer { | |
chars | ||
} | ||
|
||
// Consumes a double-quote delimited string value. | ||
fn consume_string(&mut self) -> String { | ||
let mut position = self.position; | ||
let len = self.len; | ||
|
@@ -602,12 +609,15 @@ impl Lexer { | |
let x = self.chars[position]; | ||
position += 1; | ||
if x != '"' { | ||
// regular character inside string | ||
chars.push(x); | ||
} else if position < len && self.chars[position] == '"' { | ||
// found a double quote before the end of input | ||
chars.push(x); | ||
chars.push(self.chars[position]); | ||
chars.push(self.chars[position]); // '"', character after `x` | ||
position += 1; | ||
} else { | ||
// We are no longer inside the string. | ||
break; | ||
} | ||
} | ||
|
@@ -656,7 +666,7 @@ impl Lexer { | |
Ok(chars) | ||
} | ||
|
||
// Reads an error from the input stream | ||
// Reads an error from the input stream. | ||
fn consume_error(&mut self) -> TokenType { | ||
let errors = &self.language.errors; | ||
let rest_of_formula: String = self.chars[self.position - 1..self.len].iter().collect(); | ||
|
@@ -699,7 +709,8 @@ impl Lexer { | |
} | ||
TokenType::Illegal(self.set_error("Invalid error.", self.position)) | ||
} | ||
|
||
|
||
// Consume (and discard) whitespace starting at current position. | ||
fn consume_whitespace(&mut self) { | ||
let mut position = self.position; | ||
let len = self.len; | ||
|
@@ -740,6 +751,7 @@ impl Lexer { | |
self.consume_range(Some(sheet_name)) | ||
} | ||
|
||
// Read a range, which may be a single cell or a multi-cell range. | ||
fn consume_range(&mut self, sheet: Option<String>) -> TokenType { | ||
let m = if self.mode == LexerMode::A1 { | ||
self.consume_range_a1() | ||
|
@@ -749,8 +761,10 @@ impl Lexer { | |
match m { | ||
Ok(ParsedRange { left, right }) => { | ||
if let Some(right) = right { | ||
// Two-sided range | ||
TokenType::Range { sheet, left, right } | ||
} else { | ||
// Single-cell reference | ||
TokenType::Reference { | ||
sheet, | ||
column: left.column, | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -10,6 +10,7 @@ use crate::expressions::{ | |
types::ParsedReference, | ||
}; | ||
|
||
// TODO: why not jsut pass in a `LexerMode`? It's more expressive than a bool arg. | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. What do you mean by that? Maybe we can discuss? |
||
fn new_lexer(formula: &str, a1_mode: bool) -> Lexer { | ||
let locale = get_locale("en").unwrap(); | ||
let language = get_language("en").unwrap(); | ||
|
@@ -35,8 +36,8 @@ fn test_number_integer() { | |
} | ||
#[test] | ||
fn test_number_pi() { | ||
let mut lx = new_lexer("3.415", true); | ||
assert_eq!(lx.next_token(), Number(3.415)); | ||
let mut lx = new_lexer("3.14159", true); | ||
assert_eq!(lx.next_token(), Number(3.14159)); | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Why are you doing this change? Do you think it covers another case? |
||
assert_eq!(lx.next_token(), EOF); | ||
} | ||
#[test] | ||
|
@@ -685,3 +686,15 @@ fn test_comparisons() { | |
assert_eq!(lx.next_token(), Number(7.0)); | ||
assert_eq!(lx.next_token(), EOF); | ||
} | ||
|
||
#[test] | ||
fn test_whitespace() { | ||
let mut lx = new_lexer(" ", true); | ||
assert_eq!(lx.next_token(), EOF); | ||
} | ||
|
||
#[test] | ||
fn test_empty() { | ||
let mut lx = new_lexer("", true); | ||
assert_eq!(lx.next_token(), EOF); | ||
} | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. These two are nice to have, maybe in a different PR? |
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -58,6 +58,7 @@ mod test_ranges; | |
mod test_move_formula; | ||
#[cfg(test)] | ||
mod test_tables; | ||
mod test_stringify; | ||
|
||
pub(crate) fn parse_range(formula: &str) -> Result<(i32, i32, i32, i32), String> { | ||
let mut lexer = lexer::Lexer::new( | ||
|
@@ -111,7 +112,10 @@ pub enum Node { | |
}, | ||
RangeKind { | ||
sheet_name: Option<String>, | ||
// index into vector (0-based) | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Yes, they are, but I feel we should document this at the API level and not clutter code here. Kind of the leitmotif is that a cell is referenced by three numbers |
||
sheet_index: u32, | ||
// coordinates of opposite corners of the range, e.g. A1:C4. | ||
// true if cell 1 is anchored | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
|
||
absolute_row1: bool, | ||
absolute_column1: bool, | ||
row1: i32, | ||
|
@@ -158,6 +162,7 @@ pub enum Node { | |
right: Box<Node>, | ||
}, | ||
OpPowerKind { | ||
// why is there no kind? | ||
left: Box<Node>, | ||
right: Box<Node>, | ||
}, | ||
|
@@ -193,7 +198,9 @@ pub enum Node { | |
pub struct Parser { | ||
lexer: lexer::Lexer, | ||
worksheets: Vec<String>, | ||
// Cell in which parsing is happening. | ||
context: Option<CellReferenceRC>, | ||
// Map of table name to tables. | ||
tables: HashMap<String, Table>, | ||
} | ||
|
||
|
@@ -202,8 +209,8 @@ impl Parser { | |
let lexer = lexer::Lexer::new( | ||
"", | ||
lexer::LexerMode::A1, | ||
get_locale("en").expect(""), | ||
get_language("en").expect(""), | ||
get_locale("en").expect("Failed to get locale"), | ||
get_language("en").expect("Failed to get language"), | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Thanks! We actually need to remove all expect from code, but that's a problem for a different day. Here expect, expect with message or unwrap are all equally bad. Let me take care of this. |
||
); | ||
Parser { | ||
lexer, | ||
|
@@ -212,14 +219,16 @@ impl Parser { | |
tables, | ||
} | ||
} | ||
|
||
pub fn set_lexer_mode(&mut self, mode: lexer::LexerMode) { | ||
self.lexer.set_lexer_mode(mode) | ||
} | ||
|
||
pub fn set_worksheets(&mut self, worksheets: Vec<String>) { | ||
self.worksheets = worksheets; | ||
} | ||
|
||
|
||
// Convert the cell referred to in context into an Abstract Syntax Tree, and return the head node. | ||
pub fn parse(&mut self, formula: &str, context: &Option<CellReferenceRC>) -> Node { | ||
self.lexer.set_formula(formula); | ||
self.context.clone_from(context); | ||
|
@@ -418,6 +427,7 @@ impl Parser { | |
} | ||
TokenType::Number(s) => Node::NumberKind(s), | ||
TokenType::String(s) => Node::StringKind(s), | ||
// semicolon-delimited list of array entries | ||
TokenType::LeftBrace => { | ||
let t = self.parse_expr(); | ||
if let Node::ParseErrorKind { .. } = t { | ||
|
@@ -464,6 +474,7 @@ impl Parser { | |
Some(name) => self.get_sheet_index_by_name(name), | ||
None => self.get_sheet_index_by_name(&context.sheet), | ||
}; | ||
// todo: bust out helper function for this calculation | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I don't know what you mean by this. Maybe let's talk? |
||
let a1_mode = self.lexer.is_a1_mode(); | ||
let row = if absolute_row || !a1_mode { | ||
row | ||
|
@@ -508,6 +519,8 @@ impl Parser { | |
Some(name) => self.get_sheet_index_by_name(name), | ||
None => self.get_sheet_index_by_name(&context.sheet), | ||
}; | ||
// 1-based indices. These values will be adjusted by the context unless | ||
// they are absolute. | ||
let mut row1 = left.row; | ||
let mut column1 = left.column; | ||
let mut row2 = right.row; | ||
|
@@ -532,6 +545,7 @@ impl Parser { | |
column2 -= context.column | ||
}; | ||
} | ||
// Swap corners if necessary. | ||
if row1 > row2 { | ||
(row2, row1) = (row1, row2); | ||
(absolute_row2, absolute_row1) = (absolute_row1, absolute_row2); | ||
|
@@ -540,6 +554,7 @@ impl Parser { | |
(column2, column1) = (column1, column2); | ||
(absolute_column2, absolute_column1) = (absolute_column1, absolute_column2); | ||
} | ||
// 0-based index | ||
match sheet_index { | ||
Some(index) => Node::RangeKind { | ||
sheet_name: sheet, | ||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
This kind of questions shouldn't be in code. The reason is that
peek_token
advances te token position. Otherwise we would hace to parse it all over again if we "accept" it