ironcalc · jaycarlton · Nov 3, 2024 · Nov 5, 2024 · Nov 6, 2024 · Nov 10, 2024
diff --git a/base/src/expressions/lexer/mod.rs b/base/src/expressions/lexer/mod.rs
@@ -149,16 +149,17 @@ impl Lexer {
 
     /// Checks the next token without advancing position
     /// See also [advance_token](Self::advance_token)
+    /// TODO: is there not a way to do this without mutating `self`?
     pub fn peek_token(&mut self) -> TokenType {
-        let position = self.position;
+        let orig_position = self.position;
         let tk = self.next_token();
         self.next_token_position = Some(self.position);
-        self.position = position;
+        self.position = orig_position;
         tk
     }
 
     /// Advances position. This is used in conjunction with [`peek_token`](Self::peek_token)
-    /// It is a noop if the has not been a previous peek_token
+    /// It is a noop if there has not been a previous peek_token
     pub fn advance_token(&mut self) {
         if let Some(position) = self.next_token_position {
             self.position = position;
@@ -446,6 +447,7 @@ impl Lexer {
 
     // Private methods
 
+    // Move position to the end of input and return a `LexerError` value.
     fn set_error(&mut self, message: &str, position: usize) -> LexerError {
         self.position = self.len;
         LexerError {
@@ -454,6 +456,7 @@ impl Lexer {
         }
     }
 
+    // Show the next char without advancing, if there is a next char.
     fn peek_char(&mut self) -> Option<char> {
         let position = self.position;
         if position < self.len {
@@ -463,6 +466,7 @@ impl Lexer {
         }
     }
 
+    // Returns an error unless `ch_expected` matches the char at the current position.
     fn expect_char(&mut self, ch_expected: char) -> Result<()> {
         let position = self.position;
         if position >= self.len {
@@ -483,17 +487,18 @@ impl Lexer {
         Ok(())
     }
 
+    // Consume (read) the character at the curent position.
     fn read_next_char(&mut self) -> Option<char> {
-        let position = self.position;
-        if position < self.len {
-            self.position = position + 1;
-            Some(self.chars[position])
+        let orig_position = self.position;
+        if orig_position < self.len {
+            self.position = orig_position + 1;
+            Some(self.chars[orig_position])
         } else {
             None
         }
     }
 
-    // Consumes an integer from the input stream
+    // Consumes an integer from the input stream. Does not support thousands separator or sign.
     fn consume_integer(&mut self, first: char) -> Result<i32> {
         let mut position = self.position;
         let len = self.len;
@@ -581,6 +586,7 @@ impl Lexer {
     // Consumes an identifier from the input stream
     fn consume_identifier(&mut self) -> String {
         let mut position = self.position;
+        // Advance to the final character in the identifier.
         while position < self.len {
             let next_char = self.chars[position];
             if next_char.is_alphanumeric() || next_char == '_' || next_char == '.' {
@@ -594,6 +600,7 @@ impl Lexer {
         chars
     }
 
+    // Consumes a double-quote delimited string value.
     fn consume_string(&mut self) -> String {
         let mut position = self.position;
         let len = self.len;
@@ -602,12 +609,15 @@ impl Lexer {
             let x = self.chars[position];
             position += 1;
             if x != '"' {
+                // regular character inside string
                 chars.push(x);
             } else if position < len && self.chars[position] == '"' {
+                // found a double quote before the end of input
                 chars.push(x);
-                chars.push(self.chars[position]);
+                chars.push(self.chars[position]);  // '"', character after `x`
                 position += 1;
             } else {
+                // We are no longer inside the string.
                 break;
             }
         }
@@ -656,7 +666,7 @@ impl Lexer {
         Ok(chars)
     }
 
-    // Reads an error from the input stream
+    // Reads an error from the input stream.    
     fn consume_error(&mut self) -> TokenType {
         let errors = &self.language.errors;
         let rest_of_formula: String = self.chars[self.position - 1..self.len].iter().collect();
@@ -699,7 +709,8 @@ impl Lexer {
         }
         TokenType::Illegal(self.set_error("Invalid error.", self.position))
     }
-
+
+    // Consume (and discard) whitespace starting at current position.
     fn consume_whitespace(&mut self) {
         let mut position = self.position;
         let len = self.len;
@@ -740,6 +751,7 @@ impl Lexer {
         self.consume_range(Some(sheet_name))
     }
 
+    // Read a range, which may be a single cell or a multi-cell range.
     fn consume_range(&mut self, sheet: Option<String>) -> TokenType {
         let m = if self.mode == LexerMode::A1 {
             self.consume_range_a1()
@@ -749,8 +761,10 @@ impl Lexer {
         match m {
             Ok(ParsedRange { left, right }) => {
                 if let Some(right) = right {
+                    // Two-sided range
                     TokenType::Range { sheet, left, right }
                 } else {
+                    // Single-cell reference
                     TokenType::Reference {
                         sheet,
                         column: left.column,

diff --git a/base/src/expressions/lexer/test/test_common.rs b/base/src/expressions/lexer/test/test_common.rs
@@ -10,6 +10,7 @@ use crate::expressions::{
     types::ParsedReference,
 };
 
+// TODO: why not jsut pass in a `LexerMode`? It's more expressive than a bool arg.
 fn new_lexer(formula: &str, a1_mode: bool) -> Lexer {
     let locale = get_locale("en").unwrap();
     let language = get_language("en").unwrap();
@@ -35,8 +36,8 @@ fn test_number_integer() {
 }
 #[test]
 fn test_number_pi() {
-    let mut lx = new_lexer("3.415", true);
-    assert_eq!(lx.next_token(), Number(3.415));
+    let mut lx = new_lexer("3.14159", true);
+    assert_eq!(lx.next_token(), Number(3.14159));
     assert_eq!(lx.next_token(), EOF);
 }
 #[test]
@@ -685,3 +686,15 @@ fn test_comparisons() {
     assert_eq!(lx.next_token(), Number(7.0));
     assert_eq!(lx.next_token(), EOF);
 }
+
+#[test]
+fn test_whitespace() {
+    let mut lx = new_lexer("   ", true);
+    assert_eq!(lx.next_token(), EOF);
+}
+
+#[test]
+fn test_empty() {
+    let mut lx = new_lexer("", true);
+    assert_eq!(lx.next_token(), EOF);
+}
diff --git a/base/src/expressions/parser/mod.rs b/base/src/expressions/parser/mod.rs
@@ -58,6 +58,7 @@ mod test_ranges;
 mod test_move_formula;
 #[cfg(test)]
 mod test_tables;
+mod test_stringify;
 
 pub(crate) fn parse_range(formula: &str) -> Result<(i32, i32, i32, i32), String> {
     let mut lexer = lexer::Lexer::new(
@@ -111,7 +112,10 @@ pub enum Node {
     },
     RangeKind {
         sheet_name: Option<String>,
+        // index into vector (0-based)
         sheet_index: u32,
+        // coordinates of opposite corners of the range, e.g. A1:C4.
+        // true if cell 1 is anchored
         absolute_row1: bool,
         absolute_column1: bool,
         row1: i32,
@@ -158,6 +162,7 @@ pub enum Node {
         right: Box<Node>,
     },
     OpPowerKind {
+        // why is there no kind?
         left: Box<Node>,
         right: Box<Node>,
     },
@@ -193,7 +198,9 @@ pub enum Node {
 pub struct Parser {
     lexer: lexer::Lexer,
     worksheets: Vec<String>,
+    // Cell in which parsing is happening.
     context: Option<CellReferenceRC>,
+    // Map of table name to tables.
     tables: HashMap<String, Table>,
 }
 
@@ -202,8 +209,8 @@ impl Parser {
         let lexer = lexer::Lexer::new(
             "",
             lexer::LexerMode::A1,
-            get_locale("en").expect(""),
-            get_language("en").expect(""),
+            get_locale("en").expect("Failed to get locale"),
+            get_language("en").expect("Failed to get language"),
         );
         Parser {
             lexer,
@@ -212,14 +219,16 @@ impl Parser {
             tables,
         }
     }
+
     pub fn set_lexer_mode(&mut self, mode: lexer::LexerMode) {
         self.lexer.set_lexer_mode(mode)
     }
 
     pub fn set_worksheets(&mut self, worksheets: Vec<String>) {
         self.worksheets = worksheets;
     }
-
+
+    // Convert the cell referred to in context into an Abstract Syntax Tree, and return the head node.
     pub fn parse(&mut self, formula: &str, context: &Option<CellReferenceRC>) -> Node {
         self.lexer.set_formula(formula);
         self.context.clone_from(context);
@@ -418,6 +427,7 @@ impl Parser {
             }
             TokenType::Number(s) => Node::NumberKind(s),
             TokenType::String(s) => Node::StringKind(s),
+            // semicolon-delimited list of array entries
             TokenType::LeftBrace => {
                 let t = self.parse_expr();
                 if let Node::ParseErrorKind { .. } = t {
@@ -464,6 +474,7 @@ impl Parser {
                     Some(name) => self.get_sheet_index_by_name(name),
                     None => self.get_sheet_index_by_name(&context.sheet),
                 };
+                // todo: bust out helper function for this calculation
                 let a1_mode = self.lexer.is_a1_mode();
                 let row = if absolute_row || !a1_mode {
                     row
@@ -508,6 +519,8 @@ impl Parser {
                     Some(name) => self.get_sheet_index_by_name(name),
                     None => self.get_sheet_index_by_name(&context.sheet),
                 };
+                // 1-based indices. These values will be adjusted by the context unless
+                // they  are absolute.
                 let mut row1 = left.row;
                 let mut column1 = left.column;
                 let mut row2 = right.row;
@@ -532,6 +545,7 @@ impl Parser {
                         column2 -= context.column
                     };
                 }
+                // Swap corners if necessary.
                 if row1 > row2 {
                     (row2, row1) = (row1, row2);
                     (absolute_row2, absolute_row1) = (absolute_row1, absolute_row2);
@@ -540,6 +554,7 @@ impl Parser {
                     (column2, column1) = (column1, column2);
                     (absolute_column2, absolute_column1) = (absolute_column1, absolute_column2);
                 }
+                // 0-based index
                 match sheet_index {
                     Some(index) => Node::RangeKind {
                         sheet_name: sheet,

diff --git a/base/src/expressions/parser/move_formula.rs b/base/src/expressions/parser/move_formula.rs
@@ -42,6 +42,7 @@ pub(crate) fn move_formula(node: &Node, move_context: &MoveContext) -> String {
     to_string_moved(node, move_context)
 }
 
+// Move the named function by `move_context`, updating its parsed argument nodes.
 fn move_function(name: &str, args: &Vec<Node>, move_context: &MoveContext) -> String {
     let mut first = true;
     let mut arguments = "".to_string();

diff --git a/base/src/expressions/parser/test.rs b/base/src/expressions/parser/test.rs
@@ -44,8 +44,11 @@ fn test_parser_absolute_column() {
         row: 1,
         column: 1,
     };
-    let t = parser.parse("$A1", &Some(cell_reference));
+    let t = parser.parse("$A1", &Some(cell_reference.clone()));
     assert_eq!(to_rc_format(&t), "R[0]C1");
+
+    let t = parser.parse("SIN($A1)", &Some(cell_reference.clone()));
+    assert_eq!(to_rc_format(&t), "SIN(R[0]C1)");
 }
 
 #[test]