diff --git a/Cargo.toml b/Cargo.toml index 4b47fdb4..933ce1ac 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -25,7 +25,7 @@ nightly = [] # Allows deeper recursion by dynamically spilling stack state on to the heap. spill-stack = ["stacker", "std"] -# Allows parser memoisation, speeding up heavily back-tracking parsers and allowing left recursion. +# Allows parser memoization, speeding up heavily back-tracking parsers and allowing left recursion. memoization = [] # Allows extending chumsky by writing your own parser implementations. diff --git a/src/combinator.rs b/src/combinator.rs index 81774f8d..a501d4ca 100644 --- a/src/combinator.rs +++ b/src/combinator.rs @@ -682,15 +682,15 @@ where go_extra!(O); } -/// See [`Parser::memoised`]. +/// See [`Parser::memoized`]. #[cfg(feature = "memoization")] #[derive(Copy, Clone)] -pub struct Memoised { +pub struct Memoized { pub(crate) parser: A, } #[cfg(feature = "memoization")] -impl<'a, I, E, A, O> ParserSealed<'a, I, O, E> for Memoised +impl<'a, I, E, A, O> ParserSealed<'a, I, O, E> for Memoized where I: Input<'a>, E: ParserExtra<'a, I>, diff --git a/src/input.rs b/src/input.rs index 558ac026..ea803030 100644 --- a/src/input.rs +++ b/src/input.rs @@ -651,6 +651,8 @@ where errors: &mut self.errors, state: &mut self.state, ctx: &self.ctx, + #[cfg(debug_assertions)] + rec_data: None, #[cfg(feature = "memoization")] memos: &mut self.memos, } @@ -666,6 +668,8 @@ where errors: &mut self.errors, state: &mut self.state, ctx: &self.ctx, + #[cfg(debug_assertions)] + rec_data: None, #[cfg(feature = "memoization")] memos: &mut self.memos, } @@ -687,6 +691,8 @@ pub struct InputRef<'a, 'parse, I: Input<'a>, E: ParserExtra<'a, I>> { pub(crate) errors: &'parse mut Errors, pub(crate) state: &'parse mut E::State, pub(crate) ctx: &'parse E::Context, + #[cfg(debug_assertions)] + pub(crate) rec_data: Option<(I::Offset, usize)>, #[cfg(feature = "memoization")] pub(crate) memos: &'parse mut HashMap<(I::Offset, usize), Option>>, } @@ -708,6 +714,8 @@ impl<'a, 'parse, I: Input<'a>, E: ParserExtra<'a, I>> InputRef<'a, 'parse, I, E> state: self.state, ctx: new_ctx, errors: self.errors, + #[cfg(debug_assertions)] + rec_data: self.rec_data, #[cfg(feature = "memoization")] memos: self.memos, }; @@ -735,6 +743,8 @@ impl<'a, 'parse, I: Input<'a>, E: ParserExtra<'a, I>> InputRef<'a, 'parse, I, E> state: self.state, ctx: self.ctx, errors: self.errors, + #[cfg(debug_assertions)] + rec_data: None, #[cfg(feature = "memoization")] memos, }; diff --git a/src/lib.rs b/src/lib.rs index aa66f886..5333a8f2 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -635,11 +635,11 @@ pub trait Parser<'a, I: Input<'a>, O, E: ParserExtra<'a, I> = extra::Default>: /// [left recursion](https://en.wikipedia.org/wiki/Left_recursion). // TODO: Example #[cfg(feature = "memoization")] - fn memoised(self) -> Memoised + fn memoized(self) -> Memoized where Self: Sized, { - Memoised { parser: self } + Memoized { parser: self } } /// Transform all outputs of this parser to a pretermined value. @@ -2262,7 +2262,7 @@ mod tests { .then_ignore(just('+')) .then(atom.clone()) .map(|(a, b)| format!("{}{}", a, b)) - .memoised() + .memoized() .or(atom) }) .then_ignore(end()) @@ -2292,7 +2292,7 @@ mod tests { .then_ignore(just('+')) .then(expr) .map(|(a, b)| format!("{}{}", a, b)) - .memoised(); + .memoized(); sum.or(atom) }) @@ -2306,28 +2306,56 @@ mod tests { mod debug_asserts { use super::prelude::*; - // TODO panic when left recursive parser is detected - // #[test] - // #[should_panic] - // fn debug_assert_left_recursive() { - // recursive(|expr| { - // let atom = any::<&str, extra::Default>() - // .filter(|c: &char| c.is_alphabetic()) - // .repeated() - // .at_least(1) - // .collect(); - - // let sum = expr - // .clone() - // .then_ignore(just('+')) - // .then(expr) - // .map(|(a, b)| format!("{}{}", a, b)); - - // sum.or(atom) - // }) - // .then_ignore(end()) - // .parse("a+b+c"); - // } + #[test] + #[should_panic] + fn debug_assert_double_left_recursive() { + recursive::<&str, char, extra::Default, _, _>(|a| recursive(move |_| a.or(just('a')))) + .parse("a"); + } + + #[test] + #[should_panic] + fn debug_assert_left_recursive() { + recursive(|expr| { + let atom = any::<&str, extra::Default>() + .filter(|c: &char| c.is_alphabetic()) + .repeated() + .at_least(1) + .collect(); + + let sum = expr + .clone() + .then_ignore(just('+')) + .then(expr) + .map(|(a, b)| format!("{}{}", a, b)); + + sum.or(atom) + }) + .parse("a+b+c"); + } + + #[test] + #[should_panic] + fn debug_assert_left_recursive_delc() { + let mut expr = Recursive::declare(); + expr.define({ + let atom = any::<&str, extra::Default>() + .filter(|c: &char| c.is_alphabetic()) + .repeated() + .at_least(1) + .collect(); + + let sum = expr + .clone() + .then_ignore(just('+')) + .then(expr.clone()) + .map(|(a, b)| format!("{}{}", a, b)); + + sum.or(atom) + }); + + expr.parse("a+b+c"); + } #[test] #[should_panic] @@ -2375,7 +2403,5 @@ mod tests { .repeated() .parse("a+b+c"); } - - // TODO what about IterConfigure and TryIterConfigure? } } diff --git a/src/recursive.rs b/src/recursive.rs index ea6af702..ad536a87 100644 --- a/src/recursive.rs +++ b/src/recursive.rs @@ -82,6 +82,7 @@ pub struct Indirect<'a, 'b, I: Input<'a>, O, Extra: ParserExtra<'a, I>> { /// Prefer to use [`recursive()`], which exists as a convenient wrapper around both operations, if possible. pub struct Recursive { inner: RecursiveInner

, + location: Location<'static>, } impl<'a, 'b, I: Input<'a>, O, E: ParserExtra<'a, I>> Recursive> { @@ -123,11 +124,13 @@ impl<'a, 'b, I: Input<'a>, O, E: ParserExtra<'a, I>> Recursive Self { Recursive { inner: RecursiveInner::Owned(RefC::new(Indirect { inner: OnceCell::new(), })), + location: *Location::caller(), } } @@ -151,6 +154,36 @@ impl Recursive

{ .expect("Recursive parser used before being defined"), } } + + #[inline(always)] + fn with_left_recursive_check<'a, I, E, R>( + &self, + inp: &mut InputRef<'a, '_, I, E>, + f: impl FnOnce(&mut InputRef<'a, '_, I, E>) -> R, + ) -> R + where + I: Input<'a>, + E: ParserExtra<'a, I>, + { + // TODO: Don't use address, since this might not be constant? + #[cfg(debug_assertions)] + let key = ( + inp.offset().offset, + RefC::as_ptr(&self.parser()) as *const () as usize, + ); + + #[cfg(debug_assertions)] + if inp.memos.insert(key, None).is_some() { + panic!("Recursive parser defined at {} is left-recursive. Consider using `.memoized()` or restructuring this parser to be right-recursive.", self.location); + } + + let res = f(inp); + + #[cfg(debug_assertions)] + inp.memos.remove(&key); + + res + } } impl Clone for Recursive

{ @@ -160,6 +193,7 @@ impl Clone for Recursive

{ RecursiveInner::Owned(x) => RecursiveInner::Owned(x.clone()), RecursiveInner::Unowned(x) => RecursiveInner::Unowned(x.clone()), }, + location: self.location, } } } @@ -182,15 +216,17 @@ where { #[inline] fn go(&self, inp: &mut InputRef<'a, '_, I, E>) -> PResult { - recurse(move || { - M::invoke( - self.parser() - .inner - .get() - .expect("Recursive parser used before being defined") - .as_ref(), - inp, - ) + self.with_left_recursive_check(inp, |inp| { + recurse(move || { + M::invoke( + self.parser() + .inner + .get() + .expect("Recursive parser used before being defined") + .as_ref(), + inp, + ) + }) }) } @@ -204,7 +240,7 @@ where { #[inline] fn go(&self, inp: &mut InputRef<'a, '_, I, E>) -> PResult { - recurse(move || M::invoke(&*self.parser(), inp)) + self.with_left_recursive_check(inp, |inp| recurse(move || M::invoke(&*self.parser(), inp))) } go_extra!(O); @@ -260,6 +296,7 @@ where /// ]))); /// ``` // INFO: Clone bound not actually needed, but good to be safe for future compat +#[track_caller] pub fn recursive<'a, 'b, I, O, E, A, F>(f: F) -> Recursive> where I: Input<'a>, @@ -267,10 +304,12 @@ where A: Parser<'a, I, O, E> + Clone + MaybeSync + 'b, F: FnOnce(Recursive>) -> A, { + let location = *Location::caller(); let rc = RefC::new_cyclic(|rc| { let rc: RefW> = rc.clone() as _; let parser = Recursive { inner: RecursiveInner::Unowned(rc.clone()), + location, }; f(parser) @@ -278,5 +317,6 @@ where Recursive { inner: RecursiveInner::Owned(rc), + location, } }