From ad3502147563bbc27e24dd27427f4f531a253229 Mon Sep 17 00:00:00 2001 From: Andrei Lapets Date: Thu, 28 Mar 2024 02:18:48 -0400 Subject: [PATCH] Update/improve implementation; add annotations, doctests, and README example. --- README.rst | 46 +++++++++++++++++++++++ src/parsial/parsial.py | 83 +++++++++++++++++++++++++++++++++++------- 2 files changed, 116 insertions(+), 13 deletions(-) diff --git a/README.rst b/README.rst index 23a59d8..e2e95f8 100644 --- a/README.rst +++ b/README.rst @@ -36,6 +36,52 @@ The library can be imported in the usual way: from parsial import parsial +Example +^^^^^^^ + +.. |parsial| replace:: ``parsial`` +.. _parsial: https://parsial.readthedocs.io/en/0.1.0/_source/parsial.html#parsial.parsial.parsial + +The |parsial|_ function accepts a parsing function (that takes a string input) and returns a new parsing function. This new function attempts to parse an input string using the original parsing function *even if parsing errors occur*. This is accomplished by selectively removing portions of the input that cause errors: + +.. code-block:: python + + >>> lines = [ + ... 'x = 123', + ... 'y =', + ... 'print(x)', + ... 'z = x +', + ... 'print(2 * x)' + ... ] + >>> import ast + >>> parser = parsial(ast.parse) + >>> (a, slices) = parser('\n'.join(lines)) + >>> exec(compile(a, '', 'exec')) + 123 + 246 + +.. |slice| replace:: ``slice`` +.. _slice: https://docs.python.org/3/library/functions.html#slice + +In addition to returning the result, the new function also returns a list of |slice|_ instances (one for each line found in the input string): + +.. code-block:: python + + >>> for s in slices: + ... print(s) + slice(0, 7, None) + slice(0, 0, None) + slice(0, 8, None) + slice(0, 0, None) + slice(0, 12, None) + +Each |slice|_ instance indicates what portion of the corresponding line in the input was included in the successful parsing attempt: + +.. 
code-block:: python + + >>> [l[s] for (l, s) in zip(lines, slices)] + ['x = 123', '', 'print(x)', '', 'print(2 * x)'] + Development ----------- All installation and development dependencies are fully specified in ``pyproject.toml``. The ``project.optional-dependencies`` object is used to `specify optional requirements `__ for various development tasks. This makes it possible to specify additional options (such as ``docs``, ``lint``, and so on) when performing installation using `pip `__: diff --git a/src/parsial/parsial.py b/src/parsial/parsial.py index cdcd3f2..f20b3a4 100644 --- a/src/parsial/parsial.py +++ b/src/parsial/parsial.py @@ -3,40 +3,97 @@ that skips portions of the input that contain syntax errors. """ from __future__ import annotations -from typing import Any, List, Callable +from typing import Any, List, Tuple, Callable import doctest -def parsial(parse: Callable[[str], Any]) -> Callable[[str], List[int]]: +def parsial( + parse: Callable[[str], Any] + ) -> Callable[[str], Tuple[Any, List[slice]]]: """ - Accept a parsing function that takes string inputs and return a function - that returns some subset of the lines in the input string that, when - removed, allow the the parsing function to succeed. + Accept a parsing function (that takes a string input) and return a new + parsing function. This new function attempts to parse an input string + using the original parsing function even if parsing errors occur. This + is done by selectively removing portions of the input that cause + errors. + + >>> lines = [ + ... 'x = 123', + ... 'y =', + ... 'print(x)', + ... 'z = x +', + ... 'print(2 * x)' + ... ] + >>> import ast + >>> parser = parsial(ast.parse) + >>> (a, slices) = parser('\\n'.join(lines)) + >>> exec(compile(a, '', 'exec')) + 123 + 246 + + In addition to returning the result, the new function also returns a + list of :obj:`slice` instances (one for each line found in the input + string). + + >>> for s in slices: + ... 
print(s) + slice(0, 7, None) + slice(0, 0, None) + slice(0, 8, None) + slice(0, 0, None) + slice(0, 12, None) + + Each :obj:`slice` instance indicates what portion of the corresponding + line in the input was included in the successful parsing attempt. + + >>> [l[s] for (l, s) in zip(lines, slices)] + ['x = 123', '', 'print(x)', '', 'print(2 * x)'] + + For a string that can be parsed successfully, the parser supplied to + this function is invoked exactly once. In the worst case, it is invoked + once per line of the input string. """ # Define the new parsing function. def parse_(source: str) -> List[int]: lines = source.split('\n') + lines_ = None + result = None + + # Find the longest stretch of lines that begins with the first line + # and leads to a successful parse. for end in range(len(lines), -1, -1): try: - parse('\n'.join(lines[:end])) + result = parse('\n'.join(lines[:end])) lines_ = lines[:end] break - except Exception as _: + except Exception as _: # pylint: disable=broad-exception-caught pass - skips = [] + # If the entire input was not parsed via the block above, attempt to + # include each remaining line to see if a parse succeeds. Keep track + # of which lines are skipped. + skips = set() if end < len(lines): - skips.append(end) + skips.add(end) lines_ = lines[:end] + [''] for i in range(end + 1, len(lines)): try: lines__ = lines_ + [lines[i]] - parse('\n'.join(lines__)) + result = parse('\n'.join(lines__)) lines_ = lines__ - except Exception as _: + except Exception as _: # pylint: disable=broad-exception-caught lines_ += [''] - skips.append(i) + skips.add(i) - return skips + # Return the result of a successful parsing attempt, as well as a list + # of slices indicating what portions of each line were included to + # obtain the result. + return ( + result, + [ + slice(0, len(line) if i not in skips else 0) + for (i, line) in enumerate(lines) + ] + ) return parse_