Skip to content

Commit

Permalink
Merge pull request #133 from byexamples/Issue-128-Replace-regex-engine
Browse files Browse the repository at this point in the history
Issue 128 replace regex engine
  • Loading branch information
eldipa authored Nov 26, 2020
2 parents c636c12 + e03fff6 commit 483c17c
Show file tree
Hide file tree
Showing 23 changed files with 112 additions and 32 deletions.
5 changes: 3 additions & 2 deletions byexample/common.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
from __future__ import unicode_literals
import pprint, traceback, contextlib, os, re, string, shlex, logging, time
import pprint, traceback, contextlib, os, string, shlex, logging, time
from . import regex as re
'''
>>> from byexample.common import tohuman, short_string
>>> import time
Expand All @@ -11,7 +12,7 @@ def indent(s, indent=4):
''' Indent the given text.
See doctest._indent for the code that inspired this.
'''
return re.sub('(?m)^(?!$)', indent * ' ', s)
return re.compile('(?m)^(?!$)').sub(indent * ' ', s)


def short_string(s, max=14, sep='..'):
Expand Down
3 changes: 2 additions & 1 deletion byexample/differ.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
from __future__ import unicode_literals
from .common import colored, ShebangTemplate
import string, re, difflib, tempfile, os, subprocess
import string, difflib, tempfile, os, subprocess
from . import regex as re

# what unicodes are control code?
# import unicodedata
Expand Down
3 changes: 2 additions & 1 deletion byexample/expected.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
from __future__ import unicode_literals
from .log import clog, log_context
import string, re, time
import string, time
from . import regex as re
'''
>>> from byexample.log import init_log_system
>>> init_log_system()
Expand Down
7 changes: 4 additions & 3 deletions byexample/finder.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
from __future__ import unicode_literals
import re, os
import os
from . import regex as re
from .common import build_where_msg, tohuman, \
enhance_exceptions

Expand Down Expand Up @@ -476,14 +477,14 @@ def check_keep_matching(self, example_str, match):
'check_and_remove_indent' and other processing functions.
>>> from byexample.finder import ExampleFinder
>>> import re
>>> import byexample.regex as re
>>> mfinder = ExampleFinder(0, 'utf8'); mfinder.target = 'python-prompt'
>>> check_and_remove_indent = mfinder.check_and_remove_indent
>>> check_keep_matching = mfinder.check_keep_matching
>>> code = ' >>> 1 + 2'
>>> match = re.match(r'[ ]*>>> [^\n]*', code)
>>> match = re.compile(r'[ ]*>>> [^\n]*').match(code)
>>> code_i = check_and_remove_indent(code, ' ', (1, 2, 'foo.rst', None))
>>> code_i != code
Expand Down
9 changes: 4 additions & 5 deletions byexample/modules/clipboard.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
from __future__ import unicode_literals
from byexample.concern import Concern
import re
import byexample.regex as re
from functools import partial

stability = 'provisional'
Expand Down Expand Up @@ -52,9 +52,8 @@ def before_build_regex(self, example, options):
repl = partial(
self.repl_from_clipboard, clipboard=self.clipboard, missing=[]
)
example.expected_str = re.sub(
self.PASTE_RE, repl, example.expected_str
)
example.expected_str = re.compile(self.PASTE_RE
).sub(repl, example.expected_str)

# do not check for missings: we assume that they are capture tags

Expand All @@ -75,7 +74,7 @@ def finish_parse(self, example, options, exception):
clipboard=self.clipboard,
missing=missing
)
example.source = re.sub(self.PASTE_RE, repl, example.source)
example.source = re.compile(self.PASTE_RE).sub(repl, example.source)

if missing:
raise PasteError(example, missing)
Expand Down
1 change: 0 additions & 1 deletion byexample/modules/cond.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,5 @@
from __future__ import unicode_literals
from byexample.concern import Concern
import re
from functools import partial

stability = 'experimental'
Expand Down
3 changes: 2 additions & 1 deletion byexample/modules/cpp.py
Original file line number Diff line number Diff line change
Expand Up @@ -30,7 +30,8 @@
"""

from __future__ import unicode_literals
import re, sys, time
import sys, time
import byexample.regex as re
from byexample.common import constant
from byexample.parser import ExampleParser
from byexample.runner import ExampleRunner, PexpectMixin, ShebangTemplate
Expand Down
3 changes: 2 additions & 1 deletion byexample/modules/delimiters.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
from __future__ import unicode_literals
import re, ast, itertools
import ast, itertools
import byexample.regex as re
from byexample.finder import ZoneDelimiter
from byexample.common import constant
from byexample.log import clog
Expand Down
3 changes: 2 additions & 1 deletion byexample/modules/elixir.py
Original file line number Diff line number Diff line change
Expand Up @@ -49,7 +49,8 @@
"""

from __future__ import unicode_literals
import re, pexpect, sys, time
import pexpect, sys, time
import byexample.regex as re
from byexample.common import constant
from byexample.parser import ExampleParser
from byexample.finder import ExampleFinder
Expand Down
3 changes: 2 additions & 1 deletion byexample/modules/gdb.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,8 @@
"""

from __future__ import unicode_literals
import re, pexpect, sys, time
import pexpect, sys, time
import byexample.regex as re
from byexample.common import constant
from byexample.parser import ExampleParser
from byexample.finder import ExampleFinder
Expand Down
2 changes: 1 addition & 1 deletion byexample/modules/javascript.py
Original file line number Diff line number Diff line change
Expand Up @@ -33,7 +33,7 @@
"""

from __future__ import unicode_literals
import re
import byexample.regex as re
from byexample.common import constant, abspath
from byexample.parser import ExampleParser
from byexample.finder import ExampleFinder
Expand Down
2 changes: 1 addition & 1 deletion byexample/modules/php.py
Original file line number Diff line number Diff line change
Expand Up @@ -37,7 +37,7 @@
"""

from __future__ import unicode_literals
import re
import byexample.regex as re
from byexample.common import constant
from byexample.parser import ExampleParser
from byexample.finder import ExampleFinder
Expand Down
3 changes: 2 additions & 1 deletion byexample/modules/python.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,8 @@
"""

from __future__ import unicode_literals
import re, pexpect, sys, time
import pexpect, sys, time
import byexample.regex as re
from byexample.common import constant
from byexample.log import clog
from byexample.parser import ExampleParser, ExtendOptionParserMixin
Expand Down
3 changes: 2 additions & 1 deletion byexample/modules/ruby.py
Original file line number Diff line number Diff line change
Expand Up @@ -53,7 +53,8 @@
"""

from __future__ import unicode_literals
import re, pexpect, sys, time
import pexpect, sys, time
import byexample.regex as re
from byexample.common import constant
from byexample.parser import ExampleParser
from byexample.finder import ExampleFinder
Expand Down
3 changes: 2 additions & 1 deletion byexample/modules/shell.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,8 @@
"""

from __future__ import unicode_literals
import re, pexpect, sys, time
import pexpect, sys, time
import byexample.regex as re
from byexample.common import constant, Countdown
from byexample.parser import ExampleParser
from byexample.finder import ExampleFinder
Expand Down
5 changes: 3 additions & 2 deletions byexample/parser.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
from __future__ import unicode_literals
import re, shlex, argparse, bisect, collections
import shlex, argparse, bisect, collections
from . import regex as re
from .common import tohuman, constant
from .options import OptionParser, UnrecognizedOption, ExtendOptionParserMixin
from .expected import _LinearExpected, _RegexExpected
Expand Down Expand Up @@ -218,7 +219,7 @@ def expected_as_regexs(
>>> from byexample.parser import ExampleParser
>>> from functools import partial
>>> import re
>>> import byexample.regex as re
>>> parser = ExampleParser(0, 'utf8', None); parser.language = 'python'
>>> _as_regexs = partial(parser.expected_as_regexs, tags_enabled=True, input_enabled=True, normalize_whitespace=False, input_prefix_len_range=(6,12))
Expand Down
4 changes: 2 additions & 2 deletions byexample/parser_sm.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
import re
from . import regex as re
from .common import constant, short_string
from .log import clog, log_context, DEBUG
import pprint
Expand All @@ -12,7 +12,7 @@
>>> from byexample.parser_sm import SM, SM_NormWS, SM_NotNormWS
>>> from byexample.parser import ExampleParser
>>> import re
>>> import byexample.regex as re
>>> from functools import partial
>>> parser = ExampleParser(0, 'utf8', None); parser.language = 'python'
Expand Down
15 changes: 15 additions & 0 deletions byexample/regex.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,15 @@
import sys
import regex


def compile(pattern, flags=0):
    ''' Compile <pattern> with the given <flags> by delegating to the
        compile function of the third-party regex module and return
        whatever it returns.
        '''
    compiled = regex.compile(pattern, flags)
    return compiled


# Expose regex's escape function under the same name in this module
escape = regex.escape

# Copy every all-uppercase name of the regex module (its FLAGS) into
# this module, so they can be read as attributes of this module
# without importing regex directly.
module = sys.modules[__name__]
for sym in filter(str.isupper, dir(regex)):
    setattr(module, sym, getattr(regex, sym))
47 changes: 44 additions & 3 deletions byexample/runner.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,7 @@
from __future__ import unicode_literals
import re, pexpect, time, termios, operator, os, itertools, contextlib
import pexpect, time, termios, operator, os, itertools, contextlib
import re as python_re
from . import regex as re
from functools import reduce, partial
from .executor import TimeoutException, InputPrefixNotFound
from .common import tohuman, ShebangTemplate, Countdown, short_string
Expand Down Expand Up @@ -78,6 +80,45 @@ def cancel(self, example, options):
return False


class PexpectSpawnAdapter(pexpect.spawn):
    ''' A pexpect.spawn whose compile_pattern_list also accepts patterns
        compiled with the third-party regex module.
        '''
    def compile_pattern_list(self, patterns):
        ''' This is an extension of pexpect.spawn.compile_pattern_list
            to accept not only Python's regex objects (re module) but
            also Barnett's regexes (third-party regex module).

            <patterns> may be None, a single pattern or a list of
            patterns. Each pattern may be:
             - a string: it is compiled here with Python's re module
               using DOTALL (plus IGNORECASE if self.ignorecase is set)
             - the EOF or TIMEOUT markers of pexpect.spawnbase
             - an already compiled pattern of either regex engine
            The last two kinds are added to the result untouched.

            Return the list of compiled patterns; the list is empty if
            <patterns> is None. Any other kind of pattern is reported
            through self._pattern_type_err.

            This is a workaround for the issue #655
            (https://github.com/pexpect/pexpect/issues/655)
            '''
        if patterns is None:
            return []
        if not isinstance(patterns, list):
            patterns = [patterns]

        # Allow dot to match \n
        compile_flags = python_re.DOTALL
        if self.ignorecase:
            compile_flags = compile_flags | python_re.IGNORECASE

        # The types of the compiled patterns of both engines do not
        # depend on the pattern under inspection: compute them once
        # instead of once per pattern.
        precompiled_types = (
            type(python_re.compile('')),
            type(re.compile('')),  # <-- the workaround
        )

        cls = pexpect.spawnbase
        compiled_pattern_list = []
        for p in patterns:
            if isinstance(p, self.allowed_string_types):
                p = self._coerce_expect_string(p)
                compiled_pattern_list.append(
                    python_re.compile(p, compile_flags)
                )
            elif p is cls.EOF or p is cls.TIMEOUT:
                # special markers: passed through as they are
                compiled_pattern_list.append(p)
            elif isinstance(p, precompiled_types):
                # already compiled by one of the engines: nothing to do
                compiled_pattern_list.append(p)
            else:
                self._pattern_type_err(p)
        return compiled_pattern_list


class PexpectMixin(object):
def __init__(self, PS1_re, any_PS_re):
self.PS1_re = re.compile(PS1_re)
Expand All @@ -104,7 +145,7 @@ def _spawn_interpreter(
env.update({'LINES': str(rows), 'COLUMNS': str(cols)})

self._drop_output() # there shouldn't be any output yet but...
self.interpreter = pexpect.spawn(
self.interpreter = PexpectSpawnAdapter(
cmd,
echo=False,
encoding=self.encoding,
Expand Down Expand Up @@ -250,7 +291,7 @@ def _change_terminal_geometry(self, rows, cols, options):

@staticmethod
def _universal_new_lines(out):
return re.sub(PexpectMixin.UNIV_NL, '\n', out)
return re.compile(PexpectMixin.UNIV_NL).sub('\n', out)

def _emulate_ansi_terminal(self, chunks, join=True):
for chunk in chunks:
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -42,7 +42,7 @@ everything except the code between ``<pre>`` and ``</pre>`` tags.
This is what you need to write:

```python
>>> import re
>>> import byexample.regex as re
>>> from byexample.finder import ZoneDelimiter

>>> class HTMLPreBlockDelimiter(ZoneDelimiter):
Expand All @@ -64,5 +64,10 @@ or set of several extensions.
The ``zone_regex`` method should return a regular expression to find and capture
the zones.

While you can use the standard
[``re`` module](https://docs.python.org/3/library/re.html), it is
recommended to use ``byexample.regex``, which has some built-in
optimizations.

And optionally, the ``get_zone`` can be overridden to post-process the captured
string: use it to remove any spurious string that may have been captured.
9 changes: 7 additions & 2 deletions docs/contrib/how-to-support-new-finders-and-languages.md
Original file line number Diff line number Diff line change
Expand Up @@ -60,7 +60,7 @@ To accomplish this we need to create a regular expression to find the
``~~~``, where the snippet of code is and where the expected output is.

```python
>>> import re
>>> import byexample.regex as re

>>> example_re = re.compile(r'''
... # begin with ~~~
Expand Down Expand Up @@ -107,6 +107,11 @@ The ``indent`` group is to count how many spaces are not part of the example
and they are just for indentation: ``byexample`` will *drop* the first line that
has a lower level of indentation and any subsequent line.

While you can use the standard
[``re`` module](https://docs.python.org/3/library/re.html), it is
recommended to use ``byexample.regex``, which has some built-in
optimizations.

### Detect the language

Then, the finder needs to determine in which language the example
Expand Down Expand Up @@ -351,7 +356,7 @@ you do not need to install a real ``ArnoldC`` compiler.
... output = []
... for line in source_code.split('\n'):
... if line.startswith("TALK TO THE HAND"):
... to_print = re.search(r'"([^"]*)"', line).group(1)
... to_print = re.compile(r'"([^"]*)"').search(line).group(1)
... output.append(to_print + '\n')
...
... return '\n'.join(output)
Expand Down
1 change: 1 addition & 0 deletions setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -42,6 +42,7 @@
'pexpect>=4,<5', # pexpect 4.x.x required
'appdirs>=1.4.3,<2', # appdirs 1.4.x (x >= 3) required
'pyte==0.8.0', # pyte exact version 0.8.0 required
'regex>=2017.01.12', # regex's pickle was introduced in 2016
]

# these, on the other hand, are optional nice to have
Expand Down
3 changes: 3 additions & 0 deletions test/Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -54,4 +54,7 @@ RUN wget https://packages.erlang-solutions.com/erlang-solutions_2.0_all.deb && d
esl-erlang \
elixir

RUN DEBIAN_FRONTEND=noninteractive apt-get --no-install-recommends install -y \
python3-dev

CMD /bin/bash

0 comments on commit 483c17c

Please sign in to comment.