-
Notifications
You must be signed in to change notification settings - Fork 0
/
parser.py
111 lines (82 loc) · 3.61 KB
/
parser.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
#!/usr/bin/env python3
import logging
import string
import sys
from collections import OrderedDict
from typing import Iterable, Tuple, Dict, List, Set
log = logging.getLogger(__name__)
LOWERCASE = set(string.ascii_lowercase)
UPPERCASE = set(string.ascii_uppercase)
ASCII_LETTERS = LOWERCASE | UPPERCASE
def get_rs(rs: Iterable[str]) -> Iterable[Tuple[str, Set[str]]]:
"""Decode lines SWE lines containing the replacement mappings"""
for r in rs:
upper, lowers = r.split(":")
lowers = set(lowers.split(","))
if len(upper) != 1 or upper not in UPPERCASE:
raise ValueError("First character of R should be uppercase")
for lower in lowers:
if not lower or not all(l in LOWERCASE for l in lower):
raise ValueError("All characters on RHS of R line should be lowercase")
yield upper, set(lowers)
def simplify_problem(s, ts, rs):
# We first simplify the solution somewhat. If something maps to a letter
# not present in s it will never be a suitable replacement.
remove = set()
for replacements in rs.values():
for replacement in replacements:
if replacement not in s:
remove.add(replacement)
replacements -= remove
# Remove all replacements not mentioned. For example, C is never mentioned
# in first example so it's a needless burden.
keep = set(filter(str.isupper, "".join(ts)))
remove = set(rs.keys()) - keep
for r in remove:
del rs[r]
for k in list(rs.keys()):
rs[k] = sorted(rs[k], key=lambda c: (-len(c), c))
log.info("Simplified to {k} clauses and {x} variables.".format(k=len(ts), x=len(rs)))
return s, ts, rs
def parse(swe_lines: Iterable[str]) -> Tuple[str, List[str], Dict[str, Set[str]]]:
"""Decode given SWE file and run the decision algorithm"""
log.info("Parsing file..")
try:
k = int(next(swe_lines))
except (ValueError, StopIteration):
raise ValueError("First line must contain an integer")
# Get string which has to contain all substrings
s = next(swe_lines)
if not s or not all(l in LOWERCASE for l in s):
raise ValueError("String s should only contain lowercase letters")
# Get the collection of t's
ts = [next(swe_lines) for _ in range(k)]
ts_dedup = sorted(set(ts), key=lambda c: (-len(c), c)) # <- For the sake of being deterministic with no. threads spawned
log.info("Found {} double clauses, removing duplicates..".format(len(ts) - len(ts_dedup)))
ts = ts_dedup
for t in ts:
if not t or not all(l in ASCII_LETTERS for l in t):
raise ValueError("{} contained non-ascii chars".format(t))
# Get the collection of r's. The variable rs will contain a mapping
# from uppercase to the substitutions. For example: 'A' -> {'b', 'c'}.
rs = OrderedDict(sorted(get_rs(swe_lines)))
# Check for illegal substitues
for letter in "".join(ts):
if letter in LOWERCASE:
continue
if letter not in rs:
raise ValueError("{} not found in replacement mapping".format(letter))
return simplify_problem(s, ts, rs)
if __name__ == '__main__':
# Setup logging
logging.basicConfig(format='[%(asctime)s] %(message)s')
logging.getLogger().setLevel(logging.DEBUG)
swe_lines = (l.strip() for l in open(sys.argv[1]))
s, ts, rs = parse(swe_lines)
log.info("String found: {}".format(s))
log.info("Clauses found:")
for clause in ts:
log.info(" {}".format(clause))
log.info("Possible expansions:")
for k, expansions in rs.items():
log.info(" {} -> {}".format(k, expansions))