From d9b8041ca0d3977682dbfe27cc734243d44bbce1 Mon Sep 17 00:00:00 2001 From: Tadhg O'Higgins Date: Mon, 14 May 2018 16:10:12 -0400 Subject: [PATCH 1/2] Remove unnecessary unescaping (leftover from a prior code path that used lxml to generate HTML that needed later unescaping), fixing https://github.com/eregs/regulations-site/issues/514 and https://github.com/fecgov/fec-eregs/issues/382. --- .../generator/layers/layers_applier.py | 8 ---- regulations/tests/layers_appliers_test.py | 42 +++++++++++++++++++ 2 files changed, 42 insertions(+), 8 deletions(-) diff --git a/regulations/generator/layers/layers_applier.py b/regulations/generator/layers/layers_applier.py index b849ca0d8..fd7b30353 100644 --- a/regulations/generator/layers/layers_applier.py +++ b/regulations/generator/layers/layers_applier.py @@ -1,7 +1,6 @@ import re from six.moves.queue import PriorityQueue -from six.moves.html_parser import HTMLParser from regulations.generator.layers.location_replace import LocationReplace @@ -29,11 +28,6 @@ def location_replace(self, xml_node, original, replacement, locations): LocationReplace().location_replace(xml_node, original, replacement, locations) - def unescape_text(self): - """ Because of the way we do replace_all(), we need to unescape HTML - entities. """ - self.text = HTMLParser().unescape(self.text) - def replace_all(self, original, replacement): """ Replace all occurrences of original with replacement. 
This is HTML aware; it effectively looks at all of the text in between HTML tags""" @@ -46,7 +40,6 @@ def replace_all(self, original, replacement): index = match.end() text_chunks.append(self.text[index:]) # trailing text self.text = "".join(text_chunks) - self.unescape_text() def replace_at(self, original, replacement, locations): """ Replace the occurrences of original at all the locations with @@ -55,7 +48,6 @@ def replace_at(self, original, replacement, locations): locations.sort() self.text = LocationReplace().location_replace_text( self.text, original, replacement, locations) - self.unescape_text() def apply_layers(self, original_text): self.text = original_text diff --git a/regulations/tests/layers_appliers_test.py b/regulations/tests/layers_appliers_test.py index b2848d764..25dc2e385 100644 --- a/regulations/tests/layers_appliers_test.py +++ b/regulations/tests/layers_appliers_test.py @@ -135,3 +135,45 @@ def test_replace_skip_location(self): "law. state law. state" " liability. ") self.assertEquals(applier.text, result) + + def test_apply_layers(self): + # Tests same as above but from one level out. + original = 'state' + replacement = 'state' + locations = [0, 2] + text = ("(6) Under state law. state " + "law. state liability. ") + + applier = layers_applier.LayersApplier() + applier.enqueue((original, replacement, locations)) + applier.apply_layers(text) + + result = ("(6) Under state " + "law. state law. state" + " liability. ") + self.assertEquals(applier.text, result) + + def test_apply_layers_escaping(self): + # See https://github.com/eregs/regulations-site/issues/514 and + # https://github.com/fecgov/fec-eregs/issues/382 + # + # It appears that we had a holdover step of unescaping that, thanks to + # looser interpretations in Python 3.6 (specifically, ``&sect`` was + # treated as a valid escape even without a trailing semicolon) started + # breaking links that have a ``&section`` parameter. 
+ original = 'state' + replacement = 'state' + locations = [0, 2] + text = ("(6) Under state law. state " + "law. state liability. " + "test") + + applier = layers_applier.LayersApplier() + applier.enqueue((original, replacement, locations)) + applier.apply_layers(text) + + result = ("(6) Under state " + "law. state law. state" + " liability. " + "test") + self.assertEquals(applier.text, result) From f4fb52f26a6a99521b2aedf80901884362389e3d Mon Sep 17 00:00:00 2001 From: Tadhg O'Higgins Date: Mon, 14 May 2018 16:22:56 -0400 Subject: [PATCH 2/2] Move bandit install back to PyPI rather than using GitHub install. --- requirements_test.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/requirements_test.txt b/requirements_test.txt index 1806c372a..ab71f775e 100644 --- a/requirements_test.txt +++ b/requirements_test.txt @@ -9,4 +9,4 @@ nose-exclude nose-testconfig selenium tox -git+https://github.com/openstack/bandit.git # PyPi version is out of date +bandit