Skip to content
This repository has been archived by the owner on Apr 4, 2023. It is now read-only.

Commit

Permalink
Merge pull request #519 from eregs/remove-stray-unescape
Browse files Browse the repository at this point in the history
Remove unnecessary unescape step
  • Loading branch information
tadhg-ohiggins authored May 14, 2018
2 parents bcbe66e + 797fc99 commit 7b901fa
Show file tree
Hide file tree
Showing 2 changed files with 42 additions and 8 deletions.
8 changes: 0 additions & 8 deletions regulations/generator/layers/layers_applier.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,6 @@
import re

from six.moves.queue import PriorityQueue
from six.moves.html_parser import HTMLParser

from regulations.generator.layers.location_replace import LocationReplace

Expand Down Expand Up @@ -29,11 +28,6 @@ def location_replace(self, xml_node, original, replacement, locations):
LocationReplace().location_replace(xml_node, original, replacement,
locations)

def unescape_text(self):
    """Replace HTML entities in ``self.text`` with the characters they
    represent.

    Because of the way we do replace_all(), we need to unescape HTML
    entities.

    NOTE: unescaping follows lenient HTML rules, so some entities are
    converted even without a trailing semicolon (e.g. the ``&sec`` in a
    ``&section`` query parameter), which can alter URLs embedded in the
    text.
    """
    try:
        # ``HTMLParser.unescape`` was deprecated in Python 3.4 and removed
        # in 3.9; ``html.unescape`` is the supported replacement.
        from html import unescape
    except ImportError:
        # Interpreters without the ``html`` module (Python 2) only offer
        # the parser method.
        unescape = HTMLParser().unescape
    self.text = unescape(self.text)

def replace_all(self, original, replacement):
""" Replace all occurrences of original with replacement. This is HTML
aware; it effectively looks at all of the text in between HTML tags"""
Expand All @@ -46,7 +40,6 @@ def replace_all(self, original, replacement):
index = match.end()
text_chunks.append(self.text[index:]) # trailing text
self.text = "".join(text_chunks)
self.unescape_text()

def replace_at(self, original, replacement, locations):
""" Replace the occurrences of original at all the locations with
Expand All @@ -55,7 +48,6 @@ def replace_at(self, original, replacement, locations):
locations.sort()
self.text = LocationReplace().location_replace_text(
self.text, original, replacement, locations)
self.unescape_text()

def apply_layers(self, original_text):
self.text = original_text
Expand Down
42 changes: 42 additions & 0 deletions regulations/tests/layers_appliers_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -122,3 +122,45 @@ def test_replace_skip_location(self):
"law. </dfn> state law. <dfn> <a href=\"link_url\">state"
"</a> liability. </dfn>")
self.assertEquals(applier.text, result)

def test_apply_layers(self):
    # Tests same as above but from one level out: the replacement is
    # queued with enqueue() and applied through apply_layers() rather than
    # by calling the replace method directly.
    original = 'state'
    replacement = '<a href="link_url">state</a>'
    # Only the first and third occurrences of "state" are to be linked.
    locations = [0, 2]
    text = ("<em>(6)</em> <dfn> Under state law. </dfn> state "
            "law. <dfn> state liability. </dfn>")

    applier = layers_applier.LayersApplier()
    applier.enqueue((original, replacement, locations))
    applier.apply_layers(text)

    result = ("<em>(6)</em> <dfn> Under <a href=\"link_url\">state</a> "
              "law. </dfn> state law. <dfn> <a href=\"link_url\">state"
              "</a> liability. </dfn>")
    # assertEqual: the assertEquals alias is deprecated and was removed in
    # Python 3.12.
    self.assertEqual(applier.text, result)

def test_apply_layers_escaping(self):
    # See https://github.com/eregs/regulations-site/issues/514 and
    # https://github.com/fecgov/fec-eregs/issues/382
    #
    # It appears that we had a holdover step of unescaping that, thanks to
    # looser interpretations in Python 3.6 (specifically, ``&sec`` was
    # treated as a valid escape even without a trailing semicolon) started
    # breaking links that have a ``&section`` parameter.
    original = 'state'
    replacement = '<a href="link_url">state</a>'
    locations = [0, 2]
    # The trailing link carries a ``&section2`` query parameter that must
    # come through apply_layers() unchanged.
    text = ("<em>(6)</em> <dfn> Under state law. </dfn> state "
            "law. <dfn> state liability. </dfn>"
            "<a href='http://example.org?one=1&section2'>test</a>")

    applier = layers_applier.LayersApplier()
    applier.enqueue((original, replacement, locations))
    applier.apply_layers(text)

    result = ("<em>(6)</em> <dfn> Under <a href=\"link_url\">state</a> "
              "law. </dfn> state law. <dfn> <a href=\"link_url\">state"
              "</a> liability. </dfn>"
              "<a href='http://example.org?one=1&section2'>test</a>")
    # assertEqual: the assertEquals alias is deprecated and was removed in
    # Python 3.12.
    self.assertEqual(applier.text, result)

0 comments on commit 7b901fa

Please sign in to comment.