Merge pull request #519 from eregs/remove-stray-unescape

Remove unnecessary unescape step
eregs · May 14, 2018 · 7b901fa · 7b901fa
2 parents bcbe66e + 797fc99
commit 7b901fa
Show file tree

Hide file tree

Showing 2 changed files with 42 additions and 8 deletions.
diff --git a/regulations/generator/layers/layers_applier.py b/regulations/generator/layers/layers_applier.py
@@ -1,7 +1,6 @@
 import re
 
 from six.moves.queue import PriorityQueue
-from six.moves.html_parser import HTMLParser
 
 from regulations.generator.layers.location_replace import LocationReplace
 
@@ -29,11 +28,6 @@ def location_replace(self, xml_node, original, replacement, locations):
         LocationReplace().location_replace(xml_node, original, replacement,
                                            locations)
 
-    def unescape_text(self):
-        """ Because of the way we do replace_all(), we need to unescape HTML
-        entities.  """
-        self.text = HTMLParser().unescape(self.text)
-
     def replace_all(self, original, replacement):
         """ Replace all occurrences of original with replacement. This is HTML
         aware; it effectively looks at all of the text in between HTML tags"""
@@ -46,7 +40,6 @@ def replace_all(self, original, replacement):
             index = match.end()
         text_chunks.append(self.text[index:])   # trailing text
         self.text = "".join(text_chunks)
-        self.unescape_text()
 
     def replace_at(self, original, replacement, locations):
         """ Replace the occurrences of original at all the locations with
@@ -55,7 +48,6 @@ def replace_at(self, original, replacement, locations):
         locations.sort()
         self.text = LocationReplace().location_replace_text(
             self.text, original, replacement, locations)
-        self.unescape_text()
 
     def apply_layers(self, original_text):
         self.text = original_text

diff --git a/regulations/tests/layers_appliers_test.py b/regulations/tests/layers_appliers_test.py
@@ -122,3 +122,45 @@ def test_replace_skip_location(self):
                   "law. </dfn> state law. <dfn> <a href=\"link_url\">state"
                   "</a> liability. </dfn>")
         self.assertEquals(applier.text, result)
+
+    def test_apply_layers(self):
+        # Tests same as above but from one level out.
+        original = 'state'
+        replacement = '<a href="link_url">state</a>'
+        locations = [0, 2]
+        text = ("<em>(6)</em> <dfn> Under state law. </dfn> state "
+                "law. <dfn> state liability. </dfn>")
+
+        applier = layers_applier.LayersApplier()
+        applier.enqueue((original, replacement, locations))
+        applier.apply_layers(text)
+
+        result = ("<em>(6)</em> <dfn> Under <a href=\"link_url\">state</a> "
+                  "law. </dfn> state law. <dfn> <a href=\"link_url\">state"
+                  "</a> liability. </dfn>")
+        self.assertEquals(applier.text, result)
+
+    def test_apply_layers_escaping(self):
+        # See https://github.com/eregs/regulations-site/issues/514 and
+        # https://github.com/fecgov/fec-eregs/issues/382
+        #
+        # It appears that we had a holdover step of unescaping that, thanks to
+        # looser interpretations in Python 3.6 (specifically, ``&sec`` was
+        # treated as a valid escape even without a trailing semicolon) started
+        # breaking links that have a ``&section`` parameter.
+        original = 'state'
+        replacement = '<a href="link_url">state</a>'
+        locations = [0, 2]
+        text = ("<em>(6)</em> <dfn> Under state law. </dfn> state "
+                "law. <dfn> state liability. </dfn>"
+                "<a href='http://example.org?one=1&section2'>test</a>")
+
+        applier = layers_applier.LayersApplier()
+        applier.enqueue((original, replacement, locations))
+        applier.apply_layers(text)
+
+        result = ("<em>(6)</em> <dfn> Under <a href=\"link_url\">state</a> "
+                  "law. </dfn> state law. <dfn> <a href=\"link_url\">state"
+                  "</a> liability. </dfn>"
+                  "<a href='http://example.org?one=1&section2'>test</a>")
+        self.assertEquals(applier.text, result)