Move maybe shorthands into the HTML parser

speced · Mar 19, 2024 · 140edc3 · 140edc3
1 parent 0b4e1de
commit 140edc3
Show file tree

Hide file tree

Showing 3 changed files with 136 additions and 84 deletions.
diff --git a/bikeshed/h/parser/parser.py b/bikeshed/h/parser/parser.py
@@ -3,7 +3,7 @@
 import re
 from enum import Enum
 
-from ... import constants, t
+from ... import config, constants, t
 from ... import messages as m
 from . import preds
 from .nodes import (
@@ -16,7 +16,6 @@
     SafeText,
     SelfClosedTag,
     StartTag,
-    escapeAttr,
     escapeHTML,
 )
 from .preds import charRefs
@@ -148,9 +147,9 @@ def parseNode(
             return Result(node, start + 2)
     if s.config.css:
         if s[start] == "'":
-            el, i = parseCSSMaybe(s, start).vi
-            if el is not None:
-                return Result(el, i)
+            maybeRes = parseCSSMaybe(s, start)
+            if maybeRes.err is None:
+                return maybeRes
     if s[start : start + 2] == "[[":
         # biblio link, for now just pass it thru
         node = RawText(
@@ -867,7 +866,11 @@ def parseRangeComponent(val: str) -> tuple[str | None, float | int]:
     return val + unit, num
 
 
-def parseCSSMaybe(s: Stream, start: int) -> Result[RawElement]:
+MAYBE_PROP_RE = re.compile(r"^(@[\w-]+/)?([\w-]+): .+")
+MAYBE_VAL_RE = re.compile(r"^(?:(\S*)/)?(\S[^!]*)(?:!!([\w-]+))?$")
+
+
+def parseCSSMaybe(s: Stream, start: int) -> Result[list[ParserNode]]:
     # Maybes can cause parser issues,
     # like ''<length>/px'',
     # but also can contain other markup that would split the text,
@@ -876,41 +879,152 @@ def parseCSSMaybe(s: Stream, start: int) -> Result[RawElement]:
         return Result.fail(start)
     i = start + 2
 
+    textStart = i
+
     text, i = s.skipTo(i, "''").vi
     if text is None:
         return Result.fail(start)
     if "\n" in text:
         return Result.fail(start)
-    i += 2
+    textEnd = i
+    nodeEnd = i + 2
 
     # A lot of maybes have <<foo>> links in them.
     # They break in interesting ways sometimes, but
     # also if it actually produces a link
     # (like ''width: <<length>>'' linking to 'width')
     # it'll be broken anyway.
-    # So we'll hack this in - << gets turned into &lt;
+    # So we'll hack this in - << gets turned into <
     # within a maybe.
     # No chance of a link, but won't misparse in weird ways.
-
     if "<<" in text:
-        rawContents = text.replace("<<", "&lt;").replace(">>", "&gt;")
-    else:
-        rawContents = text
+        text = re.sub(r"<<", "<", text)
+        text = re.sub(r">>", ">", text)
+
+    # This syntax does double duty as both a linking syntax
+    # and just a "style as CSS code" syntax.
+    # So, you have to be careful that something that might *look* like
+    # an autolink, but actually wasn't intended as such and thus fails
+    # to link, doesn't have its text mangled as a result.
+    # * text like `foo: ...` is probably a propdesc link,
+    #   with the same text as what's written,
+    #   so it's safe
+    # * text like `foo` is probably a maybe link,
+    #   with the same text as what's written,
+    #   so it's safe too
+    # * text like `foo/bar` might be a maybe link;
+    #   if it is, its text is `bar`, but if not it should
+    #   stay as `foo/bar`.
+    #   So it's not safe, and we need to guard against this.
+    # * anything else isn't a link, should just keep its text as-is.
+    # In all cases, the result is a start tag, a text node, and an end tag.
+
+    match = MAYBE_PROP_RE.match(text)
+    if match:
+        for_, propdescname = match.groups()
+        startTag = StartTag(
+            line=s.line(start),
+            endLine=s.line(textStart),
+            tag="a",
+            attrs={
+                "bs-autolink-syntax": s[start:nodeEnd],
+                "class": "css",
+                "data-link-type": "propdesc",
+                "data-lt": propdescname,
+            },
+        )
+        if for_:
+            startTag.attrs["data-link-for"] = for_
+            startTag.attrs["data-link-type"] = "descriptor"
+        startTag.finalize()
+        tagMiddle = SafeText(
+            line=s.line(textStart),
+            endLine=s.line(textEnd),
+            text=text,
+        )
+        endTag = EndTag(
+            line=s.line(textEnd),
+            endLine=s.line(nodeEnd),
+            tag=startTag.tag,
+        )
+        return Result([startTag, tagMiddle, endTag], nodeEnd)
+
+    match = MAYBE_VAL_RE.match(text)
+    if match:
+        for_, valueName, linkType = match.groups()
+        if linkType is None:
+            linkType = "maybe"
+        elif linkType in config.maybeTypes:
+            pass
+        else:
+            m.die(
+                f"Shorthand ''{text}'' gives type as '{linkType}', but only “maybe” sub-types are allowed: {config.englishFromList(config.maybeTypes)}.",
+                lineNum=s.line(start),
+            )
+            startTag = StartTag(
+                line=s.line(start),
+                endLine=s.line(textStart),
+                tag="css",
+            )
+            tagMiddle = SafeText(
+                line=s.line(textStart),
+                endLine=s.line(textEnd),
+                text=valueName,
+            )
+            endTag = EndTag(
+                line=s.line(textEnd),
+                endLine=s.line(nodeEnd),
+                tag=startTag.tag,
+            )
+            return Result([startTag, tagMiddle, endTag], nodeEnd)
+
+        # Probably a valid link, but *possibly* not,
+        # so keep the text as-is, but set the intended link text
+        # if it *does* succeed.
+        startTag = StartTag(
+            line=s.line(start),
+            endLine=s.line(textStart),
+            tag="a",
+            attrs={
+                "bs-autolink-syntax": s[start:nodeEnd],
+                "bs-replace-text-on-link-success": valueName,
+                "class": "css",
+                "data-link-type": linkType,
+                "data-lt": valueName,
+            },
+        )
+        if for_:
+            startTag.attrs["data-link-for"] = for_
+        startTag.finalize()
+        tagMiddle = SafeText(
+            line=s.line(textStart),
+            endLine=s.line(textEnd),
+            text=text,
+        )
+        endTag = EndTag(
+            line=s.line(textEnd),
+            endLine=s.line(nodeEnd),
+            tag=startTag.tag,
+        )
+        return Result([startTag, tagMiddle, endTag], nodeEnd)
 
+    # Doesn't look like a maybe link, so it's just CSS text.
     startTag = StartTag(
         line=s.line(start),
-        endLine=s.line(start),
-        tag="fake-maybe-placeholder",
-        attrs={"bs-autolink-syntax": s[start:i], "bs-original-contents": escapeAttr(text)},
-    ).finalize()
-    el = RawElement(
-        line=startTag.line,
+        endLine=s.line(textStart),
+        tag="css",
+    )
+    tagMiddle = SafeText(
+        line=s.line(textStart),
+        endLine=s.line(textEnd),
+        text=text,
+    )
+    endTag = EndTag(
+        line=s.line(textEnd),
+        endLine=s.line(nodeEnd),
         tag=startTag.tag,
-        startTag=startTag,
-        data=rawContents,
-        endLine=s.line(i),
     )
-    return Result(el, i)
+    return Result([startTag, tagMiddle, endTag], nodeEnd)
 
 
 codeSpanStartRe = re.compile(r"`+")

diff --git a/bikeshed/shorthands/__init__.py b/bikeshed/shorthands/__init__.py
@@ -6,6 +6,5 @@
 
 def run(doc: t.SpecT) -> None:
     oldShorthands.transformShorthandElements(doc)
-    oldShorthands.transformMaybePlaceholders(doc)
     oldShorthands.transformAutolinkShortcuts(doc)
     oldShorthands.transformProductionGrammars(doc)
diff --git a/bikeshed/shorthands/oldShorthands.py b/bikeshed/shorthands/oldShorthands.py
@@ -6,67 +6,6 @@
 from .. import messages as m
 
 
-def transformMaybePlaceholders(doc: t.SpecT) -> None:
-    propRe = re.compile(r"^([\w-]+): .+")
-    valRe = re.compile(r"^(?:(\S*)/)?(\S[^!]*)(?:!!([\w-]+))?$")
-    for el in h.findAll("fake-maybe-placeholder", doc):
-        text = el.get("bs-original-contents")
-        assert text is not None
-        match = propRe.match(text)
-        if match:
-            el.tag = "a"
-            el.set("class", "css")
-            el.set("data-link-type", "propdesc")
-            el.set("data-lt", match.group(1))
-            continue
-        match = valRe.match(text)
-        if match:
-            if match.group(3) is None:
-                linkType = "maybe"
-            elif match.group(3) in config.maybeTypes:
-                linkType = match.group(3)
-            else:
-                m.die(
-                    f"Shorthand ''{match.group(0)}'' gives type as '{match.group(3)}', but only “maybe” types are allowed.",
-                    el=el,
-                )
-                el.tag = "css"
-                continue
-            el.tag = "a"
-            el.set("class", "css")
-            el.set("data-link-type", linkType)
-            el.set("data-lt", match.group(2))
-            # Three cases to worry about:
-            # 1. ''foo/valid-value'' (successful link)
-            # 2. ''foo/invalid-value'' (intended link, but unsuccessful)
-            # 3. ''foo&0x2f;bar'' (not a link, just wants a slash in text)
-            #
-            # Handling (1) is easy - on successful link, I'll swap the text
-            # for the reffed value.
-            # Distinguish (2) from (3) is hard, and they need to be treated
-            # differently - (3) should be left alone, while (2) needs to
-            # have its text swapped to "invalid-value".
-            #
-            # Compromise: if it looks *sufficiently close* to a link
-            # I'll swap the text ahead of time, to remove any metadata
-            # that shouldn't display for a link.
-            # Otherwise I'll leave it alone, but if it successfully links
-            # based on literal text, it'll swap its text out.
-            #
-            # "Sufficiently close" means it has a for or type value,
-            # and *doesn't* contain what looks like a close tag
-            # (which would otherwise look like a for value due to the slash).
-            if (match.group(1) is not None or match.group(3) is not None) and "</" not in text:
-                h.clearContents(el)
-                el.text = match.group(2)
-            else:
-                el.set("bs-replace-text-on-link-success", match.group(2))
-            if match.group(1) is not None:
-                el.set("for", match.group(1))
-            continue
-        el.tag = "css"
-
-
 def transformAutolinkShortcuts(doc: t.SpecT) -> None:
     # Do the remaining textual replacements