diff --git a/bikeshed/h/parser/parser.py b/bikeshed/h/parser/parser.py index c143e24732..1d426b7a87 100644 --- a/bikeshed/h/parser/parser.py +++ b/bikeshed/h/parser/parser.py @@ -3,7 +3,7 @@ import re from enum import Enum -from ... import constants, t +from ... import config, constants, t from ... import messages as m from . import preds from .nodes import ( @@ -16,7 +16,6 @@ SafeText, SelfClosedTag, StartTag, - escapeAttr, escapeHTML, ) from .preds import charRefs @@ -148,9 +147,9 @@ def parseNode( return Result(node, start + 2) if s.config.css: if s[start] == "'": - el, i = parseCSSMaybe(s, start).vi - if el is not None: - return Result(el, i) + maybeRes = parseCSSMaybe(s, start) + if maybeRes.err is None: + return maybeRes if s[start : start + 2] == "[[": # biblio link, for now just pass it thru node = RawText( @@ -867,7 +866,11 @@ def parseRangeComponent(val: str) -> tuple[str | None, float | int]: return val + unit, num -def parseCSSMaybe(s: Stream, start: int) -> Result[RawElement]: +MAYBE_PROP_RE = re.compile(r"^(@[\w-]+/)?([\w-]+): .+") +MAYBE_VAL_RE = re.compile(r"^(?:(\S*)/)?(\S[^!]*)(?:!!([\w-]+))?$") + + +def parseCSSMaybe(s: Stream, start: int) -> Result[list[ParserNode]]: # Maybes can cause parser issues, # like ''/px'', # but also can contain other markup that would split the text, @@ -876,41 +879,152 @@ def parseCSSMaybe(s: Stream, start: int) -> Result[RawElement]: return Result.fail(start) i = start + 2 + textStart = i + text, i = s.skipTo(i, "''").vi if text is None: return Result.fail(start) if "\n" in text: return Result.fail(start) - i += 2 + textEnd = i + nodeEnd = i + 2 # A lot of maybes have <> links in them. # They break in interesting ways sometimes, but # also if it actually produces a link # (like ''width: <>'' linking to 'width') # it'll be broken anyway. - # So we'll hack this in - << gets turned into < + # So we'll hack this in - << gets turned into < # within a maybe. # No chance of a link, but won't misparse in weird ways. - if "<<" in text: - rawContents = text.replace("<<", "<").replace(">>", ">") - else: - rawContents = text + text = re.sub(r"<<", "<", text) + text = re.sub(r">>", ">", text) + + # This syntax does double duty as both a linking syntax + # and just a "style as CSS code" syntax. + # So, you have to be careful that something that might *look* like + # an autolink, but actually wasn't intended as such and thus fails + # to link, doesn't have its text mangled as a result. + # * text like `foo: ...` is probably a propdesc link, + # with the same text as what's written, + # so it's safe + # * text like `foo` is probably a maybe link, + # with the same text as what's written, + # so it's safe too + # * text like `foo/bar` might be a maybe link; + # if it is, its text is `bar`, but if not it should + # stay as `foo/bar`. + # So it's not safe, and we need to guard against this. + # * anything else isn't a link, should just keep its text as-is. + # In all cases, + + match = MAYBE_PROP_RE.match(text) + if match: + for_, propdescname = match.groups() + startTag = StartTag( + line=s.line(start), + endLine=s.line(textStart), + tag="a", + attrs={ + "bs-autolink-syntax": s[start:nodeEnd], + "class": "css", + "data-link-type": "propdesc", + "data-lt": propdescname, + }, + ) + if for_: + startTag.attrs["data-link-for"] = for_ + startTag.attrs["data-link-type"] = "descriptor" + startTag.finalize() + tagMiddle = SafeText( + line=s.line(textStart), + endLine=s.line(textEnd), + text=text, + ) + endTag = EndTag( + line=s.line(textEnd), + endLine=s.line(nodeEnd), + tag=startTag.tag, + ) + return Result([startTag, tagMiddle, endTag], nodeEnd) + + match = MAYBE_VAL_RE.match(text) + if match: + for_, valueName, linkType = match.groups() + if linkType is None: + linkType = "maybe" + elif linkType in config.maybeTypes: + pass + else: + m.die( + f"Shorthand ''{text}'' gives type as '{linkType}', but only “maybe” sub-types are allowed: {config.englishFromList(config.maybeTypes)}.", + lineNum=s.line(start), + ) + startTag = StartTag( + line=s.line(start), + endLine=s.line(textStart), + tag="css", + ) + tagMiddle = SafeText( + line=s.line(textStart), + endLine=s.line(textEnd), + text=valueName, + ) + endTag = EndTag( + line=s.line(textEnd), + endLine=s.line(nodeEnd), + tag=startTag.tag, + ) + return Result([startTag, tagMiddle, endTag], nodeEnd) + + # Probably a valid link, but *possibly* not, + # so keep the text as-is, but set the intended link text + # if it *does* succeed. + startTag = StartTag( + line=s.line(start), + endLine=s.line(textStart), + tag="a", + attrs={ + "bs-autolink-syntax": s[start:nodeEnd], + "bs-replace-text-on-link-success": valueName, + "class": "css", + "data-link-type": linkType, + "data-lt": valueName, + }, + ) + if for_: + startTag.attrs["data-link-for"] = for_ + startTag.finalize() + tagMiddle = SafeText( + line=s.line(textStart), + endLine=s.line(textEnd), + text=text, + ) + endTag = EndTag( + line=s.line(textEnd), + endLine=s.line(nodeEnd), + tag=startTag.tag, + ) + return Result([startTag, tagMiddle, endTag], nodeEnd) + # Doesn't look like a maybe link, so it's just CSS text. startTag = StartTag( line=s.line(start), - endLine=s.line(start), - tag="fake-maybe-placeholder", - attrs={"bs-autolink-syntax": s[start:i], "bs-original-contents": escapeAttr(text)}, - ).finalize() - el = RawElement( - line=startTag.line, + endLine=s.line(textStart), + tag="css", + ) + tagMiddle = SafeText( + line=s.line(textStart), + endLine=s.line(textEnd), + text=text, + ) + endTag = EndTag( + line=s.line(textEnd), + endLine=s.line(nodeEnd), tag=startTag.tag, - startTag=startTag, - data=rawContents, - endLine=s.line(i), ) - return Result(el, i) + return Result([startTag, tagMiddle, endTag], nodeEnd) codeSpanStartRe = re.compile(r"`+") diff --git a/bikeshed/shorthands/__init__.py b/bikeshed/shorthands/__init__.py index 346c179428..834ae90d66 100644 --- a/bikeshed/shorthands/__init__.py +++ b/bikeshed/shorthands/__init__.py @@ -6,6 +6,5 @@ def run(doc: t.SpecT) -> None: oldShorthands.transformShorthandElements(doc) - oldShorthands.transformMaybePlaceholders(doc) oldShorthands.transformAutolinkShortcuts(doc) oldShorthands.transformProductionGrammars(doc) diff --git a/bikeshed/shorthands/oldShorthands.py b/bikeshed/shorthands/oldShorthands.py index 3c561b108f..2872603896 100644 --- a/bikeshed/shorthands/oldShorthands.py +++ b/bikeshed/shorthands/oldShorthands.py @@ -6,67 +6,6 @@ from .. import messages as m -def transformMaybePlaceholders(doc: t.SpecT) -> None: - propRe = re.compile(r"^([\w-]+): .+") - valRe = re.compile(r"^(?:(\S*)/)?(\S[^!]*)(?:!!([\w-]+))?$") - for el in h.findAll("fake-maybe-placeholder", doc): - text = el.get("bs-original-contents") - assert text is not None - match = propRe.match(text) - if match: - el.tag = "a" - el.set("class", "css") - el.set("data-link-type", "propdesc") - el.set("data-lt", match.group(1)) - continue - match = valRe.match(text) - if match: - if match.group(3) is None: - linkType = "maybe" - elif match.group(3) in config.maybeTypes: - linkType = match.group(3) - else: - m.die( - f"Shorthand ''{match.group(0)}'' gives type as '{match.group(3)}', but only “maybe” types are allowed.", - el=el, - ) - el.tag = "css" - continue - el.tag = "a" - el.set("class", "css") - el.set("data-link-type", linkType) - el.set("data-lt", match.group(2)) - # Three cases to worry about: - # 1. ''foo/valid-value'' (successful link) - # 2. ''foo/invalid-value'' (intended link, but unsuccessful) - # 3. ''foo&0x2f;bar'' (not a link, just wants a slash in text) - # - # Handling (1) is easy - on successful link, I'll swap the text - # for the reffed value. - # Distinguish (2) from (3) is hard, and they need to be treated - # differently - (3) should be left alone, while (2) needs to - # have its text swapped to "invalid-value". - # - # Compromise: if it looks *sufficiently close* to a link - # I'll swap the text ahead of time, to remove any metadata - # that shouldn't display for a link. - # Otherwise I'll leave it alone, but if it successfully links - # based on literal text, it'll swap its text out. - # - # "Sufficiently close" means it has a for or type value, - # and *doesn't* contain what looks like a close tag - # (which would otherwise look like a for value due to the slash). - if (match.group(1) is not None or match.group(3) is not None) and " None: # Do the remaining textual replacements