Skip to content

Commit

Permalink
Move maybe shorthands into the HTML parser
Browse files Browse the repository at this point in the history
  • Loading branch information
tabatkins committed Mar 19, 2024
1 parent 0b4e1de commit 140edc3
Show file tree
Hide file tree
Showing 3 changed files with 136 additions and 84 deletions.
158 changes: 136 additions & 22 deletions bikeshed/h/parser/parser.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@
import re
from enum import Enum

from ... import constants, t
from ... import config, constants, t
from ... import messages as m
from . import preds
from .nodes import (
Expand All @@ -16,7 +16,6 @@
SafeText,
SelfClosedTag,
StartTag,
escapeAttr,
escapeHTML,
)
from .preds import charRefs
Expand Down Expand Up @@ -148,9 +147,9 @@ def parseNode(
return Result(node, start + 2)
if s.config.css:
if s[start] == "'":
el, i = parseCSSMaybe(s, start).vi
if el is not None:
return Result(el, i)
maybeRes = parseCSSMaybe(s, start)
if maybeRes.err is None:
return maybeRes
if s[start : start + 2] == "[[":
# biblio link, for now just pass it thru
node = RawText(
Expand Down Expand Up @@ -867,7 +866,11 @@ def parseRangeComponent(val: str) -> tuple[str | None, float | int]:
return val + unit, num


def parseCSSMaybe(s: Stream, start: int) -> Result[RawElement]:
# Matches a property/descriptor "maybe" such as ``width: 10px`` or
# ``@font-face/font-family: serif``.
#   group 1: the optional at-rule the descriptor is for (e.g. ``@font-face``);
#            the separating slash is kept outside the group so the captured
#            value can be used as a link "for" value as-is
#   group 2: the property or descriptor name
MAYBE_PROP_RE = re.compile(r"^(?:(@[\w-]+)/)?([\w-]+): .+")
# Matches a value "maybe" of the form ``for/value!!type``, where the ``for/``
# prefix and the ``!!type`` suffix are both optional.
#   group 1: the "for" value (without its trailing slash)
#   group 2: the value text; starts with a non-space character, contains no ``!``
#   group 3: the link type named after ``!!``
MAYBE_VAL_RE = re.compile(r"^(?:(\S*)/)?(\S[^!]*)(?:!!([\w-]+))?$")


def parseCSSMaybe(s: Stream, start: int) -> Result[list[ParserNode]]:
# Maybes can cause parser issues,
# like ''<length>/px'',
# but also can contain other markup that would split the text,
Expand All @@ -876,41 +879,152 @@ def parseCSSMaybe(s: Stream, start: int) -> Result[RawElement]:
return Result.fail(start)
i = start + 2

textStart = i

text, i = s.skipTo(i, "''").vi
if text is None:
return Result.fail(start)
if "\n" in text:
return Result.fail(start)
i += 2
textEnd = i
nodeEnd = i + 2

# A lot of maybes have <<foo>> links in them.
# They break in interesting ways sometimes, but
# also if it actually produces a link
# (like ''width: <<length>>'' linking to 'width')
# it'll be broken anyway.
# So we'll hack this in - << gets turned into &lt;
# So we'll hack this in - << gets turned into <
# within a maybe.
# No chance of a link, but won't misparse in weird ways.

if "<<" in text:
rawContents = text.replace("<<", "&lt;").replace(">>", "&gt;")
else:
rawContents = text
text = re.sub(r"<<", "<", text)
text = re.sub(r">>", ">", text)

# This syntax does double duty as both a linking syntax
# and just a "style as CSS code" syntax.
# So, you have to be careful that something that might *look* like
# an autolink, but actually wasn't intended as such and thus fails
# to link, doesn't have its text mangled as a result.
# * text like `foo: ...` is probably a propdesc link,
# with the same text as what's written,
# so it's safe
# * text like `foo` is probably a maybe link,
# with the same text as what's written,
# so it's safe too
# * text like `foo/bar` might be a maybe link;
# if it is, its text is `bar`, but if not it should
# stay as `foo/bar`.
# So it's not safe, and we need to guard against this.
# * anything else isn't a link, should just keep its text as-is.
# In all cases, the result is a start tag, a single SafeText node, and a matching end tag.

match = MAYBE_PROP_RE.match(text)
if match:
for_, propdescname = match.groups()
startTag = StartTag(
line=s.line(start),
endLine=s.line(textStart),
tag="a",
attrs={
"bs-autolink-syntax": s[start:nodeEnd],
"class": "css",
"data-link-type": "propdesc",
"data-lt": propdescname,
},
)
if for_:
startTag.attrs["data-link-for"] = for_
startTag.attrs["data-link-type"] = "descriptor"
startTag.finalize()
tagMiddle = SafeText(
line=s.line(textStart),
endLine=s.line(textEnd),
text=text,
)
endTag = EndTag(
line=s.line(textEnd),
endLine=s.line(nodeEnd),
tag=startTag.tag,
)
return Result([startTag, tagMiddle, endTag], nodeEnd)

match = MAYBE_VAL_RE.match(text)
if match:
for_, valueName, linkType = match.groups()
if linkType is None:
linkType = "maybe"
elif linkType in config.maybeTypes:
pass
else:
m.die(
f"Shorthand ''{text}'' gives type as '{linkType}', but only “maybe” sub-types are allowed: {config.englishFromList(config.maybeTypes)}.",
lineNum=s.line(start),
)
startTag = StartTag(
line=s.line(start),
endLine=s.line(textStart),
tag="css",
)
tagMiddle = SafeText(
line=s.line(textStart),
endLine=s.line(textEnd),
text=valueName,
)
endTag = EndTag(
line=s.line(textEnd),
endLine=s.line(nodeEnd),
tag=startTag.tag,
)
return Result([startTag, tagMiddle, endTag], nodeEnd)

# Probably a valid link, but *possibly* not,
# so keep the text as-is, but set the intended link text
# if it *does* succeed.
startTag = StartTag(
line=s.line(start),
endLine=s.line(textStart),
tag="a",
attrs={
"bs-autolink-syntax": s[start:nodeEnd],
"bs-replace-text-on-link-success": valueName,
"class": "css",
"data-link-type": linkType,
"data-lt": valueName,
},
)
if for_:
startTag.attrs["data-link-for"] = for_
startTag.finalize()
tagMiddle = SafeText(
line=s.line(textStart),
endLine=s.line(textEnd),
text=text,
)
endTag = EndTag(
line=s.line(textEnd),
endLine=s.line(nodeEnd),
tag=startTag.tag,
)
return Result([startTag, tagMiddle, endTag], nodeEnd)

# Doesn't look like a maybe link, so it's just CSS text.
startTag = StartTag(
line=s.line(start),
endLine=s.line(start),
tag="fake-maybe-placeholder",
attrs={"bs-autolink-syntax": s[start:i], "bs-original-contents": escapeAttr(text)},
).finalize()
el = RawElement(
line=startTag.line,
endLine=s.line(textStart),
tag="css",
)
tagMiddle = SafeText(
line=s.line(textStart),
endLine=s.line(textEnd),
text=text,
)
endTag = EndTag(
line=s.line(textEnd),
endLine=s.line(nodeEnd),
tag=startTag.tag,
startTag=startTag,
data=rawContents,
endLine=s.line(i),
)
return Result(el, i)
return Result([startTag, tagMiddle, endTag], nodeEnd)


codeSpanStartRe = re.compile(r"`+")
Expand Down
1 change: 0 additions & 1 deletion bikeshed/shorthands/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,5 @@

def run(doc: t.SpecT) -> None:
    """Apply each of the old shorthand transformations to *doc*, in order."""
    passes = (
        oldShorthands.transformShorthandElements,
        oldShorthands.transformMaybePlaceholders,
        oldShorthands.transformAutolinkShortcuts,
        oldShorthands.transformProductionGrammars,
    )
    for applyPass in passes:
        applyPass(doc)
61 changes: 0 additions & 61 deletions bikeshed/shorthands/oldShorthands.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,67 +6,6 @@
from .. import messages as m


def transformMaybePlaceholders(doc: t.SpecT) -> None:
    """Rewrite each <fake-maybe-placeholder> element in *doc* in place.

    The element's "bs-original-contents" attribute decides the outcome:

    * "name: value" text becomes an <a class=css> propdesc autolink to "name".
    * "for/value!!type" text (the for and type parts are optional) becomes an
      <a class=css> autolink to "value"; the type defaults to "maybe" and must
      otherwise be listed in config.maybeTypes.
    * Anything else, or a disallowed type, becomes a plain <css> element.
    """
    propdescPattern = re.compile(r"^([\w-]+): .+")
    valuePattern = re.compile(r"^(?:(\S*)/)?(\S[^!]*)(?:!!([\w-]+))?$")

    for placeholder in h.findAll("fake-maybe-placeholder", doc):
        contents = placeholder.get("bs-original-contents")
        assert contents is not None

        propdescMatch = propdescPattern.match(contents)
        if propdescMatch:
            placeholder.tag = "a"
            placeholder.set("class", "css")
            placeholder.set("data-link-type", "propdesc")
            placeholder.set("data-lt", propdescMatch.group(1))
            continue

        valueMatch = valuePattern.match(contents)
        if not valueMatch:
            # Matches neither shape, so it is only CSS-styled text.
            placeholder.tag = "css"
            continue

        linkFor, linkText, declaredType = valueMatch.groups()
        if declaredType is not None and declaredType not in config.maybeTypes:
            m.die(
                f"Shorthand ''{valueMatch.group(0)}'' gives type as '{declaredType}', but only “maybe” types are allowed.",
                el=placeholder,
            )
            placeholder.tag = "css"
            continue

        placeholder.tag = "a"
        placeholder.set("class", "css")
        placeholder.set("data-link-type", "maybe" if declaredType is None else declaredType)
        placeholder.set("data-lt", linkText)

        # Three situations can reach this point:
        #   1. ''foo/valid-value''   - a link that will resolve
        #   2. ''foo/invalid-value'' - meant as a link, but will not resolve
        #   3. text that merely contains a slash and was never meant to link
        # (1) is simple: a successful link swaps in the referenced value's text.
        # (2) and (3) are hard to tell apart yet need opposite handling:
        # (3) must keep its text, (2) must display only "invalid-value".
        #
        # Compromise: when the text looks close enough to a link - it carries
        # a for or type value and has nothing resembling a close tag (whose
        # slash would otherwise read as a for value) - replace the text up
        # front so link metadata is never displayed. Otherwise keep the text
        # and only record what to swap in if the link does succeed.
        looksLikeLink = (linkFor is not None or declaredType is not None) and "</" not in contents
        if looksLikeLink:
            h.clearContents(placeholder)
            placeholder.text = linkText
        else:
            placeholder.set("bs-replace-text-on-link-success", linkText)
        if linkFor is not None:
            placeholder.set("for", linkFor)


def transformAutolinkShortcuts(doc: t.SpecT) -> None:
# Do the remaining textual replacements

Expand Down

0 comments on commit 140edc3

Please sign in to comment.