Skip to content

Commit

Permalink
Fiddle with the handling of markup inside of maybe autolinks
Browse files Browse the repository at this point in the history
  • Loading branch information
tabatkins committed Mar 20, 2024
1 parent 29314d1 commit 7ed6d71
Show file tree
Hide file tree
Showing 20 changed files with 60 additions and 14 deletions.
29 changes: 17 additions & 12 deletions bikeshed/h/parser/parser.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@
SafeText,
SelfClosedTag,
StartTag,
escapeAttr,
escapeHTML,
)
from .preds import charRefs
Expand Down Expand Up @@ -894,11 +895,11 @@ def parseCSSMaybe(s: Stream, start: int) -> Result[list[ParserNode]]:
# also if it actually produces a link
# (like ''width: <<length>>'' linking to 'width')
# it'll be broken anyway.
# So we'll hack this in - << gets turned into <
# So we'll hack this in - << gets turned into &lt;
# within a maybe.
# No chance of a link, but won't misparse in weird ways.
if "<<" in text:
text = re.sub(r"<<", "<", text)
text = re.sub(r"<<", "&lt;", text)
text = re.sub(r">>", ">", text)

# This syntax does double duty as both a linking syntax
Expand All @@ -925,19 +926,19 @@ def parseCSSMaybe(s: Stream, start: int) -> Result[list[ParserNode]]:
startTag = StartTag(
line=s.line(start),
endLine=s.line(textStart),
tag="a",
tag="bs-link",
attrs={
"bs-autolink-syntax": s[start:nodeEnd],
"bs-autolink-syntax": escapeAttr(s[start:nodeEnd]),
"class": "css",
"data-link-type": "propdesc",
"data-lt": propdescname,
},
)
if for_:
startTag.attrs["data-link-for"] = for_
startTag.attrs["data-link-for"] = escapeAttr(for_)
startTag.attrs["data-link-type"] = "descriptor"
startTag.finalize()
tagMiddle = SafeText(
tagMiddle = RawText(
line=s.line(textStart),
endLine=s.line(textEnd),
text=text,
Expand Down Expand Up @@ -988,16 +989,20 @@ def parseCSSMaybe(s: Stream, start: int) -> Result[list[ParserNode]]:
startTag = StartTag(
line=s.line(start),
endLine=s.line(textStart),
tag="a",
tag="bs-link",
attrs={
"bs-autolink-syntax": s[start:nodeEnd],
"bs-autolink-syntax": escapeAttr(s[start:nodeEnd]),
"class": "css",
"data-link-type": linkType,
"data-lt": valueName,
"data-lt": escapeAttr(valueName),
},
)
if "&lt;" in valueName:
m.die(f"The autolink {s[start:nodeEnd]} is using an HTML escape (or <<) in its value; you probably don't want to escape things there.", lineNum=s.line(start))
if for_:
startTag.attrs["data-link-for"] = for_
if "&lt;" in for_:
m.die(f"The autolink {s[start:nodeEnd]} is using an HTML escape in its for value; you probably don't want to escape things there.", lineNum=s.line(start))
startTag.attrs["data-link-for"] = escapeAttr(for_)
if (for_ is not None and not for_.endswith("<")) or match[3] is not None:
tagMiddle = SafeText(
line=s.line(textStart),
Expand All @@ -1006,7 +1011,7 @@ def parseCSSMaybe(s: Stream, start: int) -> Result[list[ParserNode]]:
)
else:
startTag.attrs["bs-replace-text-on-link-success"] = valueName
tagMiddle = SafeText(
tagMiddle = RawText(
line=s.line(textStart),
endLine=s.line(textEnd),
text=text,
Expand All @@ -1027,7 +1032,7 @@ def parseCSSMaybe(s: Stream, start: int) -> Result[list[ParserNode]]:
endLine=s.line(textStart),
tag="css",
)
tagMiddle = SafeText(
tagMiddle = RawText(
line=s.line(textStart),
endLine=s.line(textEnd),
text=text,
Expand Down
4 changes: 3 additions & 1 deletion bikeshed/unsortedJunk.py
Original file line number Diff line number Diff line change
Expand Up @@ -685,12 +685,14 @@ def processAutolinks(doc: t.SpecT) -> None:
# An <a> without an href is an autolink.
# <bs-link> is also treated as an autolink; it is renamed to <a> at the start of the loop below.
# <i> is a legacy syntax for term autolinks. If it links up, we change it into an <a>.
# We exclude bibliographical links, as those are processed in `processBiblioLinks`.
query = "a:not([href]):not([data-link-type='biblio'])"
query = "a:not([href]):not([data-link-type='biblio']), bs-link"
if doc.md.useIAutolinks:
m.warn("Use <i> Autolinks is deprecated and will be removed. Please switch to using <a> elements.")
query += ", i"
autolinks = h.findAll(query, doc)
for el in autolinks:
if el.tag == "bs-link":
el.tag = "a"
# Explicitly empty linking text indicates this shouldn't be an autolink.
if el.get("data-lt") == "":
continue
Expand Down
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
LINE 124: Shorthand <<color space>> does not match any recognized shorthand grammar.
LINE 632: Shorthand <<color space>> does not match any recognized shorthand grammar.
LINE 721: The autolink ''from <<color>>'' is using an HTML escape in its value; you probably don't want to escape things there.
LINE 79: The heading 'color spaces {#color spaces-section}' needs a manually-specified ID.
LINE 47: Image doesn't exist, so I couldn't determine its width and height: 'images/LC-picker-scale.png'
LINE 48: Image doesn't exist, so I couldn't determine its width and height: 'images/LC-picker-map2.png'
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@ LINE 3908:7: Garbage after id=.
LINE 3908:1: Garbage at 3908:4 in <p>.
LINE 3943:21: Spurious / in <img>.
LINE 3976:21: Spurious / in <img>.
LINE 4641: The autolink ''<<integer [0,∞]>>+'' is using an HTML escape in its value; you probably don't want to escape things there.
LINE 4832:22: Spurious / in <img>.
LINE 5687:2: Spurious / in <img>.
LINE 5692:3: Spurious / in <img>.
Expand Down
2 changes: 2 additions & 0 deletions tests/github/w3c/csswg-drafts/css-gcpm-3/Overview.console.txt
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,8 @@ LINE 213:1: Spurious / in <img>.
LINE 299:1: Spurious / in <img>.
LINE 385:1: Spurious / in <img>.
LINE 749:1: Spurious / in <img>.
LINE 932: The autolink ''attr(&lt;identifier>)'' is using an HTML escape in its value; you probably don't want to escape things there.
LINE 991: The autolink ''attr(&lt;identifier>)'' is using an HTML escape in its value; you probably don't want to escape things there.
LINT: Your document appears to use spaces to indent, but line 23 starts with tabs.
LINT: Your document appears to use spaces to indent, but line 46 starts with tabs.
LINT: Your document appears to use spaces to indent, but line 50 starts with tabs.
Expand Down
2 changes: 2 additions & 0 deletions tests/github/w3c/csswg-drafts/css-grid-1/Overview.console.txt
Original file line number Diff line number Diff line change
Expand Up @@ -5,8 +5,10 @@ LINE 295:3: Spurious / in <img>.
LINE 301:3: Spurious / in <img>.
LINE 607:3: Spurious / in <img>.
LINE 1250:4: Spurious / in <img>.
LINE 1542: The autolink ''minmax(auto, <<flex>>)'' is using an HTML escape in its value; you probably don't want to escape things there.
LINE 1688:4: Spurious / in <img>.
LINE 2488:5: Spurious / in <img>.
LINE 3650: The autolink ''&lt;content-distribution>/stretch'' is using an HTML escape in its for value; you probably don't want to escape things there.
LINT: Your document appears to use tabs to indent, but line 3681 starts with spaces.
WARNING: There are 1136 WPT tests underneath your path prefix 'css/css-grid/' that aren't in your document and must be added. (Use a <wpt hidden> if you don't actually want them in your document.)
css/css-grid/abspos/absolute-positioning-changing-containing-block-001.html
Expand Down
2 changes: 2 additions & 0 deletions tests/github/w3c/csswg-drafts/css-grid-2/Overview.console.txt
Original file line number Diff line number Diff line change
Expand Up @@ -5,8 +5,10 @@ LINE 281:3: Spurious / in <img>.
LINE 287:3: Spurious / in <img>.
LINE 593:3: Spurious / in <img>.
LINE 1296:4: Spurious / in <img>.
LINE 1620: The autolink ''minmax(auto, <<flex>>)'' is using an HTML escape in its value; you probably don't want to escape things there.
LINE 1766:4: Spurious / in <img>.
LINE 2634:5: Spurious / in <img>.
LINE 4188: The autolink ''&lt;content-distribution>/stretch'' is using an HTML escape in its for value; you probably don't want to escape things there.
LINT: Your document appears to use tabs to indent, but line 4219 starts with spaces.
LINE 103: Image doesn't exist, so I couldn't determine its width and height: 'images/flex-layout.png'
LINE 113: Image doesn't exist, so I couldn't determine its width and height: 'images/grid-layout.png'
Expand Down
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
LINE 2526:27: Garbage after the tagname in </h4>.
LINE 2928: The autolink ''[&lt;baseline-keyword> <<percentage>>]+'' is using an HTML escape in its value; you probably don't want to escape things there.
LINT: Your document appears to use tabs to indent, but line 2 starts with spaces.
LINT: Your document appears to use tabs to indent, but line 3 starts with spaces.
LINT: Your document appears to use tabs to indent, but line 2019 starts with spaces.
Expand Down
Original file line number Diff line number Diff line change
@@ -1,3 +1,5 @@
LINE 963: The autolink ''&lt;button>'' is using an HTML escape in its value; you probably don't want to escape things there.
LINE 964: The autolink ''&lt;input>'' is using an HTML escape in its value; you probably don't want to escape things there.
LINE 317: Image doesn't exist, so I couldn't determine its width and height: 'images/first-letter2.gif'
LINE ~181: No 'property' refs found for 'color' with spec 'css-color-3'.
'color'
Expand Down
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
LINE 172:42: Character reference '&B' didn't end in ;.
LINE 3099: The autolink ''u <<number>>'' is using an HTML escape in its value; you probably don't want to escape things there.
LINE 2386: The var 'top-level flag' (in global scope) is only used once.
If this is not a typo, please add an ignore='' attribute to the <var>.
LINE 3368: Multiple elements have the same ID 'at-rule'.
Expand Down
2 changes: 2 additions & 0 deletions tests/github/w3c/csswg-drafts/css-text-4/Overview.console.txt
Original file line number Diff line number Diff line change
@@ -1,3 +1,5 @@
LINE 320: The autolink ''word-boundary-detection/auto(<<lang>>)'' is using an HTML escape in its value; you probably don't want to escape things there.
LINE 1236: The autolink ''match-indent | <<length>> | <<percentage>>'' is using an HTML escape in its value; you probably don't want to escape things there.
LINT: Your document appears to use tabs to indent, but line 1909 starts with spaces.
LINT: Your document appears to use tabs to indent, but line 1911 starts with spaces.
LINT: Your document appears to use tabs to indent, but line 1913 starts with spaces.
Expand Down
2 changes: 1 addition & 1 deletion tests/github/w3c/csswg-drafts/css-text-4/Overview.html
Original file line number Diff line number Diff line change
Expand Up @@ -1496,7 +1496,7 @@ <h4 class="heading settled" data-level="2.2.1" id="word-boundary-detection"><spa
language-specific content analysis
in response to user preferences.
User agents with this behavior must do this
by setting the <a data-link-type="dfn" href="https://drafts.csswg.org/css-cascade-5/#declared-value" id="ref-for-declared-value">declared value</a> of <a class="property css" data-link-type="property" href="#propdef-word-boundary-detection" id="ref-for-propdef-word-boundary-detection①">word-boundary-detection</a> to <span class="css">auto(&lt;&lt;lang>>)</span> in the <a data-link-type="dfn" href="https://drafts.csswg.org/css-cascade-5/#cascade-origin-user" id="ref-for-cascade-origin-user">User Origin</a>.
by setting the <a data-link-type="dfn" href="https://drafts.csswg.org/css-cascade-5/#declared-value" id="ref-for-declared-value">declared value</a> of <a class="property css" data-link-type="property" href="#propdef-word-boundary-detection" id="ref-for-propdef-word-boundary-detection①">word-boundary-detection</a> to <span class="css">auto(&amp;lt;lang>)</span> in the <a data-link-type="dfn" href="https://drafts.csswg.org/css-cascade-5/#cascade-origin-user" id="ref-for-cascade-origin-user">User Origin</a>.
User agents that do not support the <span id="ref-for-cascade-origin-user①">User Origin</span> may use the <a data-link-type="dfn" href="https://drafts.csswg.org/css-cascade-5/#cascade-origin-ua" id="ref-for-cascade-origin-ua">User-Agent Origin</a> instead.</p>
<div class="advisement">
Manual analysis of the content can be more reliable than UA heuristics.
Expand Down
Original file line number Diff line number Diff line change
@@ -1,3 +1,7 @@
LINE 978: The autolink ''rotate3d(1, 0, 0, &lt;angle>)'' is using an HTML escape in its value; you probably don't want to escape things there.
LINE 980: The autolink ''rotate3d(0, 1, 0, &lt;angle>)'' is using an HTML escape in its value; you probably don't want to escape things there.
LINE 982: The autolink ''rotate3d(0, 0, 1, &lt;angle>)'' is using an HTML escape in its value; you probably don't want to escape things there.
LINE 982: The autolink ''rotate(&lt;angle>)'' is using an HTML escape in its value; you probably don't want to escape things there.
LINT: Your document appears to use tabs to indent, but line 52 starts with spaces.
LINT: Your document appears to use tabs to indent, but line 53 starts with spaces.
LINT: Your document appears to use tabs to indent, but line 54 starts with spaces.
Expand Down
Original file line number Diff line number Diff line change
@@ -1,3 +1,9 @@
LINE 251: The autolink ''&lt;length>#{1,4}'' is using an HTML escape in its value; you probably don't want to escape things there.
LINE 1552: The autolink ''&lt;resolution&gt;/dpi'' is using an HTML escape in its for value; you probably don't want to escape things there.
LINE 1552: The autolink ''&lt;resolution&gt;/dpcm'' is using an HTML escape in its for value; you probably don't want to escape things there.
LINE 1552: The autolink ''&lt;resolution&gt;/dppx'' is using an HTML escape in its for value; you probably don't want to escape things there.
LINE 1574: The autolink ''&lt;resolution>/dppx'' is using an HTML escape in its for value; you probably don't want to escape things there.
LINE 1674: The autolink ''<<position>> <<length>>?'' is using an HTML escape in its value; you probably don't want to escape things there.
LINE 423: Couldn't find WPT test 'css/css-values/initial-background-color.html' - did you misspell something?
LINE 423: Couldn't find WPT test 'css/css-values/unset-value-storage.html' - did you misspell something?
LINE 1808: Couldn't find WPT test 'css/vendor-imports/mozilla/mozilla-central-reftests/values3/calc-background-image-gradient-1.html' - did you misspell something?
Expand Down
Original file line number Diff line number Diff line change
@@ -1,3 +1,9 @@
LINE 247: The autolink ''&lt;length>#{1,4}'' is using an HTML escape in its value; you probably don't want to escape things there.
LINE 2122: The autolink ''&lt;resolution&gt;/dpi'' is using an HTML escape in its for value; you probably don't want to escape things there.
LINE 2122: The autolink ''&lt;resolution&gt;/dpcm'' is using an HTML escape in its for value; you probably don't want to escape things there.
LINE 2122: The autolink ''&lt;resolution&gt;/dppx'' is using an HTML escape in its for value; you probably don't want to escape things there.
LINE 2145: The autolink ''&lt;resolution>/dppx'' is using an HTML escape in its for value; you probably don't want to escape things there.
LINE 2276: The autolink ''<<position>> <<length>>?'' is using an HTML escape in its value; you probably don't want to escape things there.
LINE 545: Couldn't find WPT test 'css/css-values/initial-background-color.html' - did you misspell something?
LINE 545: Couldn't find WPT test 'css/css-values/unset-value-storage.html' - did you misspell something?
LINE 1509: Image doesn't exist, so I couldn't determine its width and height: 'images/Typography_Line_Terms.svg'
Expand Down
Original file line number Diff line number Diff line change
@@ -1,3 +1,5 @@
LINE 1097: The autolink ''&lt;length&gt;'' is using an HTML escape in its value; you probably don't want to escape things there.
LINE 1098: The autolink ''&lt;percentage&gt;'' is using an HTML escape in its value; you probably don't want to escape things there.
LINT: Line 1914's indent contains tabs after spaces.
LINT: Line 1915's indent contains tabs after spaces.
LINT: Your document appears to use spaces to indent, but line 2510 starts with tabs.
Expand Down
Original file line number Diff line number Diff line change
@@ -1,3 +1,5 @@
LINE 1175: The autolink ''&lt;length&gt;'' is using an HTML escape in its value; you probably don't want to escape things there.
LINE 1176: The autolink ''&lt;percentage&gt;'' is using an HTML escape in its value; you probably don't want to escape things there.
LINT: Line 2010's indent contains tabs after spaces.
LINT: Line 2011's indent contains tabs after spaces.
LINT: Your document appears to use spaces to indent, but line 2788 starts with tabs.
Expand Down
1 change: 1 addition & 0 deletions tests/github/w3c/csswg-drafts/css2/Overview.console.txt
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
LINE 15173: The autolink ''<<border-style>>/none'' is using an HTML escape in its for value; you probably don't want to escape things there.
LINT: Your document appears to use spaces to indent, but line 559 starts with tabs.
LINT: Your document appears to use spaces to indent, but line 560 starts with tabs.
LINT: Your document appears to use spaces to indent, but line 561 starts with tabs.
Expand Down
Original file line number Diff line number Diff line change
@@ -1,3 +1,5 @@
LINE 624: The autolink ''(600px &lt; height)'' is using an HTML escape in its value; you probably don't want to escape things there.
LINE 633: The autolink ''(400px &lt; width &lt; 1000px)'' is using an HTML escape in its value; you probably don't want to escape things there.
LINE 978:11: Character reference '&test' didn't end in ;.
LINT: Your document appears to use tabs to indent, but line 2260 starts with spaces.
LINT: Your document appears to use tabs to indent, but line 2261 starts with spaces.
Expand Down
Original file line number Diff line number Diff line change
@@ -1,3 +1,5 @@
LINE 630: The autolink ''(600px &lt; height)'' is using an HTML escape in its value; you probably don't want to escape things there.
LINE 639: The autolink ''(400px &lt; width &lt; 1000px)'' is using an HTML escape in its value; you probably don't want to escape things there.
LINE 984:11: Character reference '&test' didn't end in ;.
LINT: Your document appears to use tabs to indent, but line 1463 starts with spaces.
LINT: Your document appears to use tabs to indent, but line 1464 starts with spaces.
Expand Down

0 comments on commit 7ed6d71

Please sign in to comment.