Skip to content

Commit

Permalink
Fix ampersand parsing
Browse files Browse the repository at this point in the history
  • Loading branch information
siefkenj committed Sep 3, 2023
1 parent 2f50260 commit d483cd9
Show file tree
Hide file tree
Showing 11 changed files with 165 additions and 28 deletions.
6 changes: 4 additions & 2 deletions packages/parser/src/dast-to-xml/dast-util-to-xml.ts
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
// Code modified from xast-util-to-xml MIT License https://github.com/syntax-tree/xast-util-to-xml
import { ccount } from "ccount";
import { DastElement, DastNodes, PrintOptions } from "../types";
import { clean, escape, name } from "./utils";
import { escape, mergeAdjacentTextInArray, name } from "./utils";

/**
* Serialize a xast tree to XML.
Expand Down Expand Up @@ -29,7 +29,9 @@ export function nodesToXml(
options: PrintOptions,
): string {
if (Array.isArray(node)) {
return node.map((child) => nodesToXml(child, options)).join("");
return mergeAdjacentTextInArray(node)
.map((child) => nodesToXml(child, options))
.join("");
}

const type = node && node.type;
Expand Down
38 changes: 38 additions & 0 deletions packages/parser/src/dast-to-xml/utils.ts
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
import { stringifyEntitiesLight } from "stringify-entities";
import { DastNodes } from "../types";

/**
* Escape a string.
Expand Down Expand Up @@ -40,3 +41,40 @@ export function name(value: string) {
const subset = ["\t", "\n", " ", '"', "&", "'", "/", "<", "=", ">"];
return escape(value, subset);
}

/**
 * Merge runs of adjacent text nodes in an array into single text nodes.
 *
 * Every maximal run of two or more consecutive nodes whose `type` is `"text"`
 * is replaced by one text node whose `value` is the concatenation of the run's
 * values. When both the first node of the run and a later node carry a
 * `position`, the merged node's `position.end` is advanced to that later
 * node's `position.end`. All other nodes are passed through by reference, in
 * their original order.
 *
 * The input is never modified: merged nodes are fresh objects and their
 * `position` is copied before being extended, so the positions stored on the
 * original nodes stay intact.
 *
 * @param nodes - The sibling nodes to scan.
 * @returns The same `nodes` array instance when it contains no adjacent text
 *   nodes; otherwise a new array with each run of text nodes merged.
 */
export function mergeAdjacentTextInArray(nodes: DastNodes[]): DastNodes[] {
    const needsMerging = nodes.some(
        (n, i) => n.type === "text" && nodes[i + 1]?.type === "text",
    );
    if (!needsMerging) {
        // Fast path: nothing to merge, so avoid allocating a new array.
        return nodes;
    }

    const ret: DastNodes[] = [];
    for (let i = 0; i < nodes.length; i++) {
        const node = nodes[i];
        if (node.type !== "text" || nodes[i + 1]?.type !== "text") {
            // Not the start of a run of text nodes; keep the node as-is.
            ret.push(node);
            continue;
        }

        // `node` starts a run of at least two text nodes. Work on a copy so
        // that the caller's node is left untouched.
        const merged = { ...node };
        if (merged.position) {
            // The spread above is shallow: without this copy, extending
            // `position.end` below would also rewrite the original node's
            // position.
            merged.position = { ...merged.position };
        }

        let nextNode = nodes[i + 1];
        while (nextNode?.type === "text") {
            merged.value += nextNode.value;
            if (merged.position && nextNode.position) {
                merged.position.end = nextNode.position.end;
            }
            // Consume the absorbed node so the outer loop skips over it.
            i++;
            nextNode = nodes[i + 1];
        }
        ret.push(merged);
    }

    return ret;
}
14 changes: 8 additions & 6 deletions packages/parser/src/doenet.grammar
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@ entity {
Text |
EntityReference |
CharacterReference |
Ampersand |
Cdata |
Element |
Comment |
Expand Down Expand Up @@ -43,8 +44,8 @@ Element {
}

AttributeValue {
"\"" (doubleQuoteAttributeContent | EntityReference | CharacterReference | InvalidEntity)* "\"" |
"'" (singleQuoteAttributeContent | EntityReference | CharacterReference | InvalidEntity)* "'"
"\"" (doubleQuoteAttributeContent | EntityReference | CharacterReference | Ampersand)* "\"" |
"'" (singleQuoteAttributeContent | EntityReference | CharacterReference | Ampersand)* "'"
}

Comment { "<!--" commentContent* "-->" }
Expand Down Expand Up @@ -93,13 +94,14 @@ Cdata { cdataStart cdataContent* "]]>" }

Is { "=" }

EntityReference { "&" ![#; ]+ ";" }
// See https://www.w3.org/TR/2006/REC-xml11-20060816/#sec-references
EntityReference { "&" identifier ";" }

CharacterReference { "&#" ![; ]+ ";" }
CharacterReference { ("&#x" $[0-9a-fA-F]+ ";" | "&#" $[0-9]+ ";") }

InvalidEntity { "&" }
Ampersand { "&" }

@precedence { CharacterReference, EntityReference, InvalidEntity }
@precedence { CharacterReference, EntityReference, Ampersand }

Text { ("<" space | "<=" | ![<&])+ }

Expand Down
20 changes: 10 additions & 10 deletions packages/parser/src/generated-assets/lezer-doenet.terms.ts
Original file line number Diff line number Diff line change
Expand Up @@ -12,16 +12,16 @@ export const
Text = 7,
EntityReference = 8,
CharacterReference = 9,
Cdata = 10,
Element = 11,
EndTag = 12,
OpenTag = 13,
TagName = 14,
Attribute = 15,
AttributeName = 16,
Is = 17,
AttributeValue = 18,
InvalidEntity = 19,
Ampersand = 10,
Cdata = 11,
Element = 12,
EndTag = 13,
OpenTag = 14,
TagName = 15,
Attribute = 16,
AttributeName = 17,
Is = 18,
AttributeValue = 19,
CloseTag = 20,
SelfCloseEndTag = 21,
SelfClosingTag = 22,
Expand Down
14 changes: 7 additions & 7 deletions packages/parser/src/generated-assets/lezer-doenet.ts
Original file line number Diff line number Diff line change
Expand Up @@ -4,19 +4,19 @@ import {startTag, commentContent, piContent, cdataContent, elementContext} from
import {NodeProp} from "@lezer/common"
export const parser = LRParser.deserialize({
version: 13,
states: "(jOVOaOOOwOxO'#CfO!PQpO'#CiO!vOaO'#CgOOOP'#Cg'#CgO!}OrO'#CsO#VOtO'#CtO#_QpO'#CuOOOP'#DV'#DVOOOP'#Cw'#CwQVOaOOOOOW'#Cx'#CxO#dOxO,59QOOOP,59Q,59QO#lQ!bO,59TOOOP'#C|'#C|O#wOaO,59RO$OQpO'#CpOOOP,59R,59ROOOQ'#C}'#C}O$TOrO,59_OOOP,59_,59_OOOS'#DO'#DOO$]OtO,59`OOOP,59`,59`O$eQpO,59aOOOP-E6u-E6uOOOW-E6v-E6vOOOP1G.l1G.lO$jQ!bO'#CkOOQO'#Cy'#CyO$xQ!bO1G.oOOOP1G.o1G.oOOOP1G.x1G.xOOOP-E6z-E6zOOOP1G.m1G.mO%TQpO,59[OOOQ-E6{-E6{OOOP1G.y1G.yOOOS-E6|-E6|OOOP1G.z1G.zOOOP1G.{1G.{O%YQpO,59VOOQO-E6w-E6wOOOP7+$Z7+$ZOOOP7+$d7+$dOOOP1G.v1G.vO%bO#tO'#CnO%sO&jO'#CnOOQO1G.q1G.qOOOO'#Cz'#CzO&UO#tO,59YOOQO,59Y,59YOOOO'#C{'#C{O&gO&jO,59YOOOO-E6x-E6xOOQO1G.t1G.tOOOO-E6y-E6y",
stateData: "&x~OxOS~OPQOSVOTWOVWOWWOXWOjXO{PO!RTO!TUO~OvZOz]O~O^^O~OPQOQaOSVOTWOVWOWWOXWO{PO!RTO!TUO~ORbO~P!UOtcO!QeO~OufO!ShO~O^iO~OvZOzlO~O[pO`mOeqO~ORsO~P!UO^tO~OtcO!QvO~OufO!SxO~O[yO~OazO[_X`_Xe_X~O[|O`mOe}O~O[!OO~O|!PO!O!QO~OW!SOX!SOc!SO|!UO}!SO~OW!VOX!VOc!VO!O!UO!P!VO~OW!SOX!SOc!SO|!YO}!SO~OW!VOX!VOc!VO!O!YO!P!VO~O",
goto: "#vzPPPPPPPPPP{{P!RP!XPP!]P!`P!f{{{P!l!r!x#O#U#[#b#hPPPPPP#nXWORY`XRORY`Tn^oR!RzQbRRs`XSORY`QYORjYQ[PRk[Qo^R{oQ!T!PR!X!TQ!W!QR!Z!WQ`RRr`QdTRudQgURwgSXOYT_R`",
nodeNames: "⚠ StartTag StartCloseTag MissingCloseTag StartCloseTag StartCloseTag Document Text EntityReference CharacterReference Cdata Element EndTag OpenTag TagName Attribute AttributeName Is AttributeValue InvalidEntity CloseTag SelfCloseEndTag SelfClosingTag Comment ProcessingInst MismatchedCloseTag DoctypeDecl",
states: "(jOVOaOOOzOxO'#CgO!SQpO'#CjO!|OaO'#ChOOOP'#Ch'#ChO#TOrO'#CsO#]OtO'#CtO#eQpO'#CuOOOP'#DV'#DVOOOP'#Cw'#CwQVOaOOOOOW'#Cx'#CxO#jOxO,59ROOOP,59R,59RO#rQ!bO,59UOOOP'#C|'#C|O#}OaO,59SO$UQpO'#CpOOOP,59S,59SOOOQ'#C}'#C}O$ZOrO,59_OOOP,59_,59_OOOS'#DO'#DOO$cOtO,59`OOOP,59`,59`O$kQpO,59aOOOP-E6u-E6uOOOW-E6v-E6vOOOP1G.m1G.mO$pQ!bO'#ClOOQO'#Cy'#CyO%OQ!bO1G.pOOOP1G.p1G.pOOOP1G.x1G.xOOOP-E6z-E6zOOOP1G.n1G.nO%ZQpO,59[OOOQ-E6{-E6{OOOP1G.y1G.yOOOS-E6|-E6|OOOP1G.z1G.zOOOP1G.{1G.{O%`QpO,59WOOQO-E6w-E6wOOOP7+$[7+$[OOOP7+$d7+$dOOOP1G.v1G.vO%hO#tO'#CoO%yO&jO'#CoOOQO1G.r1G.rOOOO'#Cz'#CzO&[O#tO,59ZOOQO,59Z,59ZOOOO'#C{'#C{O&mO&jO,59ZOOOO-E6x-E6xOOQO1G.u1G.uOOOO-E6y-E6y",
stateData: "'O~OxOS~OPQOSVOTWOVWOWWOXWOYWOjXO{PO!RTO!TUO~OvZOz]O~O_^O~OPQOQaOSVOTWOVWOWWOXWOYWO{PO!RTO!TUO~ORbO~P!XOtcO!QeO~OufO!ShO~O_iO~OvZOzlO~O]pOamOeqO~ORsO~P!XO_tO~OtcO!QvO~OufO!SxO~O]yO~ObzO]`Xa`Xe`X~O]|OamOe}O~O]!OO~O|!PO!O!QO~OW!SOX!SOY!SO|!UO}!SO~OW!VOX!VOY!VO!O!UO!P!VO~OW!SOX!SOY!SO|!YO}!SO~OW!VOX!VOY!VO!O!YO!P!VO~O",
goto: "#vzPPPPPPPPPPP{{P!RP!XPP!]!`P!f{{{P!l!r!x#O#U#[#b#hPPPPPP#nXWORY`XRORY`Tn^oR!RzQbRRs`XSORY`QYORjYQ[PRk[Qo^R{oQ!T!PR!X!TQ!W!QR!Z!WQ`RRr`QdTRudQgURwgSXOYT_R`",
nodeNames: "⚠ StartTag StartCloseTag MissingCloseTag StartCloseTag StartCloseTag Document Text EntityReference CharacterReference Ampersand Cdata Element EndTag OpenTag TagName Attribute AttributeName Is AttributeValue CloseTag SelfCloseEndTag SelfClosingTag Comment ProcessingInst MismatchedCloseTag DoctypeDecl",
maxTerm: 51,
context: elementContext,
nodeProps: [
[NodeProp.closedBy, 1,"SelfCloseEndTag EndTag",13,"CloseTag MissingCloseTag"],
[NodeProp.openedBy, 12,"StartTag StartCloseTag",20,"OpenTag",21,"StartTag"]
[NodeProp.closedBy, 1,"SelfCloseEndTag EndTag",14,"CloseTag MissingCloseTag"],
[NodeProp.openedBy, 13,"StartTag StartCloseTag",20,"OpenTag",21,"StartTag"]
],
skippedNodes: [0],
repeatNodeCount: 8,
tokenData: "!&o~R!XOX$nXY.sYZ.sZ]$n]^.s^p$npq.sqr$nrs/zsv$nvw0dwx2Rx}$n}!O2n!O!P$n!P!Q4z!Q![$n![!]6b!]!^$n!^!_:e!_!`! i!`!a!!Z!a!b!!{!b!c$n!c!}6b!}#P$n#P#Q!$c#Q#R$n#R#S6b#S#T$n#T#o6b#o%W$n%W%o6b%o%p$n%p&a6b&a&b$n&b1p6b1p4U$n4U4d6b4d4e$n4e$IS6b$IS$I`$n$I`$Ib6b$Ib$Kh$n$Kh%#t6b%#t&/x$n&/x&Et6b&Et&FV$n&FV;'S6b;'S;:j9o;:j?&r$n?&r?Ah6b?Ah?BY$n?BY?Mn6b?Mn~$ni$wVVP}W!P`Or$nrs%^sv$nwx)ax!^$n!^!_,T!_~$na%eTVP!P`Ov%^wx%tx!^%^!^!_'a!_~%^P%ySVPOv%tw!^%t!^!_&V!_~%tP&YTXY&iYZ&i]^&ipq&i!_!`%tP&nZVPOX%tXY&iYZ&iZ]%t]^&i^p%tpq&iqv%tw!^%t!^!_&V!_~%ta'fZ!P`OX(XXY(dYZ(dZ](X]^(d^p(Xpq(dqv(Xx!_(X!_!`%^!`~(X`(^Q!P`Ov(Xx~(Xa(k[VP!P`OX%^XY(dYZ(dZ]%^]^(d^p%^pq(dqv%^wx%tx!^%^!^!_'a!_~%^X)hUVP}WOr)ars%tsv)aw!^)a!^!_)z!_~)aX*P[}WOX*uXY+TYZ+TZ]*u]^+T^p*upq+Tqr*usv*uw!_*u!_!`)a!`~*uW*zR}WOr*usv*uw~*uX+[]VP}WOX)aXY+TYZ+TZ])a]^+T^p)apq+Tqr)ars%tsv)aw!^)a!^!_)z!_~)ai,[^}W!P`OX-WXY-nYZ-nZ]-W]^-n^p-Wpq-nqr-Wrs(Xsv-Wwx*ux!_-W!_!`$n!`~-Wh-_T}W!P`Or-Wrs(Xsv-Wwx*ux~-Wi-w^VP}W!P`OX$nXY-nYZ-nZ]$n]^-n^p$npq-nqr$nrs%^sv$nwx)ax!^$n!^!_,T!_~$no/O^VP}W!P`xUOX$nXY.sYZ.sZ]$n]^.s^p$npq.sqr$nrs%^sv$nwx)ax!^$n!^!_,T!_~$nk0TT|YVP!P`Ov%^wx%tx!^%^!^!_'a!_~%^~0iTc~Op0xqs0xst1at!]0x!^~0x~0{TOp0xqs0xt!]0x!]!^1[!^~0x~1aOW~~1dROp1mq!]1m!^~1m~1pSOp1mq!]1m!]!^1|!^~1m~2ROX~k2[U!ObVP}WOr)ars%tsv)aw!^)a!^!_)z!_~)ak2wXVP}W!P`Or$nrs%^sv$nwx)ax}$n}!O3d!O!^$n!^!_,T!_~$nk3mXVP}W!P`Or$nrs%^sv$nwx)ax!^$n!^!_,T!_!`$n!`!a4Y!a~$nk4eV!QQVP}W!P`Or$nrs%^sv$nwx)ax!^$n!^!_,T!_~$nm5TXVP}W!P`Or$nrs%^sv$nwx)ax!^$n!^!_,T!_!`$n!`!a5p!a~$nm5{VeSVP}W!P`Or$nrs%^sv$nwx)ax!^$n!^!_,T!_~$no6o!O`S^QVP}W!P`Or$nrs%^sv$nwx)ax}$n}!O6b!O!P6b!P!Q$n!Q![6b![!]6b!]!^$n!^!_,T!_!c$n!c!}6b!}#R$n#R#S6b#S#T$n#T#o6b#o$}$n$}%O6b%O%W$n%W%o6b%o%p$n%p&a6b&a&b$n&b1p6b1p4U6b4U4d6b4d4e$n4e$IS6b$IS$I`$n$I`$Ib6b$Ib$Je$n$Je$Jg6b$Jg$Kh$n$Kh%#t6b%#t&/x$n&/x&Et6b&Et&FV$n&FV;'S6b;'S;:j9o;:j?&r$n?&r?Ah6b?Ah?BY$n?BY?Mn6b?Mn~$no9xXVP}W!P`Or$nrs%^sv$nwx)ax!^$n!^!_,T!_;=`$n;=`<%l6b<%l~$ni:l`}W!P`OX-WXY-nYZ-nZ]-W]^-n^p-Wpq-nqr;nrs(Xsv-Wwx*ux!_-W!_!`$n!`!a-W!a!b! 
P!b~-Wi;u]}W!P`Or-Wrs(Xsv-Wwx*ux}-W}!O<n!O!f-W!f!g=t!g!}-W!}#ODm#O#W-W#W#XKa#X~-Wi<uV}W!P`Or-Wrs(Xsv-Wwx*ux}-W}!O=[!O~-Wi=eT!RP}W!P`Or-Wrs(Xsv-Wwx*ux~-Wi={V}W!P`Or-Wrs(Xsv-Wwx*ux!q-W!q!r>b!r~-Wi>iV}W!P`Or-Wrs(Xsv-Wwx*ux!e-W!e!f?O!f~-Wi?VV}W!P`Or-Wrs(Xsv-Wwx*ux!v-W!v!w?l!w~-Wi?sV}W!P`Or-Wrs(Xsv-Wwx*ux!{-W!{!|@Y!|~-Wi@aV}W!P`Or-Wrs(Xsv-Wwx*ux!r-W!r!s@v!s~-Wi@}V}W!P`Or-Wrs(Xsv-Wwx*ux!g-W!g!hAd!h~-WiAkW}W!P`OrAdrsBTsvAdvwBiwxCXx!`Ad!`!aDT!a~AdaBYT!P`OvBTvxBix!`BT!`!aBz!a~BTPBlRO!`Bi!`!aBu!a~BiPBzOjPaCRQjP!P`Ov(Xx~(XXC^V}WOrCXrsBisvCXvwBiw!`CX!`!aCs!a~CXXCzRjP}WOr*usv*uw~*uiD^TjP}W!P`Or-Wrs(Xsv-Wwx*ux~-WiDtX}W!P`Or-Wrs(Xsv-Wwx*ux!e-W!e!fEa!f#V-W#V#WH{#W~-WiEhV}W!P`Or-Wrs(Xsv-Wwx*ux!f-W!f!gE}!g~-WiFUV}W!P`Or-Wrs(Xsv-Wwx*ux!c-W!c!dFk!d~-WiFrV}W!P`Or-Wrs(Xsv-Wwx*ux!v-W!v!wGX!w~-WiG`V}W!P`Or-Wrs(Xsv-Wwx*ux!c-W!c!dGu!d~-WiG|V}W!P`Or-Wrs(Xsv-Wwx*ux!}-W!}#OHc#O~-WiHlT{P}W!P`Or-Wrs(Xsv-Wwx*ux~-WiISV}W!P`Or-Wrs(Xsv-Wwx*ux#W-W#W#XIi#X~-WiIpV}W!P`Or-Wrs(Xsv-Wwx*ux#T-W#T#UJV#U~-WiJ^V}W!P`Or-Wrs(Xsv-Wwx*ux#h-W#h#iJs#i~-WiJzV}W!P`Or-Wrs(Xsv-Wwx*ux#T-W#T#UGu#U~-WiKhV}W!P`Or-Wrs(Xsv-Wwx*ux#c-W#c#dK}#d~-WiLUV}W!P`Or-Wrs(Xsv-Wwx*ux#V-W#V#WLk#W~-WiLrV}W!P`Or-Wrs(Xsv-Wwx*ux#h-W#h#iMX#i~-WiM`V}W!P`Or-Wrs(Xsv-Wwx*ux#m-W#m#nMu#n~-WiM|V}W!P`Or-Wrs(Xsv-Wwx*ux#d-W#d#eNc#e~-WiNjV}W!P`Or-Wrs(Xsv-Wwx*ux#X-W#X#YAd#Y~-Wi! YT!TP}W!P`Or-Wrs(Xsv-Wwx*ux~-Wm! tVaSVP}W!P`Or$nrs%^sv$nwx)ax!^$n!^!_,T!_~$no!!fV[UVP}W!P`Or$nrs%^sv$nwx)ax!^$n!^!_,T!_~$nk!#UXVP}W!P`Or$nrs%^sv$nwx)ax!^$n!^!_,T!_!`$n!`!a!#q!a~$nk!#|V!SQVP}W!P`Or$nrs%^sv$nwx)ax!^$n!^!_,T!_~$nk!$lXVP}W!P`Or$nrs%^sv$nwx)ax!^$n!^!_,T!_#P$n#P#Q!%X#Q~$nk!%bXVP}W!P`Or$nrs%^sv$nwx)ax!^$n!^!_,T!_!`$n!`!a!%}!a~$nk!&YVzQVP}W!P`Or$nrs%^sv$nwx)ax!^$n!^!_,T!_~$n",
tokenData: "!)V~R!XOX$nXY.sYZ.sZ]$n]^.s^p$npq.sqr$nrs/zsv$nvw0dwx4ix}$n}!O5U!O!P$n!P!Q7b!Q![$n![!]8x!]!^$n!^!_<{!_!`!$P!`!a!$q!a!b!%c!b!c$n!c!}8x!}#P$n#P#Q!&y#Q#R$n#R#S8x#S#T$n#T#o8x#o%W$n%W%o8x%o%p$n%p&a8x&a&b$n&b1p8x1p4U$n4U4d8x4d4e$n4e$IS8x$IS$I`$n$I`$Ib8x$Ib$Kh$n$Kh%#t8x%#t&/x$n&/x&Et8x&Et&FV$n&FV;'S8x;'S;:j<V;:j?&r$n?&r?Ah8x?Ah?BY$n?BY?Mn8x?Mn~$ni$wVVP}W!P`Or$nrs%^sv$nwx)ax!^$n!^!_,T!_~$na%eTVP!P`Ov%^wx%tx!^%^!^!_'a!_~%^P%ySVPOv%tw!^%t!^!_&V!_~%tP&YTXY&iYZ&i]^&ipq&i!_!`%tP&nZVPOX%tXY&iYZ&iZ]%t]^&i^p%tpq&iqv%tw!^%t!^!_&V!_~%ta'fZ!P`OX(XXY(dYZ(dZ](X]^(d^p(Xpq(dqv(Xx!_(X!_!`%^!`~(X`(^Q!P`Ov(Xx~(Xa(k[VP!P`OX%^XY(dYZ(dZ]%^]^(d^p%^pq(dqv%^wx%tx!^%^!^!_'a!_~%^X)hUVP}WOr)ars%tsv)aw!^)a!^!_)z!_~)aX*P[}WOX*uXY+TYZ+TZ]*u]^+T^p*upq+Tqr*usv*uw!_*u!_!`)a!`~*uW*zR}WOr*usv*uw~*uX+[]VP}WOX)aXY+TYZ+TZ])a]^+T^p)apq+Tqr)ars%tsv)aw!^)a!^!_)z!_~)ai,[^}W!P`OX-WXY-nYZ-nZ]-W]^-n^p-Wpq-nqr-Wrs(Xsv-Wwx*ux!_-W!_!`$n!`~-Wh-_T}W!P`Or-Wrs(Xsv-Wwx*ux~-Wi-w^VP}W!P`OX$nXY-nYZ-nZ]$n]^-n^p$npq-nqr$nrs%^sv$nwx)ax!^$n!^!_,T!_~$no/O^VP}W!P`xUOX$nXY.sYZ.sZ]$n]^.s^p$npq.sqr$nrs%^sv$nwx)ax!^$n!^!_,T!_~$nk0TT|YVP!P`Ov%^wx%tx!^%^!^!_'a!_~%^~0iaY~st1n![!]2r!c!}2r#R#S2r#T#o2r%W%o2r%p&a2r&b1p2r4U4d2r4e$IS2r$I`$Ib2r$Kh%#t2r&/x&Et2r&FV;'S2r;'S;:j4c?&r?Ah2r?BY?Mn2r~1qQ!Q![1w#l#m2V~1zQ!Q![1w!]!^2Q~2VOX~~2YR!Q![2c!c!}2c#T#o2c~2fS!Q![2c!]!^2Q!c!}2c#T#o2c~2ug}!O2r!O!P2r!Q![2r![!]2r!]!^4^!c!}2r#R#S2r#T#o2r$}%O2r%W%o2r%p&a2r&b1p2r1p4U2r4U4d2r4e$IS2r$I`$Ib2r$Je$Jg2r$Kh%#t2r&/x&Et2r&FV;'S2r;'S;:j4c?&r?Ah2r?BY?Mn2r~4cOW~~4fP;=`<%l2rk4rU!ObVP}WOr)ars%tsv)aw!^)a!^!_)z!_~)ak5_XVP}W!P`Or$nrs%^sv$nwx)ax}$n}!O5z!O!^$n!^!_,T!_~$nk6TXVP}W!P`Or$nrs%^sv$nwx)ax!^$n!^!_,T!_!`$n!`!a6p!a~$nk6{V!QQVP}W!P`Or$nrs%^sv$nwx)ax!^$n!^!_,T!_~$nm7kXVP}W!P`Or$nrs%^sv$nwx)ax!^$n!^!_,T!_!`$n!`!a8W!a~$nm8cVeSVP}W!P`Or$nrs%^sv$nwx)ax!^$n!^!_,T!_~$no9V!OaS_QVP}W!P`Or$nrs%^sv$nwx)ax}$n}!O8x!O!P8x!P!Q$n!Q![8x![!]8x!]!^$n!^!_,T!_!c$n!c!}8x!}#R$n#R#S8x#S#T$n#T#o8x#o$}$n$}%O8x%O%W$n%W%o8x%o%p$n%p&a8x&a&b$n&b1p8x1p4U8x4U4d8x4d4e$n4e$IS8x$IS$I`$n$I`$Ib8x$
Ib$Je$n$Je$Jg8x$Jg$Kh$n$Kh%#t8x%#t&/x$n&/x&Et8x&Et&FV$n&FV;'S8x;'S;:j<V;:j?&r$n?&r?Ah8x?Ah?BY$n?BY?Mn8x?Mn~$no<`XVP}W!P`Or$nrs%^sv$nwx)ax!^$n!^!_,T!_;=`$n;=`<%l8x<%l~$ni=S`}W!P`OX-WXY-nYZ-nZ]-W]^-n^p-Wpq-nqr>Urs(Xsv-Wwx*ux!_-W!_!`$n!`!a-W!a!b!#g!b~-Wi>]]}W!P`Or-Wrs(Xsv-Wwx*ux}-W}!O?U!O!f-W!f!g@[!g!}-W!}#OGT#O#W-W#W#XMw#X~-Wi?]V}W!P`Or-Wrs(Xsv-Wwx*ux}-W}!O?r!O~-Wi?{T!RP}W!P`Or-Wrs(Xsv-Wwx*ux~-Wi@cV}W!P`Or-Wrs(Xsv-Wwx*ux!q-W!q!r@x!r~-WiAPV}W!P`Or-Wrs(Xsv-Wwx*ux!e-W!e!fAf!f~-WiAmV}W!P`Or-Wrs(Xsv-Wwx*ux!v-W!v!wBS!w~-WiBZV}W!P`Or-Wrs(Xsv-Wwx*ux!{-W!{!|Bp!|~-WiBwV}W!P`Or-Wrs(Xsv-Wwx*ux!r-W!r!sC^!s~-WiCeV}W!P`Or-Wrs(Xsv-Wwx*ux!g-W!g!hCz!h~-WiDRW}W!P`OrCzrsDksvCzvwEPwxEox!`Cz!`!aFk!a~CzaDpT!P`OvDkvxEPx!`Dk!`!aEb!a~DkPESRO!`EP!`!aE]!a~EPPEbOjPaEiQjP!P`Ov(Xx~(XXEtV}WOrEorsEPsvEovwEPw!`Eo!`!aFZ!a~EoXFbRjP}WOr*usv*uw~*uiFtTjP}W!P`Or-Wrs(Xsv-Wwx*ux~-WiG[X}W!P`Or-Wrs(Xsv-Wwx*ux!e-W!e!fGw!f#V-W#V#WKc#W~-WiHOV}W!P`Or-Wrs(Xsv-Wwx*ux!f-W!f!gHe!g~-WiHlV}W!P`Or-Wrs(Xsv-Wwx*ux!c-W!c!dIR!d~-WiIYV}W!P`Or-Wrs(Xsv-Wwx*ux!v-W!v!wIo!w~-WiIvV}W!P`Or-Wrs(Xsv-Wwx*ux!c-W!c!dJ]!d~-WiJdV}W!P`Or-Wrs(Xsv-Wwx*ux!}-W!}#OJy#O~-WiKST{P}W!P`Or-Wrs(Xsv-Wwx*ux~-WiKjV}W!P`Or-Wrs(Xsv-Wwx*ux#W-W#W#XLP#X~-WiLWV}W!P`Or-Wrs(Xsv-Wwx*ux#T-W#T#ULm#U~-WiLtV}W!P`Or-Wrs(Xsv-Wwx*ux#h-W#h#iMZ#i~-WiMbV}W!P`Or-Wrs(Xsv-Wwx*ux#T-W#T#UJ]#U~-WiNOV}W!P`Or-Wrs(Xsv-Wwx*ux#c-W#c#dNe#d~-WiNlV}W!P`Or-Wrs(Xsv-Wwx*ux#V-W#V#W! R#W~-Wi! YV}W!P`Or-Wrs(Xsv-Wwx*ux#h-W#h#i! o#i~-Wi! vV}W!P`Or-Wrs(Xsv-Wwx*ux#m-W#m#n!!]#n~-Wi!!dV}W!P`Or-Wrs(Xsv-Wwx*ux#d-W#d#e!!y#e~-Wi!#QV}W!P`Or-Wrs(Xsv-Wwx*ux#X-W#X#YCz#Y~-Wi!#pT!TP}W!P`Or-Wrs(Xsv-Wwx*ux~-Wm!$[VbSVP}W!P`Or$nrs%^sv$nwx)ax!^$n!^!_,T!_~$no!$|V]UVP}W!P`Or$nrs%^sv$nwx)ax!^$n!^!_,T!_~$nk!%lXVP}W!P`Or$nrs%^sv$nwx)ax!^$n!^!_,T!_!`$n!`!a!&X!a~$nk!&dV!SQVP}W!P`Or$nrs%^sv$nwx)ax!^$n!^!_,T!_~$nk!'SXVP}W!P`Or$nrs%^sv$nwx)ax!^$n!^!_,T!_#P$n#P#Q!'o#Q~$nk!'xXVP}W!P`Or$nrs%^sv$nwx)ax!^$n!^!_,T!_!`$n!`!a!(e!a~$nk!(pVzQVP}W!P`Or$nrs%^sv$nwx)ax!^$n!^!_,T!_~$n",
tokenizers: [startTag, commentContent, piContent, cdataContent, 0, 1, 2, 3, 4],
topRules: {"Document":[0,6]},
tokenPrec: 0
Expand Down
9 changes: 8 additions & 1 deletion packages/parser/src/lezer-to-dast/lezer-to-dast.ts
Original file line number Diff line number Diff line change
Expand Up @@ -141,6 +141,14 @@ export function _lezerToDast(node: SyntaxNode, source: string): DastRoot {
position: lezerNodeToPosition(node, offsetMap),
},
];
case "Ampersand":
return [
{
type: "text",
value: "&",
position: lezerNodeToPosition(node, offsetMap),
},
];
case "CharacterReference":
case "EntityReference":
return [
Expand Down Expand Up @@ -212,7 +220,6 @@ export function _lezerToDast(node: SyntaxNode, source: string): DastRoot {
case "TagName":
case "StartTag":
case "StartCloseTag":
case "InvalidEntity":
return [];
case "MismatchedCloseTag": {
const parent = node.parent;
Expand Down
18 changes: 17 additions & 1 deletion packages/parser/test/__snapshots__/dast-basic.test.ts.snap
Original file line number Diff line number Diff line change
Expand Up @@ -100,10 +100,26 @@ exports[`DAST > xml-doenet-ampersand-1.doenet prints correctly in Xml format and
`;
exports[`DAST > xml-doenet-ampersand-1.doenet prints correctly in Xml format and Doenet format 2`] = `
"&lt; &#x25; &#x25 -- this one is okay.
"&amp;lt; &amp;#x25; &#x25 -- this one is okay.
(an amp followed by text with a semi colon should be an amp.)"
`;
exports[`DAST > xml-doenet-ampersand-2.doenet prints correctly in Xml format and Doenet format 1`] = `
"&amp;
&amp;
&amp;amp"
`;
exports[`DAST > xml-doenet-ampersand-2.doenet prints correctly in Xml format and Doenet format 2`] = `
"&
&
&amp"
`;
exports[`DAST > xml-doenet-ampersand-3.doenet prints correctly in Xml format and Doenet format 1`] = `"&amp;"`;
exports[`DAST > xml-doenet-ampersand-3.doenet prints correctly in Xml format and Doenet format 2`] = `"&"`;
exports[`DAST > xml-doenet-attributes-1.doenet prints correctly in Xml format and Doenet format 1`] = `"<foo bar=\\"true\\"} baz=\\"hi\\" bang=\\"there\\" />"`;
exports[`DAST > xml-doenet-attributes-1.doenet prints correctly in Xml format and Doenet format 2`] = `"<foo bar baz=\\"hi\\" bang=\\"there\\" />"`;
Expand Down
6 changes: 5 additions & 1 deletion packages/parser/test/__snapshots__/xml-compat.test.ts.snap
Original file line number Diff line number Diff line change
Expand Up @@ -26,9 +26,13 @@ exports[`Xml files parse correctly > xml-12.doenet parses correctly 1`] = `"Docu

exports[`Xml files parse correctly > xml-doenet-ampersand-1.doenet parses correctly 1`] = `"Document(EntityReference,Text,EntityReference,Text,EntityReference,Text)"`;

exports[`Xml files parse correctly > xml-doenet-ampersand-2.doenet parses correctly 1`] = `"Document(Ampersand,Text,EntityReference,Text,Ampersand,Text)"`;

exports[`Xml files parse correctly > xml-doenet-ampersand-3.doenet parses correctly 1`] = `"Document(Ampersand)"`;

exports[`Xml files parse correctly > xml-doenet-attributes-1.doenet parses correctly 1`] = `"Document(Element(SelfClosingTag(StartTag,TagName,Attribute(AttributeName),Attribute(AttributeName,Is,AttributeValue),Attribute(AttributeName,Is,AttributeValue),SelfCloseEndTag)))"`;

exports[`Xml files parse correctly > xml-doenet-attributes-2.doenet parses correctly 1`] = `"Document(Element(SelfClosingTag(StartTag,TagName,Attribute(AttributeName,Is,AttributeValue),Attribute(AttributeName,Is,AttributeValue(InvalidEntity)),SelfCloseEndTag)))"`;
exports[`Xml files parse correctly > xml-doenet-attributes-2.doenet parses correctly 1`] = `"Document(Element(SelfClosingTag(StartTag,TagName,Attribute(AttributeName,Is,AttributeValue),Attribute(AttributeName,Is,AttributeValue(Ampersand)),SelfCloseEndTag)))"`;

exports[`Xml files parse correctly > xml-doenet-lessthan-in-text-1.doenet parses correctly 1`] = `"Document(Element(OpenTag(StartTag,TagName,EndTag),Text,CloseTag(StartCloseTag,TagName,EndTag)))"`;

Expand Down
Loading

0 comments on commit d483cd9

Please sign in to comment.