diff --git a/lib/sax.js b/lib/sax.js index 795d607e..1322caeb 100644 --- a/lib/sax.js +++ b/lib/sax.js @@ -276,12 +276,56 @@ // without a significant breaking change to either this parser, or the // JavaScript language. Implementation of an emoji-capable xml parser // is left as an exercise for the reader. - var nameStart = /[:_A-Za-z\u00C0-\u00D6\u00D8-\u00F6\u00F8-\u02FF\u0370-\u037D\u037F-\u1FFF\u200C-\u200D\u2070-\u218F\u2C00-\u2FEF\u3001-\uD7FF\uF900-\uFDCF\uFDF0-\uFFFD]/ - var nameBody = /[:_A-Za-z\u00C0-\u00D6\u00D8-\u00F6\u00F8-\u02FF\u0370-\u037D\u037F-\u1FFF\u200C-\u200D\u2070-\u218F\u2C00-\u2FEF\u3001-\uD7FF\uF900-\uFDCF\uFDF0-\uFFFD\u00B7\u0300-\u036F\u203F-\u2040.\d-]/ + function isNameStartCharCode (cc) { + return (cc >= 97 && cc <= 122) || // a-z + (cc >= 65 && cc <= 90) || // A-Z + cc === 58 || // : + cc === 95 || // _ + (cc >= 0x00C0 && cc <= 0x00D6) || + (cc >= 0x00D8 && cc <= 0x00F6) || + (cc >= 0x00F8 && cc <= 0x02FF) || + (cc >= 0x0370 && cc <= 0x037D) || + (cc >= 0x037F && cc <= 0x1FFF) || + (cc >= 0x200C && cc <= 0x200D) || + (cc >= 0x2070 && cc <= 0x218F) || + (cc >= 0x2C00 && cc <= 0x2FEF) || + (cc >= 0x3001 && cc <= 0xD7FF) || + (cc >= 0xF900 && cc <= 0xFDCF) || + (cc >= 0xFDF0 && cc <= 0xFFFD) + } + + function isNameBodyCharCode (cc) { + return isNameStartCharCode(cc) || + cc === 45 || // - + cc === 46 || // . + (cc >= 48 && cc <= 57) || // 0-9 + cc === 0x00B7 || + (cc >= 0x0300 && cc <= 0x036F) || + (cc >= 0x203F && cc <= 0x2040) + } + + function isNameStart (c) { + var cc = c.charCodeAt(0) + return isNameStartCharCode(cc) + } + + function isNameBody (c) { + var cc = c.charCodeAt(0) + return isNameBodyCharCode(cc) + } - var entityStart = /[#:_A-Za-z\u00C0-\u00D6\u00D8-\u00F6\u00F8-\u02FF\u0370-\u037D\u037F-\u1FFF\u200C-\u200D\u2070-\u218F\u2C00-\u2FEF\u3001-\uD7FF\uF900-\uFDCF\uFDF0-\uFFFD]/ - var entityBody = /[#:_A-Za-z\u00C0-\u00D6\u00D8-\u00F6\u00F8-\u02FF\u0370-\u037D\u037F-\u1FFF\u200C-\u200D\u2070-\u218F\u2C00-\u2FEF\u3001-\uD7FF\uF900-\uFDCF\uFDF0-\uFFFD\u00B7\u0300-\u036F\u203F-\u2040.\d-]/ + function isEntityStart (c) { + var cc = c.charCodeAt(0) + return cc === 35 || // # + isNameStartCharCode(cc) + } + + function isEntityBody (c) { + var cc = c.charCodeAt(0) + return cc === 35 || // # + isNameBodyCharCode(cc) + } function isWhitespace (c) { return c === ' ' || c === '\n' || c === '\r' || c === '\t' @@ -295,14 +339,6 @@ return c === '>' || isWhitespace(c) } - function isMatch (regex, c) { - return regex.test(c) - } - - function notMatch (regex, c) { - return !isMatch(regex, c) - } - var S = 0 sax.STATE = { BEGIN: S++, // leading byte order mark or whitespace @@ -1067,7 +1103,7 @@ parser.sgmlDecl = '' } else if (isWhitespace(c)) { // wait for it... - } else if (isMatch(nameStart, c)) { + } else if (isNameStart(c)) { parser.state = S.OPEN_TAG parser.tagName = c } else if (c === '/') { @@ -1270,7 +1306,7 @@ continue case S.OPEN_TAG: - if (isMatch(nameBody, c)) { + if (isNameBody(c)) { parser.tagName += c } else { newTag(parser) @@ -1305,7 +1341,7 @@ openTag(parser) } else if (c === '/') { parser.state = S.OPEN_TAG_SLASH - } else if (isMatch(nameStart, c)) { + } else if (isNameStart(c)) { parser.attribName = c parser.attribValue = '' parser.state = S.ATTRIB_NAME @@ -1324,7 +1360,7 @@ openTag(parser) } else if (isWhitespace(c)) { parser.state = S.ATTRIB_NAME_SAW_WHITE - } else if (isMatch(nameBody, c)) { + } else if (isNameBody(c)) { parser.attribName += c } else { strictFail(parser, 'Invalid attribute name') @@ -1347,7 +1383,7 @@ parser.attribName = '' if (c === '>') { openTag(parser) - } else if (isMatch(nameStart, c)) { + } else if (isNameStart(c)) { parser.attribName = c parser.state = S.ATTRIB_NAME } else { @@ -1391,7 +1427,7 @@ openTag(parser) } else if (c === '/') { parser.state = S.OPEN_TAG_SLASH - } else if (isMatch(nameStart, c)) { + } else if (isNameStart(c)) { strictFail(parser, 'No whitespace between attributes') parser.attribName = c parser.attribValue = '' @@ -1422,7 +1458,7 @@ if (!parser.tagName) { if (isWhitespace(c)) { continue - } else if (notMatch(nameStart, c)) { + } else if (!isNameStart(c)) { if (parser.script) { parser.script += '') { closeTag(parser) - } else if (isMatch(nameBody, c)) { + } else if (isNameBody(c)) { parser.tagName += c } else if (parser.script) { parser.script += '