-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathunraw.ts
300 lines (283 loc) · 9.65 KB
/
unraw.ts
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
/**
* @file unraw.ts
* @author Ian Sanders
* @copyright 2019 Ian Sanders
* @description Undo `String.raw`.
* Convert raw escape sequences to their respective characters.
* @license MIT
*/
/**
 * Strictly interpret a string as a hexadecimal integer.
 *
 * Unlike a bare `parseInt(hex, 16)`, a single character outside `0-9`, `a-f`
 * or `A-F` (for example "+", "-", "." or whitespace) invalidates the whole
 * input instead of merely ending the parse early.
 * @param hex Candidate string of hexadecimal digits.
 * @returns The integer value, or `NaN` if `hex` is empty or contains any
 * non-hexadecimal character.
 */
function parseHexToInt(hex: string): number {
  if (/[^0-9a-f]/i.test(hex)) {
    return NaN
  }
  // An empty string contains no invalid characters, but still parses to `NaN`.
  return parseInt(hex, 16)
}
/**
 * Parse a hexadecimal string, rejecting it if it is not valid hex or (when a
 * length is requested) does not have exactly that many characters.
 * @param hex The string to validate and parse.
 * @param errorName Key into `errorMessages` selecting the message of the
 * `SyntaxError` thrown when `hex` is rejected.
 * @param enforcedLength Optional exact number of characters `hex` must have.
 * When omitted, any length is accepted.
 * @returns The numeric value of `hex`.
 * @throws {SyntaxError} If `hex` is not strictly hexadecimal or has the wrong
 * length.
 */
function validateAndParseHex(
  hex: string,
  errorName: ErrorType,
  enforcedLength?: number
): number {
  const value = parseHexToInt(hex)
  const hasWrongLength =
    enforcedLength !== undefined && hex.length !== enforcedLength
  if (hasWrongLength || Number.isNaN(value)) {
    throw new SyntaxError(errorMessages.get(errorName))
  }
  return value
}
/**
 * Convert the body of a `\xHH` escape into the character it denotes.
 * @param code The text following `\x`; must be exactly two hexadecimal digits.
 * @returns The character whose UTF-16 code unit equals the parsed value.
 * @throws {SyntaxError} With the `MalformedHexadecimal` message if `code` is
 * not valid hex or is not two characters long.
 */
function parseHexadecimalCode(code: string): string {
  const requiredDigits = 2
  const charCode = validateAndParseHex(
    code,
    ErrorType.MalformedHexadecimal,
    requiredDigits
  )
  return String.fromCharCode(charCode)
}
/**
 * Convert the body of a `\uHHHH` escape, optionally followed by a second
 * `\uHHHH` escape, into the text it denotes.
 * @param code The four hexadecimal digits of the first escape.
 * @param surrogateCode Optional four hexadecimal digits of an immediately
 * following escape (the other half of a surrogate pair).
 * @returns A string built from the one or two parsed UTF-16 code units.
 * @throws {SyntaxError} With the `MalformedUnicode` message if either code is
 * not valid hex or is not four characters long.
 */
function parseUnicodeCode(code: string, surrogateCode?: string): string {
  const requiredDigits = 4
  // `code` is always validated first so its error takes precedence.
  const codeUnits = [
    validateAndParseHex(code, ErrorType.MalformedUnicode, requiredDigits),
  ]
  if (surrogateCode !== undefined) {
    codeUnits.push(
      validateAndParseHex(
        surrogateCode,
        ErrorType.MalformedUnicode,
        requiredDigits
      )
    )
  }
  return String.fromCharCode(...codeUnits)
}
/**
 * Report whether a string opens with `{` and closes with `}`.
 * @param text Text to inspect.
 * @returns `true` only when the first character is `{` and the last character
 * is `}`; a lone `{` or `}` and the empty string all yield `false`.
 */
function isCurlyBraced(text: string): boolean {
  const opensWithBrace = text.startsWith('{')
  const closesWithBrace = text.endsWith('}')
  return opensWithBrace && closesWithBrace
}
/**
 * Convert the body of a `\u{H…}` code point escape into the character it
 * denotes.
 * @param codePoint The text following `\u`, which must include both the
 * opening and the closing curly brace.
 * @returns The string produced by `String.fromCodePoint` for the parsed value.
 * @throws {SyntaxError} With the `MalformedUnicode` message if the braces are
 * missing or the content is not valid hex, or with the `CodePointLimit`
 * message if `String.fromCodePoint` rejects the value with a `RangeError`.
 */
function parseUnicodeCodePointCode(codePoint: string): string {
  if (!isCurlyBraced(codePoint)) {
    throw new SyntaxError(errorMessages.get(ErrorType.MalformedUnicode))
  }
  // No length is enforced here: any number of hex digits may appear in braces.
  const digits = codePoint.slice(1, -1)
  const value = validateAndParseHex(digits, ErrorType.MalformedUnicode)
  try {
    return String.fromCodePoint(value)
  } catch (err) {
    if (err instanceof RangeError) {
      // Surface the range failure as a `SyntaxError`, like every other error
      // thrown by this module.
      throw new SyntaxError(errorMessages.get(ErrorType.CodePointLimit))
    }
    throw err
  }
}
/**
 * Convert an octal escape body into the character it denotes, or reject it
 * outright.
 * @param code The octal digits. Not validated here: the matching regular
 * expression only ever captures octal digits for this group.
 * @param error If `true`, throw immediately without parsing `code`.
 * @returns The character whose UTF-16 code unit equals the parsed value.
 * @throws {SyntaxError} With the `OctalDeprecation` message, only if `error`
 * is `true`.
 */
function parseOctalCode(code: string, error: true): never
function parseOctalCode(code: string, error?: false): string
// Plain `boolean` overload for call sites where the compiler cannot tell
// whether the flag is `true` or `false`.
function parseOctalCode(code: string, error: boolean): string | never
function parseOctalCode(code: string, error = false): string | never {
  if (!error) {
    // Neither strict digit checking nor a length check is needed for octals;
    // see the note on `code` above.
    return String.fromCharCode(parseInt(code, 8))
  }
  throw new SyntaxError(errorMessages.get(ErrorType.OctalDeprecation))
}
/**
 * Lookup table from the character that follows a backslash to the special
 * character that escape produces.
 * Characters that simply stand for themselves (such as "\'") are deliberately
 * left out; callers fall back to the character itself when a key is absent.
 */
const singleCharacterEscapes: Map<string, string> = new Map([
  ['b', '\b'], // backspace
  ['f', '\f'], // form feed
  ['n', '\n'], // line feed
  ['r', '\r'], // carriage return
  ['t', '\t'], // horizontal tab
  ['v', '\v'], // vertical tab
  ['0', '\0'], // null character
])
/**
 * Resolve a single-character escape to the character it produces.
 * @param code The one character that followed the backslash.
 * @returns The special character registered for `code` in
 * `singleCharacterEscapes`, or `code` itself when there is no such entry.
 */
function parseSingleCharacterCode(code: string): string {
  const replacement = singleCharacterEscapes.get(code)
  return replacement ? replacement : code
}
/**
 * Matches every backslash escape sequence, including malformed ones, so that
 * the replacer can decide whether to convert it or throw.
 *
 * The alternatives are tried in the order listed, so at most one of the
 * capture groups below participates in a match. The only exception is
 * group 5, which can only participate together with group 4. A backslash at
 * the very end of the input matches through the final `$` alternative, with
 * no capture group participating at all.
 *
 * **Capture groups** (numbered as the regex engine numbers them; group 0, the
 * whole match, is not listed):
 * 1. A second backslash (the sequence is an escaped backslash)
 * 2. Hexadecimal code: up to two characters following `x`
 * 3. Unicode code point code following `u`, including the opening curly brace
 *    and, if present, the closing one
 * 4. Unicode escape code (exactly four characters) that is directly followed
 *    by another `\u` escape
 * 5. The code of that following escape: one to four characters, the first of
 *    which is not `{`
 * 6. Unicode escape code without a following escape: up to four characters
 * 7. Octal code _NOTE: includes a lone "0"._
 * 8. Any other single character (will never be \, x, u, or 0-7, because the
 *    earlier alternatives consume those first)
 */
const escapeMatch = /\\(?:(\\)|x([\s\S]{0,2})|u(\{[^}]*\}?)|u([\s\S]{4})\\u([^{][\s\S]{0,3})|u([\s\S]{0,4})|([0-3]?[0-7]{1,2})|([\s\S])|$)/g
/**
 * Convert raw escape sequences in a string into the characters they denote.
 * @param raw A string in which escapes appear as literal text, for example a
 * backslash followed by `n` rather than an actual line feed.
 * @param allowOctals If `true`, the deprecated octal escape sequences (such
 * as `\111`) are converted; if `false` (the default) they cause an error. A
 * lone `\0` is converted either way.
 * @returns `raw` with every escape sequence replaced by its character(s).
 * @throws {SyntaxError} If an escape sequence is malformed, if an octal escape
 * is found while `allowOctals` is `false`, or if `raw` ends with a single
 * backslash.
 */
export default function unraw(raw: string, allowOctals = false): string {
  // Every group is tested against `undefined` rather than for truthiness: an
  // empty capture (e.g. from a bare `\u`) must reach its own parser and fail
  // with that parser's error, not be mistaken for a trailing backslash.
  const convertEscape = (
    _match: string,
    backslash?: string,
    hex?: string,
    codePoint?: string,
    unicodeWithSurrogate?: string,
    surrogate?: string,
    unicode?: string,
    octal?: string,
    singleCharacter?: string
  ): string => {
    if (backslash !== undefined) {
      return '\\'
    }
    if (hex !== undefined) {
      return parseHexadecimalCode(hex)
    }
    if (codePoint !== undefined) {
      return parseUnicodeCodePointCode(codePoint)
    }
    if (unicodeWithSurrogate !== undefined) {
      return parseUnicodeCode(unicodeWithSurrogate, surrogate)
    }
    if (unicode !== undefined) {
      return parseUnicodeCode(unicode)
    }
    if (octal !== undefined) {
      // A lone "0" is the null character, not a deprecated octal escape.
      return octal === '0' ? '\0' : parseOctalCode(octal, !allowOctals)
    }
    if (singleCharacter === undefined) {
      // No group participated: the backslash was the last character of `raw`.
      throw new SyntaxError(errorMessages.get(ErrorType.EndOfString))
    }
    return parseSingleCharacterCode(singleCharacter)
  }
  return raw.replace(escapeMatch, convertEscape)
}
// NOTE: don't construct errors here or they'll have the wrong stack trace.
// NOTE: don't make custom error class; the JS engines use `SyntaxError`
/**
 * Identifiers for the error messages `unraw` can produce.
 * Note: these are message keys, _not_ distinct error classes. Every error
 * thrown is a plain `SyntaxError`.
 */
// Don't use const enum or JS users won't be able to access the enum values
export enum ErrorType {
  /**
   * A Unicode escape sequence is badly formed, for instance because the code
   * is too short (raw text `\u25`) or contains invalid characters (raw text
   * `\u2$A5`).
   */
  MalformedUnicode = 'MALFORMED_UNICODE',
  /**
   * A hexadecimal escape sequence is badly formed, for instance because the
   * code is too short (raw text `\x2`) or contains invalid characters (raw
   * text `\x2$`).
   */
  MalformedHexadecimal = 'MALFORMED_HEXADECIMAL',
  /**
   * A Unicode code point escape sequence names a code point that is too
   * high. The largest code point allowed is `\u{10FFFF}`, so `\u{110000}`
   * and above produce this error.
   */
  CodePointLimit = 'CODE_POINT_LIMIT',
  /**
   * An octal escape sequence (raw text such as `\234`) was encountered while
   * `allowOctals` is `false`.
   */
  OctalDeprecation = 'OCTAL_DEPRECATION',
  /**
   * The input ends with a single, unpaired backslash, for example `"\\"` or
   * `"test\\x24\\"` when written as JavaScript string literals.
   */
  EndOfString = 'END_OF_STRING',
}
/**
 * Full message text for each `ErrorType` key. These strings are used as the
 * messages of the `SyntaxError`s thrown by this module.
 *
 * NOTE(review): `Readonly<Map<…>>` only marks properties as read-only; it
 * does not remove `set`/`delete` from the type. `ReadonlyMap` would, but
 * that changes the exported type, so confirm with consumers first.
 */
export const errorMessages: Readonly<Map<ErrorType, string>> = new Map<
  ErrorType,
  string
>([
  [ErrorType.MalformedUnicode, 'malformed Unicode character escape sequence'],
  [
    ErrorType.MalformedHexadecimal,
    'malformed hexadecimal character escape sequence',
  ],
  [
    ErrorType.CodePointLimit,
    'Unicode codepoint must not be greater than 0x10FFFF in escape sequence',
  ],
  [
    ErrorType.OctalDeprecation,
    // Split across two literals only to respect the line length limit.
    '"0"-prefixed octal literals and octal escape sequences are deprecated; ' +
      'for octal literals use the "0o" prefix instead',
  ],
  [ErrorType.EndOfString, 'malformed escape sequence at end of string'],
])