-
-
Notifications
You must be signed in to change notification settings - Fork 57
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
feat: string.base64.parse #1158
base: main
Are you sure you want to change the base?
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -43,6 +43,17 @@ contextualize(() => { | |
attest(b64url("fn5+").toString()).equals( | ||
'must be base64url-encoded (was "fn5+")' | ||
) | ||
|
||
const b64parse = type("string.base64.parse") | ||
attest(Buffer.from(b64parse("fn5+") as Uint8Array).toString("utf8")).snap( | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Ideally snap would recognize |
||
"~~~" | ||
) | ||
attest( | ||
Buffer.from(b64parse("V29yZA==") as Uint8Array).toString("utf8") | ||
).snap("Word") | ||
attest(b64("V29yZA").toString()).equals( | ||
'must be base64-encoded (was "V29yZA")' | ||
) | ||
}) | ||
|
||
it("digits", () => { | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,24 +1,125 @@ | ||
import { rootSchema, type TraversalContext } from "@ark/schema" | ||
import type { Module, Submodule } from "../../module.ts" | ||
import type { Predicate, of } from "../inference.ts" | ||
import type { Predicate, To, of } from "../inference.ts" | ||
import { arkModule } from "../utils.ts" | ||
import { regexStringNode } from "./utils.ts" | ||
|
||
// Using JS-only solution to parse Base64 as `Buffer.from` isn't available in browsers and `btoa` is | ||
// notoriously slow. | ||
// | ||
// Code adapted from base64-js: https://github.com/feross/base64-js/blob/master/index.js | ||
|
||
const lookup = | ||
"ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/".split("") | ||
const revLookup = lookup.reduce( | ||
(obj, char, i) => { | ||
obj[char.charCodeAt(0)] = i | ||
return obj | ||
}, | ||
{} as Record<number, number> | ||
) | ||
|
||
const getLens = (b64: string) => { | ||
const len = b64.length | ||
|
||
if (len % 4 > 0) | ||
throw new SyntaxError("Invalid string. Length must be a multiple of 4") | ||
|
||
// Trim off extra bytes after placeholder bytes are found | ||
// See: https://github.com/beatgammit/base64-js/issues/42 | ||
let validLen = b64.indexOf("=") | ||
if (validLen === -1) validLen = len | ||
|
||
const placeHoldersLen = validLen === len ? 0 : 4 - (validLen % 4) | ||
|
||
return [validLen, placeHoldersLen] | ||
} | ||
|
||
const byteLength = (validLen: number, placeHoldersLen: number) => | ||
((validLen + placeHoldersLen) * 3) / 4 - placeHoldersLen | ||
|
||
const parseB64 = (b64: string) => { | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Hmm. I'm tempted to just say let's use the builtin (btoa) even if it's much slower because people complaining about the perf could just write their own version of the parse that uses one of these libraries, but this is a lot to maintain and add to the bundle size for a perf improvement (albeit significant) on a relatively niche feature. Another option for this kind of type would be to create a new package like I do appreciate the work that went into adapting an optimized version of this, so would rather see it available through a package like that than to go to waste. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I'd argue that the feature wouldn't be that niche as uploading binary data in the form of a base64 string is fairly common, and I'd hate for people to use That said, the maintenance and bundle size arguments are valid, so maybe an There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Yeah, especially until we can support precompilation as a build step for people who care about bundle size, we have to be pretty conservative about this sort of thing. If you want to migrate this with the changes mentioned to |
||
const [validLen, placeHoldersLen] = getLens(b64) | ||
const arr = new Uint8Array(byteLength(validLen, placeHoldersLen)) | ||
|
||
// if there are placeholders, only get up to the last complete 4 chars | ||
const len = placeHoldersLen > 0 ? validLen - 4 : validLen | ||
|
||
let tmp: number | ||
let curByte = 0 | ||
|
||
let i: number | ||
for (i = 0; i < len; i += 4) { | ||
tmp = | ||
(revLookup[b64.charCodeAt(i)] << 18) | | ||
(revLookup[b64.charCodeAt(i + 1)] << 12) | | ||
(revLookup[b64.charCodeAt(i + 2)] << 6) | | ||
revLookup[b64.charCodeAt(i + 3)] | ||
arr[curByte++] = (tmp >> 16) & 0xff | ||
arr[curByte++] = (tmp >> 8) & 0xff | ||
arr[curByte++] = tmp & 0xff | ||
} | ||
|
||
if (placeHoldersLen === 2) { | ||
tmp = | ||
(revLookup[b64.charCodeAt(i)] << 2) | | ||
(revLookup[b64.charCodeAt(i + 1)] >> 4) | ||
arr[curByte++] = tmp & 0xff | ||
} | ||
|
||
if (placeHoldersLen === 1) { | ||
tmp = | ||
(revLookup[b64.charCodeAt(i)] << 10) | | ||
(revLookup[b64.charCodeAt(i + 1)] << 4) | | ||
(revLookup[b64.charCodeAt(i + 2)] >> 2) | ||
arr[curByte++] = (tmp >> 8) & 0xff | ||
arr[curByte++] = tmp & 0xff | ||
} | ||
|
||
return arr | ||
} | ||
|
||
const base64Description = "base64-encoded" | ||
const base64UrlDescription = "base64url-encoded" | ||
|
||
export const writeBase64SyntaxErrorProblem = (error: unknown): string => { | ||
if (!(error instanceof SyntaxError)) throw error | ||
return `must be ${base64Description} (${error})` | ||
} | ||
|
||
const base64Pattern = | ||
/^(?:[A-Za-z0-9+/]{4})*(?:[A-Za-z0-9+/]{2}==|[A-Za-z0-9+/]{3}=)?$/ | ||
const base64UrlPattern = | ||
/^(?:[A-Za-z0-9_-]{4})*(?:[A-Za-z0-9_-]{2}(?:==|%3D%3D)?|[A-Za-z0-9_-]{3}(?:=|%3D)?)?$/ | ||
|
||
export const base64 = arkModule({ | ||
root: regexStringNode( | ||
/^(?:[A-Za-z0-9+/]{4})*(?:[A-Za-z0-9+/]{2}==|[A-Za-z0-9+/]{3}=)?$/, | ||
"base64-encoded" | ||
), | ||
url: regexStringNode( | ||
/^(?:[A-Za-z0-9_-]{4})*(?:[A-Za-z0-9_-]{2}(?:==|%3D%3D)?|[A-Za-z0-9_-]{3}(?:=|%3D)?)?$/, | ||
"base64url-encoded" | ||
) | ||
root: regexStringNode(base64Pattern, base64Description), | ||
url: regexStringNode(base64UrlPattern, base64UrlDescription), | ||
parse: rootSchema({ | ||
in: "string", | ||
declaredOut: rootSchema(Uint8Array), | ||
morphs: (s: string, ctx: TraversalContext) => { | ||
if (s.length === 0) return new Uint8Array(0) | ||
|
||
try { | ||
return parseB64(s) | ||
Abion47 marked this conversation as resolved.
Show resolved
Hide resolved
|
||
} catch (e) { | ||
return ctx.error({ | ||
code: "predicate", | ||
expected: base64Description, | ||
problem: writeBase64SyntaxErrorProblem(e) | ||
}) | ||
} | ||
} | ||
}) | ||
}) | ||
|
||
declare namespace string { | ||
export type base64 = of<string, Predicate<"base64">> | ||
|
||
export namespace base64 { | ||
export type url = of<string, Predicate<"base64.url">> | ||
export type parse = of<string, Predicate<"base64.parse">> | ||
} | ||
} | ||
|
||
|
@@ -30,5 +131,6 @@ export declare namespace base64 { | |
export type $ = { | ||
root: string.base64 | ||
url: string.base64.url | ||
parse: (In: string.base64) => To<Uint8Array> | ||
} | ||
} |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Let's move this group of tests for
base64
to its own file likeip.test.ts
or similar.