From a906adaf7047d6fe0c185c0f8835ea4172a2f022 Mon Sep 17 00:00:00 2001 From: Phil Date: Fri, 9 Aug 2024 10:59:23 -0400 Subject: [PATCH] http-client: fix base64 decoding of journal content The grpc gateway returns journal content as a base64-encoded string. Previously, we had been using `atob` to decode this, but that function does not work if the decoded content contains any byte values larger than 0x7f, because of course we can't have nice things in JS. So this adds one of those looks-insane-but-actually-works workarounds to make base64 decoding work properly with arbitrary utf-8 data. --- client/dist/esm/streams.js | 12 +++++++++++- client/dist/script/streams.js | 12 +++++++++++- client/dist/src/streams.ts | 12 +++++++++++- client/src/streams.ts | 12 +++++++++++- 4 files changed, 44 insertions(+), 4 deletions(-) diff --git a/client/dist/esm/streams.js b/client/dist/esm/streams.js index 075f566..543b951 100644 --- a/client/dist/esm/streams.js +++ b/client/dist/esm/streams.js @@ -50,7 +50,17 @@ export function decodeContent() { transform(value, controller) { // Base64 decode the `content` field and send it as a chunk. if (value.content?.length) { - controller.enqueue(atob(value.content)); + // The `atob` function does not work properly if the decoded content contains any byte + // values over 0x7f, because "binary" in JS means that each byte gets represented as a + // UTF-16 code unit, which happens to be <= 0xff. I wish I was making this up: + // https://developer.mozilla.org/en-US/docs/Glossary/Base64#the_unicode_problem + const binary = atob(value.content); + const bytes = new Uint8Array(binary.length); + for (let i = 0; i < bytes.length; i++) { + bytes[i] = binary.charCodeAt(i); + } + const text = new TextDecoder().decode(bytes); + controller.enqueue(text); } }, }); diff --git a/client/dist/script/streams.js b/client/dist/script/streams.js index d4c2c70..52c3730 100644 --- a/client/dist/script/streams.js +++ b/client/dist/script/streams.js @@ -56,7 +56,17 @@ function decodeContent() { transform(value, controller) { // Base64 decode the `content` field and send it as a chunk. if (value.content?.length) { - controller.enqueue(atob(value.content)); + // The `atob` function does not work properly if the decoded content contains any byte + // values over 0x7f, because "binary" in JS means that each byte gets represented as a + // UTF-16 code unit, which happens to be <= 0xff. I wish I was making this up: + // https://developer.mozilla.org/en-US/docs/Glossary/Base64#the_unicode_problem + const binary = atob(value.content); + const bytes = new Uint8Array(binary.length); + for (let i = 0; i < bytes.length; i++) { + bytes[i] = binary.charCodeAt(i); + } + const text = new TextDecoder().decode(bytes); + controller.enqueue(text); } }, }); diff --git a/client/dist/src/streams.ts b/client/dist/src/streams.ts index 777d832..5a4d289 100644 --- a/client/dist/src/streams.ts +++ b/client/dist/src/streams.ts @@ -54,7 +54,17 @@ export function decodeContent() { transform(value, controller) { // Base64 decode the `content` field and send it as a chunk. if (value.content?.length) { - controller.enqueue(atob(value.content)); + // The `atob` function does not work properly if the decoded content contains any byte + // values over 0x7f, because "binary" in JS means that each byte gets represented as a + // UTF-16 code unit, which happens to be <= 0xff. I wish I was making this up: + // https://developer.mozilla.org/en-US/docs/Glossary/Base64#the_unicode_problem + const binary = atob(value.content); + const bytes = new Uint8Array(binary.length); + for (let i = 0; i < bytes.length; i++) { + bytes[i] = binary.charCodeAt(i); + } + const text = new TextDecoder().decode(bytes); + controller.enqueue(text); } }, }); diff --git a/client/src/streams.ts b/client/src/streams.ts index 777d832..5a4d289 100644 --- a/client/src/streams.ts +++ b/client/src/streams.ts @@ -54,7 +54,17 @@ export function decodeContent() { transform(value, controller) { // Base64 decode the `content` field and send it as a chunk. if (value.content?.length) { - controller.enqueue(atob(value.content)); + // The `atob` function does not work properly if the decoded content contains any byte + // values over 0x7f, because "binary" in JS means that each byte gets represented as a + // UTF-16 code unit, which happens to be <= 0xff. I wish I was making this up: + // https://developer.mozilla.org/en-US/docs/Glossary/Base64#the_unicode_problem + const binary = atob(value.content); + const bytes = new Uint8Array(binary.length); + for (let i = 0; i < bytes.length; i++) { + bytes[i] = binary.charCodeAt(i); + } + const text = new TextDecoder().decode(bytes); + controller.enqueue(text); } }, });