From 31ccaa60a5c81836f6f4b9c7b12dd87b3b569d38 Mon Sep 17 00:00:00 2001 From: flakey5 <73616808+flakey5@users.noreply.github.com> Date: Sun, 14 Apr 2024 12:11:20 -0700 Subject: [PATCH] src: implement R2Provider Adds the Provider interface and most of the implementation of R2Provider as discussed in #111. Note that as of right now the provider isn't being used, this will happen in future prs. --- src/constants/limits.ts | 2 +- src/constants/r2Prefixes.ts | 23 ++--- src/handlers/get.ts | 2 +- src/providers/provider.ts | 92 ++++++++++++++++++++ src/providers/r2Provider.ts | 153 ++++++++++++++++++++++++++++++++++ src/utils/path.ts | 8 +- src/utils/provider.ts | 29 +++++++ tests/unit/utils/path.test.ts | 76 ++++++----------- 8 files changed, 318 insertions(+), 67 deletions(-) create mode 100644 src/providers/provider.ts create mode 100644 src/providers/r2Provider.ts create mode 100644 src/utils/provider.ts diff --git a/src/constants/limits.ts b/src/constants/limits.ts index 6d9757f..5c1f95d 100644 --- a/src/constants/limits.ts +++ b/src/constants/limits.ts @@ -1,5 +1,5 @@ /** - * Max amount of retries for S3 requests + * Max amount of retries for R2 requests */ export const R2_RETRY_LIMIT = 5; diff --git a/src/constants/r2Prefixes.ts b/src/constants/r2Prefixes.ts index d106143..4d4fd4d 100644 --- a/src/constants/r2Prefixes.ts +++ b/src/constants/r2Prefixes.ts @@ -24,14 +24,15 @@ export const VIRTUAL_DIRS: Record> = { ), }; -export const URL_TO_BUCKET_PATH_MAP: Record string> = { - dist: (url): string => - DIST_PATH_PREFIX + (url.pathname.substring('/dist'.length) || '/'), - download: (url): string => - DOWNLOAD_PATH_PREFIX + (url.pathname.substring('/download'.length) || '/'), - docs: (url): string => - DOCS_PATH_PREFIX + (url.pathname.substring('/docs'.length) || '/'), - api: (url): string => - API_PATH_PREFIX + (url.pathname.substring('/api'.length) || '/'), - metrics: (url): string => url.pathname.substring(1), // substring to cut off the / -}; +export const 
URL_TO_BUCKET_PATH_MAP: Record string> = + { + dist: (path): string => + DIST_PATH_PREFIX + (path.substring('/dist'.length) || '/'), + download: (path): string => + DOWNLOAD_PATH_PREFIX + (path.substring('/download'.length) || '/'), + docs: (path): string => + DOCS_PATH_PREFIX + (path.substring('/docs'.length) || '/'), + api: (path): string => + API_PATH_PREFIX + (path.substring('/api'.length) || '/'), + metrics: (path): string => path.substring(1), // substring to cut off the / + }; diff --git a/src/handlers/get.ts b/src/handlers/get.ts index 7e00d63..dcdff0f 100644 --- a/src/handlers/get.ts +++ b/src/handlers/get.ts @@ -33,7 +33,7 @@ const getHandler: Handler = async (request, ctx) => { return responses.badRequest(); } - const bucketPath = mapUrlPathToBucketPath(requestUrl, ctx.env); + const bucketPath = mapUrlPathToBucketPath(requestUrl.pathname, ctx.env); if (typeof bucketPath === 'undefined') { // Directory listing is restricted and we're not on diff --git a/src/providers/provider.ts b/src/providers/provider.ts new file mode 100644 index 0000000..92b88a4 --- /dev/null +++ b/src/providers/provider.ts @@ -0,0 +1,92 @@ +/** + * A Provider is essentially an abstracted API client. This is the interface + * we interact with to head files, get files, and list directories. + */ +export interface Provider { + headFile(path: string): Promise; + + getFile( + path: string, + options?: GetFileOptions + ): Promise; + + readDirectory(path: string): Promise; +} + +/** + * Headers returned by the http request made by the Provider to its data source. + * Can be forwarded to the client. 
+ */ +export type HttpResponseHeaders = { + etag: string; + 'accept-range': string; + 'access-control-allow-origin'?: string; + 'cache-control': string; + expires?: string; + 'last-modified': string; + 'content-encoding'?: string; + 'content-type'?: string; + 'content-language'?: string; + 'content-disposition'?: string; + 'content-length': string; +}; + +export type HeadFileResult = { + /** + * Headers to send the client + */ + httpHeaders: HttpResponseHeaders; +}; + +export type GetFileOptions = { + /** + * R2 supports every conditional header except `If-Range` + * @see https://developer.mozilla.org/en-US/docs/Web/HTTP/Conditional_requests#conditional_headers + * @see https://developers.cloudflare.com/r2/api/workers/workers-api-reference/#conditional-operations + */ + conditionalHeaders?: { + ifMatch?: string; + ifNoneMatch?: string; + ifModifiedSince?: Date; + ifUnmodifiedSince?: Date; + }; + rangeHeader?: string; +}; +export type GetFileResult = { + contents?: ReadableStream | null; + /** + * Status code to send the client + */ + httpStatusCode: number; + /** + * Headers to send the client + */ + httpHeaders: HttpResponseHeaders; +}; + +export type File = { + name: string; + lastModified: Date; + size: number; +}; + +export type R2ReadDirectoryResult = { + subdirectories: string[]; + files: File[]; +}; + +export type OriginReadDirectoryResult = { + body: ReadableStream | null; + /** + * Status code to send the client + */ + httpStatusCode: number; + /** + * Headers to send the client + */ + httpHeaders: HttpResponseHeaders; +}; + +export type ReadDirectoryResult = + | R2ReadDirectoryResult + | OriginReadDirectoryResult; diff --git a/src/providers/r2Provider.ts b/src/providers/r2Provider.ts new file mode 100644 index 0000000..a14d7f3 --- /dev/null +++ b/src/providers/r2Provider.ts @@ -0,0 +1,153 @@ +import { CACHE_HEADERS } from '../constants/cache'; +import { R2_RETRY_LIMIT } from '../constants/limits'; +import { Context } from '../context'; +import { 
objectHasBody } from '../utils/object'; +import { mapUrlPathToBucketPath } from '../utils/path'; +import { retryWrapper } from '../utils/provider'; +import { + GetFileOptions, + GetFileResult, + HeadFileResult, + HttpResponseHeaders, + Provider, + ReadDirectoryResult, +} from './provider'; + +type R2ProviderCtorOptions = { + ctx: Context; +}; + +export class R2Provider implements Provider { + private ctx: Context; + + constructor({ ctx }: R2ProviderCtorOptions) { + this.ctx = ctx; + } + + async headFile(path: string): Promise { + const r2Path = mapUrlPathToBucketPath(path, this.ctx.env); + if (r2Path === undefined) { + return undefined; + } + + const object = await retryWrapper( + async () => await this.ctx.env.R2_BUCKET.head(r2Path), + R2_RETRY_LIMIT, + this.ctx.sentry + ); + + if (object === null) { + return undefined; + } + + return { + httpHeaders: r2MetadataToHeaders(object, 200), + }; + } + + async getFile( + path: string, + options?: GetFileOptions + ): Promise { + const r2Path = mapUrlPathToBucketPath(path, this.ctx.env); + if (r2Path === undefined) { + return undefined; + } + + const object = await retryWrapper( + async () => { + return await this.ctx.env.R2_BUCKET.get(r2Path, { + onlyIf: { + etagMatches: options?.conditionalHeaders?.ifMatch, + etagDoesNotMatch: options?.conditionalHeaders?.ifNoneMatch, + uploadedBefore: options?.conditionalHeaders?.ifUnmodifiedSince, + uploadedAfter: options?.conditionalHeaders?.ifModifiedSince, + }, + }); + }, + R2_RETRY_LIMIT, + this.ctx.sentry + ); + + if (object === null) { + return undefined; + } + + const doesHaveBody = objectHasBody(object); + const httpStatusCode = determineHttpStatusCode(doesHaveBody, options); + + return { + contents: doesHaveBody ? 
(object as R2ObjectBody).body : undefined, + httpStatusCode, + httpHeaders: r2MetadataToHeaders(object, httpStatusCode), + }; + } + + readDirectory(_: string): Promise { + // We will use the S3Provider here + throw new Error('Method not implemented.'); + } +} + +function r2MetadataToHeaders( + object: R2Object, + httpStatusCode: number +): HttpResponseHeaders { + const { httpMetadata } = object; + + return { + etag: object.httpEtag, + 'accept-range': 'bytes', + // https://github.com/nodejs/build/blob/e3df25d6a23f033db317a53ab1e904c953ba1f00/ansible/www-standalone/resources/config/nodejs.org?plain=1#L194-L196 + 'access-control-allow-origin': object.key.endsWith('.json') + ? '*' + : undefined, + 'cache-control': + httpStatusCode === 200 ? CACHE_HEADERS.success : CACHE_HEADERS.failure, + expires: httpMetadata?.cacheExpiry?.toUTCString(), + 'last-modified': object.uploaded.toUTCString(), + 'content-language': httpMetadata?.contentLanguage, + 'content-disposition': httpMetadata?.contentDisposition, + 'content-length': object.size.toString(), + }; +} + +function areConditionalHeadersPresent( + options?: Pick +): boolean { + if (options === undefined || options.conditionalHeaders === undefined) { + return false; + } + + const { conditionalHeaders } = options; + + return ( + conditionalHeaders.ifMatch !== undefined || + conditionalHeaders.ifNoneMatch !== undefined || + conditionalHeaders.ifModifiedSince !== undefined || + conditionalHeaders.ifUnmodifiedSince !== undefined + ); +} + +function determineHttpStatusCode( + objectHasBody: boolean, + options?: GetFileOptions +): number { + if (objectHasBody) { + if (options?.rangeHeader !== undefined) { + // Range header is present and we have a body, most likely partial + return 206; + } + + // We have the full object body + return 200; + } + + if (areConditionalHeadersPresent(options)) { + // No body due to precondition failure + return 412; + } + + // We weren't given a body and preconditions succeeded. 
+ return 304; +} diff --git a/src/utils/path.ts b/src/utils/path.ts index ecf35b5..6c9e348 100644 --- a/src/utils/path.ts +++ b/src/utils/path.ts @@ -16,10 +16,10 @@ import { Env } from '../env'; * if the eyeball should not be trying to access the resource */ export function mapUrlPathToBucketPath( - url: URL, + path: string, env: Pick ): string | undefined { - const [, basePath, ...pathPieces] = url.pathname.split('/'); // 'docs', ['asd', '123'] + const [, basePath, ...pathPieces] = path.split('/'); // 'docs', ['asd', '123'] const mappedDist = `${DIST_PATH_PREFIX}/${pathPieces[0]}`; @@ -44,11 +44,11 @@ export function mapUrlPathToBucketPath( } if (basePath in URL_TO_BUCKET_PATH_MAP) { - return URL_TO_BUCKET_PATH_MAP[basePath](url); + return URL_TO_BUCKET_PATH_MAP[basePath](path); } if (env.DIRECTORY_LISTING !== 'restricted') { - return url.pathname.substring(1); + return path.substring(1); } return undefined; diff --git a/src/utils/provider.ts b/src/utils/provider.ts new file mode 100644 index 0000000..3a16bf0 --- /dev/null +++ b/src/utils/provider.ts @@ -0,0 +1,29 @@ +import { Toucan } from 'toucan-js'; + +/** + * Utility for retrying request sent to a provider's data source + * @param request Function that performs the request + * @returns Result returned from {@link request} + */ +export async function retryWrapper( + request: () => Promise, + retryLimit: number, + sentry?: Toucan +): Promise { + let r2Error: unknown = undefined; + for (let i = 0; i < retryLimit; i++) { + try { + const result = await request(); + return result; + } catch (err) { + console.error(`R2Provider error: ${err}`); + r2Error = err; + } + } + + if (sentry !== undefined) { + sentry.captureException(r2Error); + } + + throw r2Error; +} diff --git a/tests/unit/utils/path.test.ts b/tests/unit/utils/path.test.ts index d2ffabd..0b8a0ba 100644 --- a/tests/unit/utils/path.test.ts +++ b/tests/unit/utils/path.test.ts @@ -20,84 +20,63 @@ describe('mapUrlPathToBucketPath', () => { }); it('converts 
`/unknown-base-path` to undefined when DIRECTORY_LISTING=restricted', () => { - const result = mapUrlPathToBucketPath( - new URL('http://localhost/unknown-base-path'), - { - DIRECTORY_LISTING: 'restricted', - } - ); + const result = mapUrlPathToBucketPath('/unknown-base-path', { + DIRECTORY_LISTING: 'restricted', + }); assert.strictEqual(result, undefined); }); it('converts `/unknown-base-path` to `unknown-base-path` when DIRECTORY_LISTING=on', () => { - const result = mapUrlPathToBucketPath( - new URL('http://localhost/unknown-base-path'), - { - DIRECTORY_LISTING: 'on', - } - ); + const result = mapUrlPathToBucketPath('/unknown-base-path', { + DIRECTORY_LISTING: 'on', + }); assert.strictEqual(result, 'unknown-base-path'); }); it('converts `/dist` to `nodejs/release`', () => { - const result = mapUrlPathToBucketPath(new URL('http://localhost/dist'), { + const result = mapUrlPathToBucketPath('/dist', { DIRECTORY_LISTING: 'restricted', }); assert.strictEqual(result, 'nodejs/release/'); }); it('converts `/dist/latest` to `nodejs/release/v.X.X.X`', () => { - const result = mapUrlPathToBucketPath( - new URL('http://localhost/dist/latest'), - { - DIRECTORY_LISTING: 'restricted', - } - ); + const result = mapUrlPathToBucketPath('/dist/latest', { + DIRECTORY_LISTING: 'restricted', + }); assert.match(result ?? 
'', /^nodejs\/release\/v.\d+\.\d+\.\d+\/$/); }); it('converts `/download` to `nodejs`', () => { - const result = mapUrlPathToBucketPath( - new URL('http://localhost/download'), - { - DIRECTORY_LISTING: 'restricted', - } - ); + const result = mapUrlPathToBucketPath('/download', { + DIRECTORY_LISTING: 'restricted', + }); assert.strictEqual(result, 'nodejs/'); }); it('converts `/download/release` to `nodejs/release`', () => { - const result = mapUrlPathToBucketPath( - new URL('http://localhost/download/release'), - { - DIRECTORY_LISTING: 'restricted', - } - ); + const result = mapUrlPathToBucketPath('/download/release', { + DIRECTORY_LISTING: 'restricted', + }); assert.strictEqual(result, 'nodejs/release'); }); it('converts `/download/release/latest` to `nodejs/release/v.X.X.X`', () => { - const result = mapUrlPathToBucketPath( - new URL('http://localhost/download/release/latest'), - { - DIRECTORY_LISTING: 'restricted', - } - ); + const result = mapUrlPathToBucketPath('/download/release/latest', { + DIRECTORY_LISTING: 'restricted', + }); assert.match(result ?? '', /^nodejs\/release\/v.\d+\.\d+\.\d+\/$/); }); it('converts `/docs/latest` to `nodejs/release/v.X.X.X/docs/`', () => { - const result = mapUrlPathToBucketPath( - new URL('http://localhost/docs/latest'), - { - DIRECTORY_LISTING: 'restricted', - } - ); + const result = mapUrlPathToBucketPath('/docs/latest', { + DIRECTORY_LISTING: 'restricted', + }); assert.match(result ?? 
'', /^nodejs\/release\/v.\d+\.\d+\.\d+\/docs\/$/); }); it('converts `/api` to `nodejs/release/v.X.X.X/docs/api/`', () => { - const result = mapUrlPathToBucketPath(new URL('http://localhost/api'), { + const result = mapUrlPathToBucketPath('/api', { DIRECTORY_LISTING: 'restricted', }); assert.match( @@ -107,12 +86,9 @@ describe('mapUrlPathToBucketPath', () => { }); it('converts `/api/assert.html` to `nodejs/release/v.X.X.X/docs/api/assert.html`', () => { - const result = mapUrlPathToBucketPath( - new URL('http://localhost/api/assert.html'), - { - DIRECTORY_LISTING: 'restricted', - } - ); + const result = mapUrlPathToBucketPath('/api/assert.html', { + DIRECTORY_LISTING: 'restricted', + }); assert.match( result ?? '', /^nodejs\/release\/v.\d+\.\d+\.\d+\/docs\/api\/assert\.html$/