Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

fix: use s3 api for directory listings #44

Merged
merged 7 commits into from
Oct 10, 2023
Merged
Show file tree
Hide file tree
Changes from 6 commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions .gitignore
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
node_modules/
.wrangler/
dist/
.dev.vars
213 changes: 56 additions & 157 deletions package-lock.json

Large diffs are not rendered by default.

2 changes: 1 addition & 1 deletion package.json
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,6 @@
"build:handlebars": "node scripts/compile-handlebars.js"
},
"devDependencies": {
"@aws-sdk/client-s3": "^3.421.0",
"@cloudflare/workers-types": "^4.20230922.0",
"@reporters/github": "^1.5.3",
"@types/node": "^20.7.0",
Expand All @@ -30,6 +29,7 @@
"wrangler": "^3.10.0"
},
"dependencies": {
"@aws-sdk/client-s3": "^3.423.0",
"handlebars": "^4.7.8",
"nodejs-latest-linker": "^1.6.0",
"zod": "^3.22.2"
Expand Down
9 changes: 9 additions & 0 deletions src/constants/limits.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,9 @@
/**
 * Maximum number of times a single S3 request is attempted
 * before giving up
 */
export const S3_RETRY_LIMIT = 3;

/**
 * Maximum number of keys to be returned by a single S3 list
 * request (sent as `MaxKeys`)
 */
export const S3_MAX_KEYS = 1000;
17 changes: 17 additions & 0 deletions src/env.ts
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,23 @@ export interface Env {
* R2 bucket we read from
*/
R2_BUCKET: R2Bucket;
/**
* Endpoint to hit when using the S3 api.
*/
S3_ENDPOINT: string;
/**
* Id of the api token used for the S3 api.
* The token needs >=Object Read only permissions
*/
S3_ACCESS_KEY_ID: string;
/**
* Secret of the api token used for the S3 api
*/
S3_ACCESS_KEY_SECRET: string;
/**
* Bucket name
*/
BUCKET_NAME: string;
/**
* Directory listing toggle
* on - Enabled for all paths
Expand Down
122 changes: 96 additions & 26 deletions src/handlers/strategies/directoryListing.ts
Original file line number Diff line number Diff line change
@@ -1,3 +1,9 @@
import {
ListObjectsV2Command,
ListObjectsV2CommandOutput,
S3Client,
_Object,
} from '@aws-sdk/client-s3';
import Handlebars from 'handlebars';
import { Env } from '../../env';
import responses from '../../commonResponses';
Expand All @@ -6,6 +12,7 @@ import { getFile } from './serveFile';

// Imports the Precompiled Handlebars Template
import htmlTemplate from '../../templates/directoryListing.out.js';
import { S3_MAX_KEYS, S3_RETRY_LIMIT } from '../../constants/limits';

// Applies the Template into a Handlebars Template Function
const handleBarsTemplate = Handlebars.template(htmlTemplate);
Expand All @@ -17,14 +24,14 @@ const handleBarsTemplate = Handlebars.template(htmlTemplate);
* @param url Parsed url of the request
* @param request Request object itself
* @param delimitedPrefixes Directories in the bucket
* @param listingResponse Listing response to render
* @returns {@link DirectoryListingResponse} instance
* @param objects Objects in the bucket
* @returns {@link Response} instance
*/
export function renderDirectoryListing(
url: URL,
request: Request,
delimitedPrefixes: Set<string>,
objects: R2Object[],
objects: _Object[],
env: Env
): Response {
// Holds all the html for each directory and file we're listing
Expand Down Expand Up @@ -57,25 +64,25 @@ export function renderDirectoryListing(

// Renders all the Files within the Directory
objects.forEach(object => {
const name = object.key;
const name = object.Key;

// Find the most recent date a file in this
// directory was modified, we'll use it
// in the `Last-Modified` header
if (lastModified === undefined || object.uploaded > lastModified) {
lastModified = object.uploaded;
if (lastModified === undefined || object.LastModified! > lastModified) {
lastModified = object.LastModified!;
}

let dateStr = object.uploaded.toISOString();
let dateStr = object.LastModified!.toISOString();

dateStr = dateStr.split('.')[0].replace('T', ' ');
dateStr = dateStr.slice(0, dateStr.lastIndexOf(':')) + 'Z';

tableElements.push({
href: `${urlPathname}${encodeURIComponent(name)}`,
href: `${urlPathname}${encodeURIComponent(name ?? '')}`,
name,
lastModified: dateStr,
size: niceBytes(object.size),
size: niceBytes(object.Size!),
});
});

Expand All @@ -97,6 +104,49 @@ export function renderDirectoryListing(
});
}

/**
 * Fetches one page of objects & sub-paths under a prefix of the bucket
 * through the S3 API, trying again whenever a request fails.
 * @param client {@link S3Client} the request is sent through
 * @param bucketPath Prefix in the R2 bucket to list
 * @param cursor Continuation token of the page to fetch, `undefined`
 *  for the first page
 * @param env Worker env, supplies the bucket name
 * @returns The {@link ListObjectsV2CommandOutput} of the first attempt
 *  that succeeds
 * @throws When all {@link S3_RETRY_LIMIT} attempts failed
 */
async function fetchR2Result(
  client: S3Client,
  bucketPath: string,
  cursor: string | undefined,
  env: Env
): Promise<ListObjectsV2CommandOutput> {
  for (let attempt = 0; attempt < S3_RETRY_LIMIT; attempt++) {
    try {
      // A fresh command is built for every attempt
      const listCommand = new ListObjectsV2Command({
        Bucket: env.BUCKET_NAME,
        Prefix: bucketPath,
        Delimiter: '/',
        MaxKeys: S3_MAX_KEYS,
        ContinuationToken: cursor,
      });

      // First successful response wins, remaining attempts are skipped
      return await client.send(listCommand);
    } catch (err) {
      // Log the failure and fall through to the next attempt
      console.error(`R2 ListObjectsV2 error: ${err}`);
    }
  }

  // Every attempt failed; surface that to the caller
  throw new Error(`R2 failed listing path ${bucketPath}`);
}

/**
* Directory listing
* @param url Parsed url of the request
Expand All @@ -111,41 +161,61 @@ export async function listDirectory(
env: Env
): Promise<Response> {
const delimitedPrefixes = new Set<string>();
const objects: R2Object[] = [];
const objects: _Object[] = []; // s3 sdk types are weird

// Create an S3 client instance to interact with the bucket.
// There is a limit on the size of the response that a
// binding can return. We kept hitting it due to the size
// of our paths, which forced us to send a lot of requests
// to R2 and in turn added a lot of latency. The S3 API
// doesn't have that response body size constraint, so
// we're using it for now.
const client = new S3Client({
ovflowd marked this conversation as resolved.
Show resolved Hide resolved
region: 'auto',
endpoint: env.S3_ENDPOINT,
credentials: {
accessKeyId: env.S3_ACCESS_KEY_ID,
secretAccessKey: env.S3_ACCESS_KEY_SECRET,
},
});

let truncated = true;
let cursor: string | undefined;

while (truncated) {
const result = await env.R2_BUCKET.list({
prefix: bucketPath,
delimiter: '/',
const result: ListObjectsV2CommandOutput = await fetchR2Result(
client,
bucketPath,
cursor,
});
env
);

// R2 sends us back the absolute path of the object, cut it
result.delimitedPrefixes.forEach(prefix =>
delimitedPrefixes.add(prefix.substring(bucketPath.length))
);
result.CommonPrefixes?.forEach(path => {
if (path.Prefix !== undefined)
delimitedPrefixes.add(path.Prefix.substring(bucketPath.length));
});

const hasIndexFile = result.objects.find(object =>
object.key.endsWith('index.html')
);
const hasIndexFile = result.Contents
? result.Contents.some(object => object.Key?.endsWith('index.html'))
: false;

if (hasIndexFile !== undefined && hasIndexFile !== null) {
if (hasIndexFile) {
return getFile(url, request, `${bucketPath}index.html`, env);
}

// R2 sends us back the absolute path of the object, cut it
result.objects.forEach(object =>
result.Contents?.forEach(object =>
objects.push({
...object,
key: object.key.substring(bucketPath.length),
} as R2Object)
Key: object.Key?.substring(bucketPath.length),
flakey5 marked this conversation as resolved.
Show resolved Hide resolved
})
);

truncated = result.truncated;
cursor = result.truncated ? result.cursor : undefined;
// Default this to false just so we don't end up in a never ending
// loop if they don't send this back for whatever reason
truncated = result.IsTruncated ?? false;
cursor = truncated ? result.NextContinuationToken : undefined;
}

// A directory needs either subdirectories or files in it; it cannot be empty
Expand Down
49 changes: 48 additions & 1 deletion tests/e2e/directory.test.ts
Original file line number Diff line number Diff line change
@@ -1,17 +1,61 @@
import { after, before, describe, it } from 'node:test';
import assert from 'node:assert';
import { readFileSync } from 'node:fs';
import { readFile } from 'node:fs/promises';
import http from 'http';
import { Miniflare } from 'miniflare';

/**
 * Starts a minimal mock of the S3 `ListObjectsV2` endpoint on port 8080.
 *
 * Every request is answered with one of two canned XML documents from
 * `./tests/e2e/test-data/expected-s3/`, chosen by the `prefix` query
 * parameter: a known prefix gets the "exists" listing, anything else
 * (including a missing `prefix`) gets the "does not exist" listing.
 *
 * @returns The HTTP server, resolved only once it is accepting connections
 * @throws Rejects when the server cannot bind to the port (e.g. it is
 *  already in use)
 */
async function startS3Mock(): Promise<http.Server> {
  // Prefixes that later tests expect to exist in the bucket
  const existingPrefixes = ['nodejs/release/', 'nodejs/', 'metrics/'];

  const server = http.createServer((req, res) => {
    const url = new URL(req.url ?? '/', `http://${req.headers.host}`);

    let xmlFilePath = './tests/e2e/test-data/expected-s3/';

    // Check if it's a path that's supposed to exist in
    // later tests. If so, return a S3 response indicating that
    // the path exists. Otherwise return a S3 response indicating
    // that the path doesn't exist
    const prefix = url.searchParams.get('prefix') ?? '';
    if (existingPrefixes.includes(prefix)) {
      xmlFilePath += 'ListObjectsV2-exists.xml';
    } else {
      xmlFilePath += 'ListObjectsV2-does-not-exist.xml';
    }

    const listObjectsResponse = readFileSync(xmlFilePath, {
      encoding: 'utf-8',
    });

    res.write(listObjectsResponse);
    res.end();
  });

  // Binding is reported asynchronously: wait for it so callers never race
  // the server, and turn a bind failure into a rejection instead of an
  // unhandled 'error' event
  await new Promise<void>((resolve, reject) => {
    server.once('error', reject);
    server.listen(8080, () => {
      server.off('error', reject);
      resolve();
    });
  });

  return server;
}

describe('Directory Tests (Restricted Directory Listing)', () => {
let s3Mock: http.Server;
let mf: Miniflare;
let url: URL;
before(async () => {
s3Mock = await startS3Mock();

// Setup miniflare
mf = new Miniflare({
scriptPath: './dist/worker.js',
modules: true,
bindings: {
BUCKET_NAME: 'dist-prod',
// S3_ENDPOINT needs to be an ip here otherwise s3 sdk will try to hit
// the bucket's subdomain (e.g. http://dist-prod.localhost)
S3_ENDPOINT: 'http://127.0.0.1:8080',
S3_ACCESS_KEY_ID: 'aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa',
S3_ACCESS_KEY_SECRET:
'aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa',
DIRECTORY_LISTING: 'restricted',
FILE_CACHE_CONTROL: 'no-store',
DIRECTORY_CACHE_CONTROL: 'no-store',
Expand Down Expand Up @@ -97,5 +141,8 @@ describe('Directory Tests (Restricted Directory Listing)', () => {
});

// Cleanup Miniflare
after(async () => mf.dispose());
after(async () => {
await mf.dispose();
s3Mock.close();
});
});
2 changes: 1 addition & 1 deletion tests/e2e/test-data/expected-html/dist.txt
Original file line number Diff line number Diff line change
@@ -1 +1 @@
<!DOCTYPE html><html><head><title>Index of /dist/</title><meta name='viewport' content='width=device-width, initial-scale=1.0' /><meta charset='utf-8' /><style type='text/css'>td { padding-right: 16px; text-align: right; font-family: monospace }td:nth-of-type(1) { text-align: left; overflow-wrap: anywhere }td:nth-of-type(3) { white-space: nowrap } th { text-align: left; } @media(prefers-color-scheme: dark) { body { color: white; background-color:#1c1b22; } a { color: #3391ff; } a:visited { color: #C63B65; } }</style></head><body><h1>Index of /dist/</h1><table><tr><th>Filename</th><th>Modified</th><th>Size</th></tr><tr><td><a href='../'>../</a></td><td>-</td><td>-</td></tr><tr><td><a href='/dist/latest/'>latest/</a></td><td>-</td><td>-</td></tr><tr><td><a href='/dist/index.json'>index.json</a></td><td>2023-09-12 05:43Z</td><td>18 B</td></tr></table></body></html>
<!DOCTYPE html><html><head><title>Index of /dist/</title><meta name='viewport' content='width=device-width, initial-scale=1.0' /><meta charset='utf-8' /><style type='text/css'>td { padding-right: 16px; text-align: right; font-family: monospace }td:nth-of-type(1) { text-align: left; overflow-wrap: anywhere }td:nth-of-type(3) { white-space: nowrap } th { text-align: left; } @media(prefers-color-scheme: dark) { body { color: white; background-color:#1c1b22; } a { color: #3391ff; } a:visited { color: #C63B65; } }</style></head><body><h1>Index of /dist/</h1><table><tr><th>Filename</th><th>Modified</th><th>Size</th></tr><tr><td><a href='../'>../</a></td><td>-</td><td>-</td></tr><tr><td><a href='/dist/latest/'>latest/</a></td><td>-</td><td>-</td></tr><tr><td><a href='/dist/index.json'>index.json</a></td><td>2023-09-12 05:43Z</td><td>18 B</td></tr></table></body></html>
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
<ListBucketResult xmlns="http://s3.amazonaws.com/doc/2006-03-01">
<Name>dist-prod</Name>
<Prefix />
<Marker />
<MaxKeys>1000</MaxKeys>
<IsTruncated>false</IsTruncated>
</ListBucketResult>
16 changes: 16 additions & 0 deletions tests/e2e/test-data/expected-s3/ListObjectsV2-exists.xml
Original file line number Diff line number Diff line change
@@ -0,0 +1,16 @@
<ListBucketResult xmlns="http://s3.amazonaws.com/doc/2006-03-01">
<Name>dist-prod</Name>
<Prefix />
<Marker />
<MaxKeys>1000</MaxKeys>
<IsTruncated>false</IsTruncated>
<CommonPrefixes>
<Prefix>nodejs/release/latest/</Prefix>
</CommonPrefixes>
<Contents>
<ETag>"asd123"</ETag>
<Key>nodejs/release/index.json</Key>
<LastModified>2023-09-12T05:43:00.000Z</LastModified>
<Size>18</Size>
</Contents>
</ListBucketResult>
Loading
Loading