From 9e752e3d762bb8077eb5886f9fbddad2b6a2874b Mon Sep 17 00:00:00 2001 From: Brian DeHamer Date: Mon, 3 Jun 2024 07:56:25 -0700 Subject: [PATCH] batch processing w/ exponential backoff (#79) Signed-off-by: Brian DeHamer --- README.md | 15 ++++++++-- __tests__/main.test.ts | 54 +++++++++++++++++++++++++++++++++--- __tests__/subject.test.ts | 2 +- action.yml | 2 +- dist/index.js | 58 ++++++++++++++++++++++----------------- src/index.ts | 4 +-- src/main.ts | 29 ++++++++++++-------- src/subject.ts | 36 ++++++++++++++---------- 8 files changed, 137 insertions(+), 63 deletions(-) diff --git a/README.md b/README.md index fb0cfd83..64823211 100644 --- a/README.md +++ b/README.md @@ -65,7 +65,7 @@ See [action.yml](action.yml) with: # Path to the artifact serving as the subject of the attestation. Must # specify exactly one of "subject-path" or "subject-digest". May contain - # a glob pattern or list of paths (total subject count cannot exceed 64). + # a glob pattern or list of paths (total subject count cannot exceed 2500). subject-path: # SHA256 digest of the subject for the attestation. Must be in the form @@ -115,6 +115,15 @@ If multiple subjects are being attested at the same time, each attestation will be written to the output file on a separate line (using the [JSON Lines][7] format). +## Attestation Limits + +### Subject Limits + +No more than 2500 subjects can be attested at the same time. Subjects will be +processed in batches of 50. After the initial group of 50, each subsequent batch +will incur an exponentially increasing amount of delay (capped at 1 minute of +delay per batch) to avoid overwhelming the attestation API. + ## Examples ### Identify Subject by Path @@ -175,8 +184,8 @@ fully-qualified image name (e.g. "ghcr.io/user/app" or "acme.azurecr.io/user/app"). Do NOT include a tag as part of the image name -- the specific image being attested is identified by the supplied digest. 
-> **NOTE**: When pushing to Docker Hub, please use "docker.io" as the -> registry portion of the image name. +> **NOTE**: When pushing to Docker Hub, please use "docker.io" as the registry +> portion of the image name. ```yaml name: build-attested-image diff --git a/__tests__/main.test.ts b/__tests__/main.test.ts index 31309651..da551c3f 100644 --- a/__tests__/main.test.ts +++ b/__tests__/main.test.ts @@ -46,8 +46,7 @@ const defaultInputs: main.RunInputs = { pushToRegistry: false, githubToken: '', privateSigning: false, - batchSize: 50, - batchDelay: 5000 + batchSize: 50 } describe('action', () => { @@ -355,8 +354,7 @@ describe('action', () => { predicateType, predicate, githubToken: 'gh-token', - batchSize: 2, - batchDelay: 500 + batchSize: 2 } await main.run(inputs) @@ -377,6 +375,54 @@ describe('action', () => { expect(scope.isDone()).toBe(true) }) }) + + describe('when the subject count exceeds the max', () => { + let dir = '' + const filename = 'subject' + + beforeEach(async () => { + const subjectCount = 2501 + const content = 'file content' + + // Set-up temp directory + const tmpDir = await fs.realpath(os.tmpdir()) + dir = await fs.mkdtemp(tmpDir + path.sep) + + // Add files for glob testing + for (let i = 0; i < subjectCount; i++) { + await fs.writeFile(path.join(dir, `${filename}-${i}`), content) + } + + // Set the GH context with private repository visibility and a repo owner. + setGHContext({ + payload: { repository: { visibility: 'private' } }, + repo: { owner: 'foo', repo: 'bar' } + }) + }) + + afterEach(async () => { + // Clean-up temp directory + await fs.rm(dir, { recursive: true }) + }) + + it('sets a failed status', async () => { + const inputs: main.RunInputs = { + ...defaultInputs, + subjectPath: path.join(dir, `${filename}-*`), + predicateType, + predicate, + githubToken: 'gh-token' + } + await main.run(inputs) + + expect(runMock).toHaveReturned() + expect(setFailedMock).toHaveBeenCalledWith( + new Error( + 'Too many subjects specified. 
The maximum number of subjects is 2500.' + ) + ) + }) + }) }) // Stubbing the GitHub context is a bit tricky. We need to use diff --git a/__tests__/subject.test.ts b/__tests__/subject.test.ts index 38260e0a..9014b391 100644 --- a/__tests__/subject.test.ts +++ b/__tests__/subject.test.ts @@ -151,7 +151,7 @@ describe('subjectFromInputs', () => { }) }) - describe('when the file eixts', () => { + describe('when the file exists', () => { let dir = '' const filename = 'subject' const content = 'file content' diff --git a/action.yml b/action.yml index 7b4a6b18..7ad27b22 100644 --- a/action.yml +++ b/action.yml @@ -10,7 +10,7 @@ inputs: description: > Path to the artifact serving as the subject of the attestation. Must specify exactly one of "subject-path" or "subject-digest". May contain a - glob pattern or list of paths (total subject count cannot exceed 64). + glob pattern or list of paths (total subject count cannot exceed 2500). required: false subject-digest: description: > diff --git a/dist/index.js b/dist/index.js index 8a3279ec..a9f3349a 100644 --- a/dist/index.js +++ b/dist/index.js @@ -80002,7 +80002,6 @@ Object.defineProperty(exports, "__esModule", ({ value: true })); const core = __importStar(__nccwpck_require__(42186)); const main_1 = __nccwpck_require__(70399); const DEFAULT_BATCH_SIZE = 50; -const DEFAULT_BATCH_DELAY = 5000; const inputs = { subjectPath: core.getInput('subject-path'), subjectName: core.getInput('subject-name'), @@ -80015,8 +80014,7 @@ const inputs = { // undocumented -- not part of public interface privateSigning: ['true', 'True', 'TRUE', '1'].includes(core.getInput('private-signing')), // internal only - batchSize: DEFAULT_BATCH_SIZE, - batchDelay: DEFAULT_BATCH_DELAY + batchSize: DEFAULT_BATCH_SIZE }; // eslint-disable-next-line @typescript-eslint/no-floating-promises (0, main_1.run)(inputs); @@ -80068,6 +80066,8 @@ const predicate_1 = __nccwpck_require__(72103); const style = __importStar(__nccwpck_require__(41583)); const subject_1 = 
__nccwpck_require__(95206); const ATTESTATION_FILE_NAME = 'attestation.jsonl'; +const DELAY_INTERVAL_MS = 75; +const DELAY_MAX_MS = 1200; /* istanbul ignore next */ const logHandler = (level, ...args) => { // Send any HTTP-related log events to the GitHub Actions debug log @@ -80101,17 +80101,18 @@ async function run(inputs) { const outputPath = path_1.default.join(tempDir(), ATTESTATION_FILE_NAME); core.setOutput('bundle-path', outputPath); const subjectChunks = chunkArray(subjects, inputs.batchSize); - let chunkCount = 0; // Generate attestations for each subject serially, working in batches - for (const subjectChunk of subjectChunks) { - // Delay between batches (only when chunkCount > 0) - if (chunkCount++) { - await new Promise(resolve => setTimeout(resolve, inputs.batchDelay)); - } + for (let i = 0; i < subjectChunks.length; i++) { if (subjectChunks.length > 1) { - core.info(`Processing subject batch ${chunkCount}/${subjectChunks.length}`); - } - for (const subject of subjectChunk) { + core.info(`Processing subject batch ${i + 1}/${subjectChunks.length}`); + } + // Calculate the delay time for this batch + const delayTime = delay(i); + for (const subject of subjectChunks[i]) { + // Delay between attestations (only when chunk size > 1) + if (i > 0) { + await new Promise(resolve => setTimeout(resolve, delayTime)); + } const att = await (0, attest_1.createAttestation)(subject, predicate, { sigstoreInstance, pushToRegistry: inputs.pushToRegistry, @@ -80190,6 +80191,8 @@ const tempDir = () => { const chunkArray = (array, chunkSize) => { return Array.from({ length: Math.ceil(array.length / chunkSize) }, (_, index) => array.slice(index * chunkSize, (index + 1) * chunkSize)); }; +// Calculate the delay time for a given iteration +const delay = (iteration) => Math.min(DELAY_INTERVAL_MS * 2 ** iteration, DELAY_MAX_MS); const attestationURL = (id) => `${github.context.serverUrl}/${github.context.repo.owner}/${github.context.repo.repo}/attestations/${id}`; @@ -80287,6 
+80290,7 @@ const crypto_1 = __importDefault(__nccwpck_require__(6113)); const sync_1 = __nccwpck_require__(74393); const fs_1 = __importDefault(__nccwpck_require__(57147)); const path_1 = __importDefault(__nccwpck_require__(71017)); +const MAX_SUBJECT_COUNT = 2500; const DIGEST_ALGORITHM = 'sha256'; // Returns the subject specified by the action's inputs. The subject may be // specified as a path to a file or as a digest. If a path is provided, the @@ -80317,27 +80321,31 @@ exports.subjectFromInputs = subjectFromInputs; // Returns the subject specified by the path to a file. The file's digest is // calculated and returned along with the subject's name. const getSubjectFromPath = async (subjectPath, subjectName) => { - const subjects = []; + const digestedSubjects = []; + const files = []; // Parse the list of subject paths const subjectPaths = parseList(subjectPath); + // Expand the globbed paths to a list of files for (const subPath of subjectPaths) { - // Expand the globbed path to a list of files /* eslint-disable-next-line github/no-then */ - const files = await glob.create(subPath).then(async (g) => g.glob()); - for (const file of files) { - // Skip anything that is NOT a file - if (!fs_1.default.statSync(file).isFile()) { - continue; - } - const name = subjectName || path_1.default.parse(file).base; - const digest = await digestFile(DIGEST_ALGORITHM, file); - subjects.push({ name, digest: { [DIGEST_ALGORITHM]: digest } }); + files.push(...(await glob.create(subPath).then(async (g) => g.glob()))); + } + if (files.length > MAX_SUBJECT_COUNT) { + throw new Error(`Too many subjects specified. 
The maximum number of subjects is ${MAX_SUBJECT_COUNT}.`); + } + for (const file of files) { + // Skip anything that is NOT a file + if (!fs_1.default.statSync(file).isFile()) { + continue; } + const name = subjectName || path_1.default.parse(file).base; + const digest = await digestFile(DIGEST_ALGORITHM, file); + digestedSubjects.push({ name, digest: { [DIGEST_ALGORITHM]: digest } }); } - if (subjects.length === 0) { + if (digestedSubjects.length === 0) { throw new Error(`Could not find subject at path ${subjectPath}`); } - return Promise.all(subjects); + return digestedSubjects; }; // Returns the subject specified by the digest of a file. The digest is returned // along with the subject's name. diff --git a/src/index.ts b/src/index.ts index 2a546f85..20d3da3d 100644 --- a/src/index.ts +++ b/src/index.ts @@ -5,7 +5,6 @@ import * as core from '@actions/core' import { run, RunInputs } from './main' const DEFAULT_BATCH_SIZE = 50 -const DEFAULT_BATCH_DELAY = 5000 const inputs: RunInputs = { subjectPath: core.getInput('subject-path'), @@ -21,8 +20,7 @@ const inputs: RunInputs = { core.getInput('private-signing') ), // internal only - batchSize: DEFAULT_BATCH_SIZE, - batchDelay: DEFAULT_BATCH_DELAY + batchSize: DEFAULT_BATCH_SIZE } // eslint-disable-next-line @typescript-eslint/no-floating-promises diff --git a/src/main.ts b/src/main.ts index 90e8732a..c8b032ba 100644 --- a/src/main.ts +++ b/src/main.ts @@ -10,6 +10,8 @@ import * as style from './style' import { SubjectInputs, subjectFromInputs } from './subject' const ATTESTATION_FILE_NAME = 'attestation.jsonl' +const DELAY_INTERVAL_MS = 75 +const DELAY_MAX_MS = 1200 export type RunInputs = SubjectInputs & PredicateInputs & { @@ -17,7 +19,6 @@ export type RunInputs = SubjectInputs & githubToken: string privateSigning: boolean batchSize: number - batchDelay: number } /* istanbul ignore next */ @@ -62,22 +63,22 @@ export async function run(inputs: RunInputs): Promise { core.setOutput('bundle-path', outputPath) const 
subjectChunks = chunkArray(subjects, inputs.batchSize) - let chunkCount = 0 // Generate attestations for each subject serially, working in batches - for (const subjectChunk of subjectChunks) { - // Delay between batches (only when chunkCount > 0) - if (chunkCount++) { - await new Promise(resolve => setTimeout(resolve, inputs.batchDelay)) - } - + for (let i = 0; i < subjectChunks.length; i++) { if (subjectChunks.length > 1) { - core.info( - `Processing subject batch ${chunkCount}/${subjectChunks.length}` - ) + core.info(`Processing subject batch ${i + 1}/${subjectChunks.length}`) } - for (const subject of subjectChunk) { + // Calculate the delay time for this batch + const delayTime = delay(i) + + for (const subject of subjectChunks[i]) { + // Delay between attestations (only when chunk size > 1) + if (i > 0) { + await new Promise(resolve => setTimeout(resolve, delayTime)) + } + const att = await createAttestation(subject, predicate, { sigstoreInstance, pushToRegistry: inputs.pushToRegistry, @@ -197,5 +198,9 @@ const chunkArray = (array: T[], chunkSize: number): T[][] => { ) } +// Calculate the delay time for a given iteration +const delay = (iteration: number): number => + Math.min(DELAY_INTERVAL_MS * 2 ** iteration, DELAY_MAX_MS) + const attestationURL = (id: string): string => `${github.context.serverUrl}/${github.context.repo.owner}/${github.context.repo.repo}/attestations/${id}` diff --git a/src/subject.ts b/src/subject.ts index 43245808..947fdca1 100644 --- a/src/subject.ts +++ b/src/subject.ts @@ -6,6 +6,7 @@ import path from 'path' import type { Subject } from '@actions/attest' +const MAX_SUBJECT_COUNT = 2500 const DIGEST_ALGORITHM = 'sha256' export type SubjectInputs = { @@ -54,34 +55,41 @@ const getSubjectFromPath = async ( subjectPath: string, subjectName?: string ): Promise => { - const subjects: Subject[] = [] + const digestedSubjects: Subject[] = [] + const files: string[] = [] // Parse the list of subject paths const subjectPaths = 
parseList(subjectPath) + // Expand the globbed paths to a list of files for (const subPath of subjectPaths) { - // Expand the globbed path to a list of files /* eslint-disable-next-line github/no-then */ - const files = await glob.create(subPath).then(async g => g.glob()) - - for (const file of files) { - // Skip anything that is NOT a file - if (!fs.statSync(file).isFile()) { - continue - } + files.push(...(await glob.create(subPath).then(async g => g.glob()))) + } - const name = subjectName || path.parse(file).base - const digest = await digestFile(DIGEST_ALGORITHM, file) + if (files.length > MAX_SUBJECT_COUNT) { + throw new Error( + `Too many subjects specified. The maximum number of subjects is ${MAX_SUBJECT_COUNT}.` + ) + } - subjects.push({ name, digest: { [DIGEST_ALGORITHM]: digest } }) + for (const file of files) { + // Skip anything that is NOT a file + if (!fs.statSync(file).isFile()) { + continue } + + const name = subjectName || path.parse(file).base + const digest = await digestFile(DIGEST_ALGORITHM, file) + + digestedSubjects.push({ name, digest: { [DIGEST_ALGORITHM]: digest } }) } - if (subjects.length === 0) { + if (digestedSubjects.length === 0) { throw new Error(`Could not find subject at path ${subjectPath}`) } - return Promise.all(subjects) + return digestedSubjects } // Returns the subject specified by the digest of a file. The digest is returned