Skip to content

Commit

Permalink
batch processing w/ exponential backoff (#79)
Browse files Browse the repository at this point in the history
Signed-off-by: Brian DeHamer <[email protected]>
  • Loading branch information
bdehamer authored Jun 3, 2024
1 parent a0652ef commit 9e752e3
Show file tree
Hide file tree
Showing 8 changed files with 137 additions and 63 deletions.
15 changes: 12 additions & 3 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -65,7 +65,7 @@ See [action.yml](action.yml)
with:
# Path to the artifact serving as the subject of the attestation. Must
# specify exactly one of "subject-path" or "subject-digest". May contain
# a glob pattern or list of paths (total subject count cannot exceed 64).
# a glob pattern or list of paths (total subject count cannot exceed 2500).
subject-path:
# SHA256 digest of the subject for the attestation. Must be in the form
Expand Down Expand Up @@ -115,6 +115,15 @@ If multiple subjects are being attested at the same time, each attestation will
be written to the output file on a separate line (using the [JSON Lines][7]
format).

## Attestation Limits

### Subject Limits

No more than 2500 subjects can be attested at the same time. Subjects will be
processed in batches of 50. After the initial group of 50, each subsequent batch
will incur an exponentially increasing amount of delay (capped at 1 minute of
delay per batch) to avoid overwhelming the attestation API.

## Examples

### Identify Subject by Path
Expand Down Expand Up @@ -175,8 +184,8 @@ fully-qualified image name (e.g. "ghcr.io/user/app" or
"acme.azurecr.io/user/app"). Do NOT include a tag as part of the image name --
the specific image being attested is identified by the supplied digest.

> **NOTE**: When pushing to Docker Hub, please use "docker.io" as the
> registry portion of the image name.
> **NOTE**: When pushing to Docker Hub, please use "docker.io" as the registry
> portion of the image name.

```yaml
name: build-attested-image
Expand Down
54 changes: 50 additions & 4 deletions __tests__/main.test.ts
Original file line number Diff line number Diff line change
Expand Up @@ -46,8 +46,7 @@ const defaultInputs: main.RunInputs = {
pushToRegistry: false,
githubToken: '',
privateSigning: false,
batchSize: 50,
batchDelay: 5000
batchSize: 50
}

describe('action', () => {
Expand Down Expand Up @@ -355,8 +354,7 @@ describe('action', () => {
predicateType,
predicate,
githubToken: 'gh-token',
batchSize: 2,
batchDelay: 500
batchSize: 2
}
await main.run(inputs)

Expand All @@ -377,6 +375,54 @@ describe('action', () => {
expect(scope.isDone()).toBe(true)
})
})

// Verifies that a run is rejected when the glob in `subjectPath` matches more
// files than the maximum number of subjects named in the expected error
// message (2500).
describe('when the subject count exceeds the max', () => {
// Temp directory holding the generated subject files; assigned in beforeEach
// and removed in afterEach.
let dir = ''
const filename = 'subject'

beforeEach(async () => {
// One more file than the 2500 limit quoted in the expected error below
const subjectCount = 2501
const content = 'file content'

// Set-up temp directory
const tmpDir = await fs.realpath(os.tmpdir())
dir = await fs.mkdtemp(tmpDir + path.sep)

// Add files for glob testing (named `subject-0` ... `subject-2500`)
for (let i = 0; i < subjectCount; i++) {
await fs.writeFile(path.join(dir, `${filename}-${i}`), content)
}

// Set the GH context with private repository visibility and a repo owner.
setGHContext({
payload: { repository: { visibility: 'private' } },
repo: { owner: 'foo', repo: 'bar' }
})
})

afterEach(async () => {
// Clean-up temp directory
await fs.rm(dir, { recursive: true })
})

it('sets a failed status', async () => {
// The glob `subject-*` matches every file written in beforeEach
const inputs: main.RunInputs = {
...defaultInputs,
subjectPath: path.join(dir, `${filename}-*`),
predicateType,
predicate,
githubToken: 'gh-token'
}
await main.run(inputs)

// run() is expected to return normally and report the failure through the
// mocked setFailed call rather than by throwing.
expect(runMock).toHaveReturned()
expect(setFailedMock).toHaveBeenCalledWith(
new Error(
'Too many subjects specified. The maximum number of subjects is 2500.'
)
)
})
})
})

// Stubbing the GitHub context is a bit tricky. We need to use
Expand Down
2 changes: 1 addition & 1 deletion __tests__/subject.test.ts
Original file line number Diff line number Diff line change
Expand Up @@ -151,7 +151,7 @@ describe('subjectFromInputs', () => {
})
})

describe('when the file eixts', () => {
describe('when the file exists', () => {
let dir = ''
const filename = 'subject'
const content = 'file content'
Expand Down
2 changes: 1 addition & 1 deletion action.yml
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@ inputs:
description: >
Path to the artifact serving as the subject of the attestation. Must
specify exactly one of "subject-path" or "subject-digest". May contain a
glob pattern or list of paths (total subject count cannot exceed 64).
glob pattern or list of paths (total subject count cannot exceed 2500).
required: false
subject-digest:
description: >
Expand Down
58 changes: 33 additions & 25 deletions dist/index.js

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

4 changes: 1 addition & 3 deletions src/index.ts
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,6 @@ import * as core from '@actions/core'
import { run, RunInputs } from './main'

const DEFAULT_BATCH_SIZE = 50
const DEFAULT_BATCH_DELAY = 5000

const inputs: RunInputs = {
subjectPath: core.getInput('subject-path'),
Expand All @@ -21,8 +20,7 @@ const inputs: RunInputs = {
core.getInput('private-signing')
),
// internal only
batchSize: DEFAULT_BATCH_SIZE,
batchDelay: DEFAULT_BATCH_DELAY
batchSize: DEFAULT_BATCH_SIZE
}

// eslint-disable-next-line @typescript-eslint/no-floating-promises
Expand Down
29 changes: 17 additions & 12 deletions src/main.ts
Original file line number Diff line number Diff line change
Expand Up @@ -10,14 +10,15 @@ import * as style from './style'
import { SubjectInputs, subjectFromInputs } from './subject'

const ATTESTATION_FILE_NAME = 'attestation.jsonl'
const DELAY_INTERVAL_MS = 75
const DELAY_MAX_MS = 1200

export type RunInputs = SubjectInputs &
PredicateInputs & {
pushToRegistry: boolean
githubToken: string
privateSigning: boolean
batchSize: number
batchDelay: number
}

/* istanbul ignore next */
Expand Down Expand Up @@ -62,22 +63,22 @@ export async function run(inputs: RunInputs): Promise<void> {
core.setOutput('bundle-path', outputPath)

const subjectChunks = chunkArray(subjects, inputs.batchSize)
let chunkCount = 0

// Generate attestations for each subject serially, working in batches
for (const subjectChunk of subjectChunks) {
// Delay between batches (only when chunkCount > 0)
if (chunkCount++) {
await new Promise(resolve => setTimeout(resolve, inputs.batchDelay))
}

for (let i = 0; i < subjectChunks.length; i++) {
if (subjectChunks.length > 1) {
core.info(
`Processing subject batch ${chunkCount}/${subjectChunks.length}`
)
core.info(`Processing subject batch ${i + 1}/${subjectChunks.length}`)
}

for (const subject of subjectChunk) {
// Calculate the delay time for this batch
const delayTime = delay(i)

for (const subject of subjectChunks[i]) {
// Delay between attestations (only for batches after the first one)
if (i > 0) {
await new Promise(resolve => setTimeout(resolve, delayTime))
}

const att = await createAttestation(subject, predicate, {
sigstoreInstance,
pushToRegistry: inputs.pushToRegistry,
Expand Down Expand Up @@ -197,5 +198,9 @@ const chunkArray = <T>(array: T[], chunkSize: number): T[][] => {
)
}

/**
 * Returns the wait time, in milliseconds, for the given iteration.
 *
 * The wait doubles with every iteration, starting from DELAY_INTERVAL_MS at
 * iteration 0, and never exceeds DELAY_MAX_MS.
 *
 * @param iteration - Zero-based iteration (batch) index.
 * @returns The capped exponential delay in milliseconds.
 */
const delay = (iteration: number): number => {
  const exponential = DELAY_INTERVAL_MS * Math.pow(2, iteration)
  return Math.min(exponential, DELAY_MAX_MS)
}

/**
 * Builds the URL for an attestation by joining the server URL, repository
 * owner and repository name from the GitHub context with the attestation id.
 *
 * @param id - Identifier of the attestation.
 * @returns A URL of the form `<serverUrl>/<owner>/<repo>/attestations/<id>`.
 */
const attestationURL = (id: string): string => {
  const { serverUrl, repo } = github.context
  return `${serverUrl}/${repo.owner}/${repo.repo}/attestations/${id}`
}
36 changes: 22 additions & 14 deletions src/subject.ts
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@ import path from 'path'

import type { Subject } from '@actions/attest'

const MAX_SUBJECT_COUNT = 2500
const DIGEST_ALGORITHM = 'sha256'

export type SubjectInputs = {
Expand Down Expand Up @@ -54,34 +55,41 @@ const getSubjectFromPath = async (
subjectPath: string,
subjectName?: string
): Promise<Subject[]> => {
const subjects: Subject[] = []
const digestedSubjects: Subject[] = []
const files: string[] = []

// Parse the list of subject paths
const subjectPaths = parseList(subjectPath)

// Expand the globbed paths to a list of files
for (const subPath of subjectPaths) {
// Expand the globbed path to a list of files
/* eslint-disable-next-line github/no-then */
const files = await glob.create(subPath).then(async g => g.glob())

for (const file of files) {
// Skip anything that is NOT a file
if (!fs.statSync(file).isFile()) {
continue
}
files.push(...(await glob.create(subPath).then(async g => g.glob())))
}

const name = subjectName || path.parse(file).base
const digest = await digestFile(DIGEST_ALGORITHM, file)
if (files.length > MAX_SUBJECT_COUNT) {
throw new Error(
`Too many subjects specified. The maximum number of subjects is ${MAX_SUBJECT_COUNT}.`
)
}

subjects.push({ name, digest: { [DIGEST_ALGORITHM]: digest } })
for (const file of files) {
// Skip anything that is NOT a file
if (!fs.statSync(file).isFile()) {
continue
}

const name = subjectName || path.parse(file).base
const digest = await digestFile(DIGEST_ALGORITHM, file)

digestedSubjects.push({ name, digest: { [DIGEST_ALGORITHM]: digest } })
}

if (subjects.length === 0) {
if (digestedSubjects.length === 0) {
throw new Error(`Could not find subject at path ${subjectPath}`)
}

return Promise.all(subjects)
return digestedSubjects
}

// Returns the subject specified by the digest of a file. The digest is returned
Expand Down

0 comments on commit 9e752e3

Please sign in to comment.