Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feat: Backup proof failures to google cloud storage #11255

Merged
merged 3 commits into from
Jan 16, 2025
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions spartan/aztec-network/templates/prover-node.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -160,6 +160,8 @@ spec:
value: "{{ .Values.proverNode.txGathering.intervalMs }}"
- name: PROVER_NODE_TX_GATHERING_MAX_PARALLEL_REQUESTS
value: "{{ .Values.proverNode.txGathering.maxParallelRequests }}"
- name: PROVER_FAILED_PROOF_STORE
value: "{{ .Values.proverNode.failedProofStore }}"
- name: OTEL_RESOURCE_ATTRIBUTES
value: service.name={{ .Release.Name }},service.namespace={{ .Release.Namespace }},service.version={{ .Chart.AppVersion }},environment={{ .Values.environment | default "production" }}
- name: L1_CHAIN_ID
Expand Down
1 change: 1 addition & 0 deletions spartan/aztec-network/values.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -165,6 +165,7 @@ proverNode:
timeoutMs: 60000
intervalMs: 1000
maxParallelRequests: 100
failedProofStore: ""

pxe:
logLevel: "debug; info: aztec:simulator, json-rpc"
Expand Down
8 changes: 8 additions & 0 deletions yarn-project/circuit-types/src/interfaces/prover-client.ts
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,10 @@ export type ProverConfig = ActualProverConfig & {
nodeUrl?: string;
/** Identifier of the prover */
proverId: Fr;
/** Number of proving agents to start within the prover. */
proverAgentCount: number;
/** Store for failed proof inputs. */
failedProofStore?: string;
};

export const ProverConfigSchema = z.object({
Expand Down Expand Up @@ -60,6 +63,11 @@ export const proverConfigMappings: ConfigMappingsType<ProverConfig> = {
description: 'The number of prover agents to start',
...numberConfigHelper(1),
},
failedProofStore: {
env: 'PROVER_FAILED_PROOF_STORE',
description:
'Store for failed proof inputs. Google cloud storage is only supported at the moment. Set this value as gs://bucket-name/path/to/store.',
},
};

function parseProverId(str: string) {
Expand Down
44 changes: 40 additions & 4 deletions yarn-project/circuit-types/src/interfaces/proving-job.ts
Original file line number Diff line number Diff line change
Expand Up @@ -29,7 +29,7 @@ import { type ZodFor } from '@aztec/foundation/schemas';

import { z } from 'zod';

import { type CircuitName } from '../stats/index.js';
import { type ServerCircuitName } from '../stats/index.js';

export type ProofAndVerificationKey<N extends number> = {
proof: RecursiveProof<N>;
Expand Down Expand Up @@ -95,7 +95,7 @@ export enum ProvingRequestType {
TUBE_PROOF,
}

export function mapProvingRequestTypeToCircuitName(type: ProvingRequestType): CircuitName {
export function mapProvingRequestTypeToCircuitName(type: ProvingRequestType): ServerCircuitName {
switch (type) {
case ProvingRequestType.PUBLIC_VM:
return 'avm-circuit';
Expand All @@ -121,11 +121,12 @@ export function mapProvingRequestTypeToCircuitName(type: ProvingRequestType): Ci
return 'root-parity';
case ProvingRequestType.TUBE_PROOF:
return 'tube-circuit';
default:
default: {
const _exhaustive: never = type;
throw new Error(`Cannot find circuit name for proving request type: ${type}`);
}
}
}

export type AvmProvingRequest = z.infer<typeof AvmProvingRequestSchema>;

export const AvmProvingRequestSchema = z.object({
Expand All @@ -150,7 +151,42 @@ export const ProvingJobInputs = z.discriminatedUnion('type', [
z.object({ type: z.literal(ProvingRequestType.ROOT_ROLLUP), inputs: RootRollupInputs.schema }),
z.object({ type: z.literal(ProvingRequestType.TUBE_PROOF), inputs: TubeInputs.schema }),
]);

/**
 * Returns the circuit-inputs class associated with a proving request type.
 *
 * @param type - The proving request type to look up.
 * @returns The inputs class returned by the matching `case` below
 *   (e.g. `AvmCircuitInputs` for `PUBLIC_VM`, `TubeInputs` for `TUBE_PROOF`).
 * @throws Error if `type` does not match any of the handled cases at runtime.
 */
export function getProvingJobInputClassFor(type: ProvingRequestType) {
switch (type) {
case ProvingRequestType.PUBLIC_VM:
return AvmCircuitInputs;
case ProvingRequestType.PRIVATE_BASE_ROLLUP:
return PrivateBaseRollupInputs;
case ProvingRequestType.PUBLIC_BASE_ROLLUP:
return PublicBaseRollupInputs;
case ProvingRequestType.MERGE_ROLLUP:
return MergeRollupInputs;
case ProvingRequestType.EMPTY_BLOCK_ROOT_ROLLUP:
return EmptyBlockRootRollupInputs;
case ProvingRequestType.BLOCK_ROOT_ROLLUP:
return BlockRootRollupInputs;
case ProvingRequestType.SINGLE_TX_BLOCK_ROOT_ROLLUP:
return SingleTxBlockRootRollupInputs;
case ProvingRequestType.BLOCK_MERGE_ROLLUP:
return BlockMergeRollupInputs;
case ProvingRequestType.ROOT_ROLLUP:
return RootRollupInputs;
case ProvingRequestType.BASE_PARITY:
return BaseParityInputs;
case ProvingRequestType.ROOT_PARITY:
return RootParityInputs;
case ProvingRequestType.TUBE_PROOF:
return TubeInputs;
default: {
// Compile-time exhaustiveness guard: this assignment only type-checks when
// every ProvingRequestType member has been handled by a case above.
const _exhaustive: never = type;
throw new Error(`Cannot find circuit inputs class for proving type ${type}`);
}
}
}

export type ProvingJobInputs = z.infer<typeof ProvingJobInputs>;

export type ProvingJobInputsMap = {
[ProvingRequestType.PUBLIC_VM]: AvmCircuitInputs;
[ProvingRequestType.PRIVATE_BASE_ROLLUP]: PrivateBaseRollupInputs;
Expand Down
18 changes: 11 additions & 7 deletions yarn-project/circuit-types/src/stats/stats.ts
Original file line number Diff line number Diff line change
Expand Up @@ -73,7 +73,15 @@ export type NodeSyncedChainHistoryStats = {
dbSize: number;
};

export type CircuitName =
/**
 * String-literal names of the circuits grouped as "client" circuits:
 * the private kernel variants and the app circuit.
 */
export type ClientCircuitName =
| 'private-kernel-init'
| 'private-kernel-inner'
| 'private-kernel-reset'
| 'private-kernel-tail'
| 'private-kernel-tail-to-public'
| 'app-circuit';

export type ServerCircuitName =
| 'base-parity'
| 'root-parity'
| 'private-base-rollup'
Expand All @@ -84,15 +92,11 @@ export type CircuitName =
| 'empty-block-root-rollup'
| 'block-merge-rollup'
| 'root-rollup'
| 'private-kernel-init'
| 'private-kernel-inner'
| 'private-kernel-reset'
| 'private-kernel-tail'
| 'private-kernel-tail-to-public'
| 'app-circuit'
| 'avm-circuit'
| 'tube-circuit';

/** Any circuit name: the union of the client and the server circuit names. */
export type CircuitName = ClientCircuitName | ServerCircuitName;

/** Stats for circuit simulation. */
export type CircuitSimulationStats = {
/** name of the event. */
Expand Down
1 change: 1 addition & 0 deletions yarn-project/foundation/src/config/env_var.ts
Original file line number Diff line number Diff line change
Expand Up @@ -116,6 +116,7 @@ export type EnvVar =
| 'PROVER_BROKER_JOB_MAX_RETRIES'
| 'PROVER_COORDINATION_NODE_URL'
| 'PROVER_DISABLED'
| 'PROVER_FAILED_PROOF_STORE'
| 'PROVER_ID'
| 'PROVER_JOB_POLL_INTERVAL_MS'
| 'PROVER_JOB_TIMEOUT_MS'
Expand Down
4 changes: 3 additions & 1 deletion yarn-project/prover-client/package.json
Original file line number Diff line number Diff line change
Expand Up @@ -30,7 +30,8 @@
"formatting:fix": "run -T eslint --fix ./src && run -T prettier -w ./src",
"bb": "node --no-warnings ./dest/bb/index.js",
"test": "NODE_NO_WARNINGS=1 node --experimental-vm-modules ../node_modules/.bin/jest --testTimeout=3500000 --forceExit",
"test:debug": "LOG_LEVEL=debug NODE_NO_WARNINGS=1 node --experimental-vm-modules ../node_modules/.bin/jest --testTimeout=1500000 --forceExit --testNamePattern prover/bb_prover/parity"
"test:debug": "LOG_LEVEL=debug NODE_NO_WARNINGS=1 node --experimental-vm-modules ../node_modules/.bin/jest --testTimeout=1500000 --forceExit --testNamePattern prover/bb_prover/parity",
"get-proof-inputs": "node --no-warnings ./dest/bin/get-proof-inputs.js"
},
"jest": {
"moduleNameMapper": {
Expand Down Expand Up @@ -76,6 +77,7 @@
"@aztec/simulator": "workspace:^",
"@aztec/telemetry-client": "workspace:^",
"@aztec/world-state": "workspace:^",
"@google-cloud/storage": "^7.15.0",
"@noir-lang/types": "portal:../../noir/packages/types",
"commander": "^12.1.0",
"lodash.chunk": "^4.2.0",
Expand Down
60 changes: 60 additions & 0 deletions yarn-project/prover-client/src/bin/get-proof-inputs.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,60 @@
/* eslint-disable no-console */
import { AVM_HINTS_FILENAME, AVM_PUBLIC_INPUTS_FILENAME } from '@aztec/bb-prover';
import { type ProofUri, ProvingJobInputs, ProvingRequestType } from '@aztec/circuit-types';
import { jsonParseWithSchema, jsonStringify } from '@aztec/foundation/json-rpc';
import { createLogger } from '@aztec/foundation/log';

import { mkdirSync, writeFileSync } from 'fs';

import { createProofStoreForUri } from '../proving_broker/index.js';

// Module-level logger used by this CLI for progress messages.
const logger = createLogger('prover-client:get-proof-inputs');

/** Writes the one-line command synopsis for this tool to stderr. */
function printUsage(): void {
  const usage = 'Usage: get-proof-inputs <proof-uri> [out-dir=.]';
  console.error(usage);
}

/**
 * CLI entry point.
 *
 * Reads the proof URI (first argument) and an optional output directory
 * (second argument, defaulting to the current directory), fetches the proof
 * input from the store created for that URI, writes it to the output
 * directory and prints the parsed inputs to stdout.
 *
 * Prints the usage line and returns when the first argument is `--help`.
 *
 * @throws Error if no proof URI was supplied.
 */
async function main() {
  const [, , uri, outDirArg] = process.argv;

  // Explicit help request: show the synopsis and exit without an error.
  if (uri === '--help') {
    printUsage();
    return;
  }

  const outDir = outDirArg || '.';
  if (!uri) {
    printUsage();
    throw new Error('Missing proof URI');
  }

  // Make sure the destination exists before anything is fetched or written.
  mkdirSync(outDir, { recursive: true });

  const proofStore = createProofStoreForUri(uri);
  logger.info(`Processing uri ${uri}`);

  const input = await proofStore.getProofInput(uri as ProofUri);
  logger.info(`Found inputs for ${ProvingRequestType[input.type]}`);
  writeProofInputs(input, outDir);

  // Round-trip through JSON and the schema before printing the inputs.
  const reparsed = jsonParseWithSchema(jsonStringify(input), ProvingJobInputs);
  console.log(reparsed.inputs);
}

/**
 * Writes the given proof inputs as files under `outDir`.
 *
 * This mimics the behavior of bb-prover/src/bb/execute.ts.
 * Only `PUBLIC_VM` requests are handled: the AVM public inputs and the AVM
 * hints are each serialized to a buffer and written to their own file.
 *
 * @param input - The proving job inputs to write out.
 * @param outDir - Directory the files are written into.
 * @throws Error for any proving request type other than `PUBLIC_VM`.
 */
function writeProofInputs(input: ProvingJobInputs, outDir: string) {
  // Guard clause: everything except AVM proving requests is unsupported.
  if (input.type !== ProvingRequestType.PUBLIC_VM) {
    throw new Error(`Unimplemented proving request type: ${ProvingRequestType[input.type]}`);
  }

  const publicInputsPath = `${outDir}/${AVM_PUBLIC_INPUTS_FILENAME}`;
  writeFileSync(publicInputsPath, input.inputs.output.toBuffer());
  logger.info(`Wrote AVM public inputs to ${AVM_PUBLIC_INPUTS_FILENAME}`);

  const hintsPath = `${outDir}/${AVM_HINTS_FILENAME}`;
  writeFileSync(hintsPath, input.inputs.avmHints.toBuffer());
  logger.info(`Wrote AVM hints to ${AVM_HINTS_FILENAME}`);
}

// Run the CLI; on any rejection print the error and exit with status 1.
main().catch(err => {
console.error(err);
process.exit(1);
});
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@ import { AbortError } from '@aztec/foundation/error';
import { sleep } from '@aztec/foundation/sleep';
import { getTelemetryClient } from '@aztec/telemetry-client';

import { InlineProofStore, type ProofStore } from '../proving_broker/proof_store.js';
import { InlineProofStore, type ProofStore } from '../proving_broker/proof_store/index.js';
import { MemoryProvingQueue } from './memory-proving-queue.js';

describe('MemoryProvingQueue', () => {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -40,7 +40,7 @@ import { type PromiseWithResolvers, RunningPromise, promiseWithResolvers } from
import { PriorityMemoryQueue } from '@aztec/foundation/queue';
import { type TelemetryClient, type Tracer, trackSpan } from '@aztec/telemetry-client';

import { InlineProofStore, type ProofStore } from '../proving_broker/proof_store.js';
import { InlineProofStore, type ProofStore } from '../proving_broker/proof_store/index.js';
import { ProvingQueueMetrics } from './queue_metrics.js';

type ProvingJobWithResolvers<T extends ProvingRequestType = ProvingRequestType> = ProvingJob &
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,7 @@ import {
trackSpan,
} from '@aztec/telemetry-client';

import { InlineProofStore } from '../proving_broker/proof_store.js';
import { InlineProofStore } from '../proving_broker/proof_store/index.js';

const PRINT_THRESHOLD_NS = 6e10; // 60 seconds

Expand Down
12 changes: 9 additions & 3 deletions yarn-project/prover-client/src/prover-client/prover-client.ts
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,7 @@ import { type TelemetryClient, getTelemetryClient } from '@aztec/telemetry-clien
import { type ProverClientConfig } from '../config.js';
import { ProvingOrchestrator } from '../orchestrator/orchestrator.js';
import { BrokerCircuitProverFacade } from '../proving_broker/broker_prover_facade.js';
import { InlineProofStore } from '../proving_broker/proof_store.js';
import { InlineProofStore, type ProofStore, createProofStore } from '../proving_broker/proof_store/index.js';
import { ProvingAgent } from '../proving_broker/proving_agent.js';
import { ServerEpochProver } from './server-epoch-prover.js';

Expand All @@ -27,17 +27,23 @@ export class ProverClient implements EpochProverManager {
private running = false;
private agents: ProvingAgent[] = [];

private proofStore: ProofStore;
private failedProofStore: ProofStore | undefined;

private constructor(
private config: ProverClientConfig,
private worldState: ForkMerkleTreeOperations,
private orchestratorClient: ProvingJobProducer,
private agentClient?: ProvingJobConsumer,
private telemetry: TelemetryClient = getTelemetryClient(),
private log = createLogger('prover-client:tx-prover'),
) {}
) {
this.proofStore = new InlineProofStore();
this.failedProofStore = this.config.failedProofStore ? createProofStore(this.config.failedProofStore) : undefined;
}

public createEpochProver(): EpochProver {
const facade = new BrokerCircuitProverFacade(this.orchestratorClient);
const facade = new BrokerCircuitProverFacade(this.orchestratorClient, this.proofStore, this.failedProofStore);
const orchestrator = new ProvingOrchestrator(this.worldState, facade, this.config.proverId, this.telemetry);
return new ServerEpochProver(facade, orchestrator);
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -8,21 +8,23 @@ import { jest } from '@jest/globals';

import { MockProver, TestBroker } from '../test/mock_prover.js';
import { BrokerCircuitProverFacade } from './broker_prover_facade.js';
import { InlineProofStore } from './proof_store.js';
import { InlineProofStore } from './proof_store/index.js';

describe('BrokerCircuitProverFacade', () => {
let facade: BrokerCircuitProverFacade;
let proofStore: InlineProofStore;
let errorProofStore: InlineProofStore;
let broker: TestBroker;
let prover: MockProver;
let agentPollInterval: number;

beforeEach(async () => {
proofStore = new InlineProofStore();
errorProofStore = new InlineProofStore();
prover = new MockProver();
agentPollInterval = 100;
broker = new TestBroker(2, prover, proofStore, agentPollInterval);
facade = new BrokerCircuitProverFacade(broker, proofStore);
facade = new BrokerCircuitProverFacade(broker, proofStore, errorProofStore);

await broker.start();
facade.start();
Expand All @@ -31,6 +33,7 @@ describe('BrokerCircuitProverFacade', () => {
afterEach(async () => {
await broker.stop();
await facade.stop();
jest.restoreAllMocks();
});

it('sends jobs to the broker', async () => {
Expand All @@ -39,11 +42,13 @@ describe('BrokerCircuitProverFacade', () => {

jest.spyOn(broker, 'enqueueProvingJob');
jest.spyOn(prover, 'getBaseParityProof');
jest.spyOn(errorProofStore, 'saveProofInput');

await expect(facade.getBaseParityProof(inputs, controller.signal, 42)).resolves.toBeDefined();

expect(broker.enqueueProvingJob).toHaveBeenCalled();
expect(prover.getBaseParityProof).toHaveBeenCalledWith(inputs, expect.anything(), 42);
expect(errorProofStore.saveProofInput).not.toHaveBeenCalled();
});

it('handles multiple calls for the same job', async () => {
Expand Down Expand Up @@ -103,6 +108,7 @@ describe('BrokerCircuitProverFacade', () => {
const resultPromise = promiseWithResolvers<any>();
jest.spyOn(broker, 'enqueueProvingJob');
jest.spyOn(prover, 'getBaseParityProof').mockReturnValue(resultPromise.promise);
jest.spyOn(errorProofStore, 'saveProofInput');

// send N identical proof requests
const CALLS = 50;
Expand Down Expand Up @@ -136,6 +142,8 @@ describe('BrokerCircuitProverFacade', () => {
expect(broker.enqueueProvingJob).toHaveBeenCalledTimes(2);
// but no new jobs were created
expect(prover.getBaseParityProof).toHaveBeenCalledTimes(1);
// and the proof input will have been backed up
expect(errorProofStore.saveProofInput).toHaveBeenCalled();
});

it('handles aborts', async () => {
Expand All @@ -145,6 +153,7 @@ describe('BrokerCircuitProverFacade', () => {
const resultPromise = promiseWithResolvers<any>();
jest.spyOn(broker, 'enqueueProvingJob');
jest.spyOn(prover, 'getBaseParityProof').mockReturnValue(resultPromise.promise);
jest.spyOn(errorProofStore, 'saveProofInput');

const promise = facade.getBaseParityProof(inputs, controller.signal, 42).catch(err => ({ err }));

Expand All @@ -154,6 +163,7 @@ describe('BrokerCircuitProverFacade', () => {
controller.abort();

await expect(promise).resolves.toEqual({ err: new Error('Aborted') });
expect(errorProofStore.saveProofInput).not.toHaveBeenCalled();
});

it('rejects jobs when the facade is stopped', async () => {
Expand Down
Loading
Loading