diff --git a/spartan/aztec-network/files/config/setup-service-addresses.sh b/spartan/aztec-network/files/config/setup-service-addresses.sh index 5b01449150a..e3d6431c4ad 100644 --- a/spartan/aztec-network/files/config/setup-service-addresses.sh +++ b/spartan/aztec-network/files/config/setup-service-addresses.sh @@ -86,7 +86,7 @@ else fi # Configure OTEL_COLLECTOR_ENDPOINT if not set in values file -if [ "${TELEMETRY:-false}" = "true" ] && [ "${OTEL_COLLECTOR_ENDPOINT}" = "" ]; then +if [ "${TELEMETRY:-false}" = "true" ] && [ "${OTEL_COLLECTOR_ENDPOINT}" = "" ] && [ "${USE_GCLOUD_OBSERVABILITY:-false}" = "false" ]; then OTEL_COLLECTOR_PORT=${OTEL_COLLECTOR_PORT:-4318} OTEL_COLLECTOR_ENDPOINT="http://metrics-opentelemetry-collector.metrics:$OTEL_COLLECTOR_PORT" fi diff --git a/spartan/aztec-network/templates/_helpers.tpl b/spartan/aztec-network/templates/_helpers.tpl index 3a7ef490e2c..526a9ab11ba 100644 --- a/spartan/aztec-network/templates/_helpers.tpl +++ b/spartan/aztec-network/templates/_helpers.tpl @@ -155,6 +155,8 @@ Service Address Setup Container value: "{{ .Values.proverNode.service.nodePort }}" - name: PROVER_BROKER_PORT value: "{{ .Values.proverBroker.service.nodePort }}" + - name: USE_GCLOUD_OBSERVABILITY + value: "{{ .Values.telemetry.useGcloudObservability }}" - name: SERVICE_NAME value: {{ include "aztec-network.fullname" . }} volumeMounts: diff --git a/spartan/aztec-network/templates/boot-node.yaml b/spartan/aztec-network/templates/boot-node.yaml index ad0122920ae..6bada4fb7ac 100644 --- a/spartan/aztec-network/templates/boot-node.yaml +++ b/spartan/aztec-network/templates/boot-node.yaml @@ -55,13 +55,6 @@ spec: sleep 5 done echo "Ethereum node is ready!" - {{- if .Values.telemetry.enabled }} - until curl --head --silent $OTEL_COLLECTOR_ENDPOINT > /dev/null; do - echo "Waiting for OpenTelemetry collector $OTEL_COLLECTOR_ENDPOINT..." - sleep 5 - done - echo "OpenTelemetry collector is ready!" - {{- end }} volumeMounts: - name: config mountPath: /shared/config @@ -123,6 +116,12 @@ spec: valueFrom: fieldRef: fieldPath: metadata.name + - name: K8S_NAMESPACE_NAME + valueFrom: + fieldRef: + fieldPath: metadata.namespace + - name: USE_GCLOUD_OBSERVABILITY + value: "{{ .Values.telemetry.useGcloudObservability }}" {{- end }} containers: - name: boot-node @@ -181,6 +180,10 @@ spec: fieldPath: metadata.name - name: OTEL_SERVICE_NAME value: boot-node + - name: K8S_NAMESPACE_NAME + valueFrom: + fieldRef: + fieldPath: metadata.namespace - name: NODE_OPTIONS value: "--max-old-space-size={{ .Values.bootNode.maxOldSpaceSize}}" - name: AZTEC_PORT @@ -235,6 +238,8 @@ spec: value: "{{ .Values.storage.dataStoreMapSize }}" - name: WS_DB_MAP_SIZE_KB value: "{{ .Values.storage.worldStateMapSize }}" + - name: USE_GCLOUD_OBSERVABILITY + value: "{{ .Values.telemetry.useGcloudObservability }}" ports: - containerPort: {{ .Values.bootNode.service.nodePort }} - containerPort: {{ .Values.bootNode.service.p2pTcpPort }} diff --git a/spartan/aztec-network/templates/deploy-l1-verifier.yaml b/spartan/aztec-network/templates/deploy-l1-verifier.yaml index be8479acb15..c21dcccbe93 100644 --- a/spartan/aztec-network/templates/deploy-l1-verifier.yaml +++ b/spartan/aztec-network/templates/deploy-l1-verifier.yaml @@ -82,6 +82,10 @@ spec: valueFrom: fieldRef: fieldPath: metadata.name + - name: K8S_NAMESPACE_NAME + valueFrom: + fieldRef: + fieldPath: metadata.namespace - name: NODE_NO_WARNINGS value: "1" - name: LOG_LEVEL @@ -108,6 +112,8 @@ spec: value: "{{ .Values.proverNode.service.nodePort }}" - name: SERVICE_NAME value: {{ include "aztec-network.fullname" . }} + - name: USE_GCLOUD_OBSERVABILITY + value: "{{ .Values.telemetry.useGcloudObservability }}" volumeMounts: - name: config mountPath: /shared/config diff --git a/spartan/aztec-network/templates/faucet.yaml b/spartan/aztec-network/templates/faucet.yaml index 67b76336a05..d1b77b9fe79 100644 --- a/spartan/aztec-network/templates/faucet.yaml +++ b/spartan/aztec-network/templates/faucet.yaml @@ -92,6 +92,8 @@ spec: fieldPath: metadata.name - name: OTEL_SERVICE_NAME value: faucet + - name: USE_GCLOUD_OBSERVABILITY + value: "{{ .Values.telemetry.useGcloudObservability }}" ports: - name: http containerPort: {{ .Values.faucet.service.nodePort }} diff --git a/spartan/aztec-network/templates/prover-agent.yaml b/spartan/aztec-network/templates/prover-agent.yaml index 7f288af3bfb..744b2bf2fcf 100644 --- a/spartan/aztec-network/templates/prover-agent.yaml +++ b/spartan/aztec-network/templates/prover-agent.yaml @@ -57,13 +57,6 @@ spec: sleep 5 done echo "Broker is ready!" - {{- if .Values.telemetry.enabled }} - until curl --head --silent $OTEL_COLLECTOR_ENDPOINT > /dev/null; do - echo "Waiting for OpenTelemetry collector $OTEL_COLLECTOR_ENDPOINT..." - sleep 5 - done - echo "OpenTelemetry collector is ready!" - {{- end }} volumeMounts: - name: config mountPath: /shared/config @@ -90,6 +83,10 @@ spec: fieldPath: metadata.name - name: OTEL_SERVICE_NAME value: prover-agent + - name: K8S_NAMESPACE_NAME + valueFrom: + fieldRef: + fieldPath: metadata.namespace - name: AZTEC_PORT value: "{{ .Values.proverAgent.service.nodePort }}" - name: LOG_LEVEL @@ -106,6 +103,8 @@ spec: value: {{ join "," .Values.proverAgent.proofTypes | quote }} - name: OTEL_RESOURCE_ATTRIBUTES value: service.name={{ .Release.Name }},service.namespace={{ .Release.Namespace }},service.version={{ .Chart.AppVersion }},environment={{ .Values.environment | default "production" }} + - name: USE_GCLOUD_OBSERVABILITY + value: "{{ .Values.telemetry.useGcloudObservability }}" resources: {{- toYaml .Values.proverAgent.resources | nindent 12 }} {{- end }} diff --git a/spartan/aztec-network/templates/prover-broker.yaml b/spartan/aztec-network/templates/prover-broker.yaml index 78401defcea..84eef388993 100644 --- a/spartan/aztec-network/templates/prover-broker.yaml +++ b/spartan/aztec-network/templates/prover-broker.yaml @@ -54,13 +54,6 @@ spec: - | source /shared/config/service-addresses cat /shared/config/service-addresses - {{- if .Values.telemetry.enabled }} - until curl --head --silent $OTEL_COLLECTOR_ENDPOINT > /dev/null; do - echo "Waiting for OpenTelemetry collector $OTEL_COLLECTOR_ENDPOINT..." - sleep 5 - done - echo "OpenTelemetry collector is ready!" - {{- end }} volumeMounts: - name: config mountPath: /shared/config @@ -89,6 +82,10 @@ spec: fieldPath: metadata.name - name: OTEL_SERVICE_NAME value: prover-broker + - name: K8S_NAMESPACE_NAME + valueFrom: + fieldRef: + fieldPath: metadata.namespace - name: NODE_OPTIONS value: "--max-old-space-size={{ .Values.proverBroker.maxOldSpaceSize}}" - name: AZTEC_PORT @@ -109,6 +106,8 @@ spec: value: "{{ .Values.storage.dataStoreMapSize }}" - name: OTEL_RESOURCE_ATTRIBUTES value: service.name={{ .Release.Name }},service.namespace={{ .Release.Namespace }},service.version={{ .Chart.AppVersion }},environment={{ .Values.environment | default "production" }} + - name: USE_GCLOUD_OBSERVABILITY + value: "{{ .Values.telemetry.useGcloudObservability }}" resources: {{- toYaml .Values.proverBroker.resources | nindent 12 }} volumes: diff --git a/spartan/aztec-network/templates/prover-node.yaml b/spartan/aztec-network/templates/prover-node.yaml index a41c56730e7..f926c88fdfd 100644 --- a/spartan/aztec-network/templates/prover-node.yaml +++ b/spartan/aztec-network/templates/prover-node.yaml @@ -64,13 +64,6 @@ spec: echo "Using built-in job broker" fi - {{- if .Values.telemetry.enabled }} - until curl --head --silent $OTEL_COLLECTOR_ENDPOINT > /dev/null; do - echo "Waiting for OpenTelemetry collector $OTEL_COLLECTOR_ENDPOINT..." - sleep 5 - done - echo "OpenTelemetry collector is ready!" - {{- end }} until curl --head --silent $BOOT_NODE_HOST/status; do echo "Waiting for boot node..." sleep 5 @@ -132,6 +125,10 @@ spec: fieldPath: metadata.name - name: OTEL_SERVICE_NAME value: prover-node + - name: K8S_NAMESPACE_NAME + valueFrom: + fieldRef: + fieldPath: metadata.namespace - name: POD_IP valueFrom: fieldRef: @@ -194,6 +191,8 @@ spec: value: "{{ .Values.storage.dataStoreMapSize }}" - name: WS_DB_MAP_SIZE_KB value: "{{ .Values.storage.worldStateMapSize }}" + - name: USE_GCLOUD_OBSERVABILITY + value: "{{ .Values.telemetry.useGcloudObservability }}" ports: - containerPort: {{ .Values.proverNode.service.nodePort }} - containerPort: {{ .Values.proverNode.service.p2pTcpPort }} diff --git a/spartan/aztec-network/templates/pxe.yaml b/spartan/aztec-network/templates/pxe.yaml index be0d0321d3a..738c6c5a73b 100644 --- a/spartan/aztec-network/templates/pxe.yaml +++ b/spartan/aztec-network/templates/pxe.yaml @@ -91,6 +91,10 @@ spec: fieldPath: metadata.name - name: OTEL_SERVICE_NAME value: pxe + - name: K8S_NAMESPACE_NAME + valueFrom: + fieldRef: + fieldPath: metadata.namespace - name: AZTEC_PORT value: "{{ .Values.pxe.service.nodePort }}" - name: LOG_JSON @@ -99,6 +103,8 @@ spec: value: "{{ .Values.pxe.logLevel }}" - name: PXE_PROVER_ENABLED value: "{{ .Values.aztec.realProofs }}" + - name: USE_GCLOUD_OBSERVABILITY + value: "{{ .Values.telemetry.useGcloudObservability }}" ports: - name: http containerPort: {{ .Values.pxe.service.nodePort }} diff --git a/spartan/aztec-network/templates/setup-l2-contracts.yaml b/spartan/aztec-network/templates/setup-l2-contracts.yaml index 218db443662..807421d84cd 100644 --- a/spartan/aztec-network/templates/setup-l2-contracts.yaml +++ b/spartan/aztec-network/templates/setup-l2-contracts.yaml @@ -74,6 +74,10 @@ spec: valueFrom: fieldRef: fieldPath: metadata.name + - name: K8S_NAMESPACE_NAME + valueFrom: + fieldRef: + fieldPath: metadata.namespace - name: TELEMETRY value: "{{ .Values.telemetry.enabled }}" - name: LOG_LEVEL @@ -96,4 +100,6 @@ spec: value: "{{ .Values.proverNode.service.nodePort }}" - name: SERVICE_NAME value: {{ include "aztec-network.fullname" . }} + - name: USE_GCLOUD_OBSERVABILITY + value: "{{ .Values.telemetry.useGcloudObservability }}" {{ end }} diff --git a/spartan/aztec-network/templates/transaction-bot.yaml b/spartan/aztec-network/templates/transaction-bot.yaml index fc573dfb723..07f682cd857 100644 --- a/spartan/aztec-network/templates/transaction-bot.yaml +++ b/spartan/aztec-network/templates/transaction-bot.yaml @@ -85,6 +85,10 @@ spec: fieldPath: metadata.name - name: OTEL_SERVICE_NAME value: bot + - name: K8S_NAMESPACE_NAME + valueFrom: + fieldRef: + fieldPath: metadata.namespace - name: AZTEC_PORT value: "{{ .Values.bot.service.nodePort }}" - name: LOG_JSON @@ -113,6 +117,8 @@ spec: value: "{{ .Values.bot.maxErrors }}" - name: BOT_STOP_WHEN_UNHEALTHY value: "{{ .Values.bot.stopIfUnhealthy }}" + - name: USE_GCLOUD_OBSERVABILITY + value: "{{ .Values.telemetry.useGcloudObservability }}" ports: - name: http containerPort: {{ .Values.bot.service.nodePort }} diff --git a/spartan/aztec-network/templates/validator.yaml b/spartan/aztec-network/templates/validator.yaml index 1c901deb572..cd898b97bc1 100644 --- a/spartan/aztec-network/templates/validator.yaml +++ b/spartan/aztec-network/templates/validator.yaml @@ -58,13 +58,6 @@ spec: done echo "Ethereum node is ready!" - {{- if .Values.telemetry.enabled }} - until curl --head --silent $OTEL_COLLECTOR_ENDPOINT > /dev/null; do - echo "Waiting for OpenTelemetry collector $OTEL_COLLECTOR_ENDPOINT..." - sleep 5 - done - echo "OpenTelemetry collector is ready!" - {{- end }} if [ "{{ .Values.validator.dynamicBootNode }}" = "true" ]; then echo "{{ include "aztec-network.pxeUrl" . }}" > /shared/pxe/pxe_url @@ -164,6 +157,10 @@ spec: valueFrom: fieldRef: fieldPath: status.podIP + - name: K8S_NAMESPACE_NAME + valueFrom: + fieldRef: + fieldPath: metadata.namespace - name: NODE_OPTIONS value: "--max-old-space-size={{ .Values.validator.maxOldSpaceSize}}" - name: AZTEC_PORT @@ -220,6 +217,8 @@ spec: value: "{{ .Values.storage.dataStoreMapSize }}" - name: WS_DB_MAP_SIZE_KB value: "{{ .Values.storage.worldStateMapSize }}" + - name: USE_GCLOUD_OBSERVABILITY + value: "{{ .Values.telemetry.useGcloudObservability }}" ports: - containerPort: {{ .Values.validator.service.nodePort }} - containerPort: {{ .Values.validator.service.p2pTcpPort }} diff --git a/spartan/aztec-network/values.yaml b/spartan/aztec-network/values.yaml index 37ccb843f27..0f14931793c 100644 --- a/spartan/aztec-network/values.yaml +++ b/spartan/aztec-network/values.yaml @@ -19,6 +19,7 @@ storage: telemetry: enabled: false otelCollectorEndpoint: + useGcloudObservability: false images: aztec: diff --git a/spartan/aztec-network/values/exp-1.yaml b/spartan/aztec-network/values/exp-1.yaml index 190fb2fefe0..0ef7a0a96d6 100644 --- a/spartan/aztec-network/values/exp-1.yaml +++ b/spartan/aztec-network/values/exp-1.yaml @@ -11,7 +11,6 @@ aztec: telemetry: enabled: true - otelCollectorEndpoint: http://35.197.100.168:4318 images: aztec: diff --git a/spartan/aztec-network/values/exp-2.yaml b/spartan/aztec-network/values/exp-2.yaml index a2601a184ad..ef39aace4d9 100644 --- a/spartan/aztec-network/values/exp-2.yaml +++ b/spartan/aztec-network/values/exp-2.yaml @@ -1,6 +1,5 @@ telemetry: enabled: true - otelCollectorEndpoint: http://metrics-opentelemetry-collector.metrics:4318 network: setupL2Contracts: false @@ -29,4 +28,4 @@ proverNode: proverPublisherPrivateKey: bot: - txIntervalSeconds: 20 \ No newline at end of file + txIntervalSeconds: 20 diff --git a/spartan/aztec-network/values/rc-1.yaml b/spartan/aztec-network/values/rc-1.yaml index 249fd2a6569..6ad5c3443a0 100644 --- a/spartan/aztec-network/values/rc-1.yaml +++ b/spartan/aztec-network/values/rc-1.yaml @@ -12,7 +12,6 @@ images: telemetry: enabled: true - otelCollectorEndpoint: http://35.197.100.168:4318 validator: storageSize: "100Gi" diff --git a/spartan/aztec-network/values/rc-2.yaml b/spartan/aztec-network/values/rc-2.yaml index 1f14593561d..86954dd0cc3 100644 --- a/spartan/aztec-network/values/rc-2.yaml +++ b/spartan/aztec-network/values/rc-2.yaml @@ -12,7 +12,6 @@ images: telemetry: enabled: true - otelCollectorEndpoint: http://35.197.100.168:4318 validator: replicas: 48 diff --git a/spartan/aztec-network/values/sepolia-3-validators-with-metrics.yaml b/spartan/aztec-network/values/sepolia-3-validators-with-metrics.yaml index a2601a184ad..ef39aace4d9 100644 --- a/spartan/aztec-network/values/sepolia-3-validators-with-metrics.yaml +++ b/spartan/aztec-network/values/sepolia-3-validators-with-metrics.yaml @@ -1,6 +1,5 @@ telemetry: enabled: true - otelCollectorEndpoint: http://metrics-opentelemetry-collector.metrics:4318 network: setupL2Contracts: false @@ -29,4 +28,4 @@ proverNode: proverPublisherPrivateKey: bot: - txIntervalSeconds: 20 \ No newline at end of file + txIntervalSeconds: 20 diff --git a/spartan/aztec-network/values/sepolia-48-validators-with-metrics.yaml b/spartan/aztec-network/values/sepolia-48-validators-with-metrics.yaml index 25781c330f2..089abbd9f74 100644 --- a/spartan/aztec-network/values/sepolia-48-validators-with-metrics.yaml +++ b/spartan/aztec-network/values/sepolia-48-validators-with-metrics.yaml @@ -1,6 +1,5 @@ telemetry: enabled: true - otelCollectorEndpoint: http://metrics-opentelemetry-collector.metrics:4318 network: setupL2Contracts: false @@ -75,4 +74,4 @@ proverNode: proverPublisherPrivateKey: bot: - txIntervalSeconds: 5 \ No newline at end of file + txIntervalSeconds: 5 diff --git a/spartan/terraform/deploy-release/main.tf b/spartan/terraform/deploy-release/main.tf index 3972a690489..70967493b00 100644 --- a/spartan/terraform/deploy-release/main.tf +++ b/spartan/terraform/deploy-release/main.tf @@ -100,6 +100,11 @@ resource "helm_release" "aztec-gke-cluster" { value = var.L1_DEPLOYMENT_SALT } + set { + name = "telemetry.useGcloudObservability" + value = "true" + } + # Setting timeout and wait conditions timeout = 1200 # 20 minutes in seconds wait = true diff --git a/yarn-project/foundation/src/config/env_var.ts b/yarn-project/foundation/src/config/env_var.ts index ac64348ece2..f33e808bfb6 100644 --- a/yarn-project/foundation/src/config/env_var.ts +++ b/yarn-project/foundation/src/config/env_var.ts @@ -64,6 +64,7 @@ export type EnvVar = | 'NETWORK' | 'NO_PXE' | 'COIN_ISSUER_CONTRACT_ADDRESS' + | 'USE_GCLOUD_OBSERVABILITY' | 'OTEL_EXPORTER_OTLP_METRICS_ENDPOINT' | 'OTEL_EXPORTER_OTLP_TRACES_ENDPOINT' | 'OTEL_EXPORTER_OTLP_LOGS_ENDPOINT' @@ -202,4 +203,5 @@ export type EnvVar = | 'FAUCET_INTERVAL_MS' | 'FAUCET_L1_ASSETS' | 'K8S_POD_NAME' - | 'K8S_POD_UID'; + | 'K8S_POD_UID' + | 'K8S_NAMESPACE_NAME'; diff --git a/yarn-project/foundation/src/log/pino-logger.ts b/yarn-project/foundation/src/log/pino-logger.ts index 21772b4bc59..40291006c9c 100644 --- a/yarn-project/foundation/src/log/pino-logger.ts +++ b/yarn-project/foundation/src/log/pino-logger.ts @@ -117,8 +117,10 @@ const levelToSeverityFormatter = (label: string, level: number): object => { return { severity, level }; }; +const useGcloudObservability = process.env.USE_GCLOUD_OBSERVABILITY === 'true'; const pinoOpts: pino.LoggerOptions = { customLevels, + messageKey: useGcloudObservability ? 'message' : 'msg', useOnlyCustomLevels: false, level: logLevel, formatters: { @@ -172,7 +174,6 @@ const otelTransport: pino.TransportTargetOptions = { options: otelOpts, level: 'trace', }; - function makeLogger() { if (!isNode) { // We are on the browser. diff --git a/yarn-project/telemetry-client/package.json b/yarn-project/telemetry-client/package.json index 33b25e466c9..7686730c0bd 100644 --- a/yarn-project/telemetry-client/package.json +++ b/yarn-project/telemetry-client/package.json @@ -30,6 +30,9 @@ "dependencies": { "@aztec/circuit-types": "workspace:^", "@aztec/foundation": "workspace:^", + "@google-cloud/opentelemetry-cloud-monitoring-exporter": "^0.20.0", + "@google-cloud/opentelemetry-cloud-trace-exporter": "^2.4.1", + "@google-cloud/opentelemetry-resource-util": "^2.4.0", "@opentelemetry/api": "^1.9.0", "@opentelemetry/api-logs": "^0.55.0", "@opentelemetry/core": "^1.28.0", diff --git a/yarn-project/telemetry-client/src/aztec_resource_detector.ts b/yarn-project/telemetry-client/src/aztec_resource_detector.ts index a16ee9ca1ec..0b984a63593 100644 --- a/yarn-project/telemetry-client/src/aztec_resource_detector.ts +++ b/yarn-project/telemetry-client/src/aztec_resource_detector.ts @@ -1,5 +1,6 @@ import { type DetectorSync, type IResource, Resource } from '@opentelemetry/resources'; import { + ATTR_K8S_NAMESPACE_NAME, ATTR_K8S_POD_NAME, ATTR_K8S_POD_UID, ATTR_SERVICE_INSTANCE_ID, @@ -19,6 +20,7 @@ class AztecDetector implements DetectorSync { [ATTR_K8S_POD_NAME]: config.k8sPodName, // this will get set by serviceInstanceIdDetector if not running in K8s [ATTR_SERVICE_INSTANCE_ID]: config.k8sPodUid, + [ATTR_K8S_NAMESPACE_NAME]: config.k8sNamespaceName, }); } } diff --git a/yarn-project/telemetry-client/src/config.ts b/yarn-project/telemetry-client/src/config.ts index bef8275cf2a..e4613d27494 100644 --- a/yarn-project/telemetry-client/src/config.ts +++ b/yarn-project/telemetry-client/src/config.ts @@ -1,6 +1,7 @@ import { type ConfigMappingsType, getConfigFromMappings } from '@aztec/foundation/config'; export interface TelemetryClientConfig { + useGcloudObservability: boolean; metricsCollectorUrl?: URL; tracesCollectorUrl?: URL; logsCollectorUrl?: URL; @@ -10,9 +11,16 @@ export interface TelemetryClientConfig { otelExportTimeoutMs: number; k8sPodUid?: string; k8sPodName?: string; + k8sNamespaceName?: string; } export const telemetryClientConfigMappings: ConfigMappingsType = { + useGcloudObservability: { + env: 'USE_GCLOUD_OBSERVABILITY', + description: 'Whether to use GCP observability', + defaultValue: false, + parseEnv: (val: string) => val === 'true', + }, metricsCollectorUrl: { env: 'OTEL_EXPORTER_OTLP_METRICS_ENDPOINT', description: 'The URL of the telemetry collector for metrics', @@ -58,6 +66,10 @@ export const telemetryClientConfigMappings: ConfigMappingsType OpenTelemetryClient; + export class OpenTelemetryClient implements TelemetryClient { hostMetrics: HostMetrics | undefined; eventLoopMonitor: EventLoopMonitor | undefined; @@ -44,7 +49,7 @@ export class OpenTelemetryClient implements TelemetryClient { private resource: IResource, private meterProvider: MeterProvider, private traceProvider: TracerProvider, - private loggerProvider: LoggerProvider, + private loggerProvider: LoggerProvider | undefined, private log: Logger, ) {} @@ -112,7 +117,7 @@ export class OpenTelemetryClient implements TelemetryClient { public async flush() { await Promise.all([ this.meterProvider.forceFlush(), - this.loggerProvider.forceFlush(), + this.loggerProvider?.forceFlush(), this.traceProvider instanceof NodeTracerProvider ? this.traceProvider.forceFlush() : Promise.resolve(), ]); } @@ -120,7 +125,10 @@ export class OpenTelemetryClient implements TelemetryClient { public async stop() { this.eventLoopMonitor?.stop(); - const flushAndShutdown = async (provider: { forceFlush: () => Promise; shutdown: () => Promise }) => { + const flushAndShutdown = async (provider?: { forceFlush: () => Promise; shutdown: () => Promise }) => { + if (!provider) { + return; + } await provider.forceFlush(); await provider.shutdown(); }; @@ -132,30 +140,16 @@ export class OpenTelemetryClient implements TelemetryClient { ]); } - public static async createAndStart(config: TelemetryClientConfig, log: Logger): Promise { - const resource = await getOtelResource(); - - // TODO(palla/log): Should we show traces as logs in stdout when otel collection is disabled? - const tracerProvider = new NodeTracerProvider({ + public static createMeterProvider( + resource: IResource, + options: Partial, + ): MeterProvider { + return new MeterProvider({ resource, - spanProcessors: config.tracesCollectorUrl - ? [new BatchSpanProcessor(new OTLPTraceExporter({ url: config.tracesCollectorUrl.href }))] + readers: options.exporter + ? [new PeriodicExportingMetricReader(options as PeriodicExportingMetricReaderOptions)] : [], - }); - - tracerProvider.register(); - const meterProvider = new MeterProvider({ - resource, - readers: [ - new PeriodicExportingMetricReader({ - exporter: new OTLPMetricExporter({ - url: config.metricsCollectorUrl!.href, - }), - exportIntervalMillis: config.otelCollectIntervalMs, - exportTimeoutMillis: config.otelExportTimeoutMs, - }), - ], views: [ // Every histogram matching the selector (type + unit) gets these custom buckets assigned new View({ @@ -241,10 +235,59 @@ export class OpenTelemetryClient implements TelemetryClient { }), ], }); + } + + public static getGcloudClientFactory(config: TelemetryClientConfig): OpenTelemetryClientFactory { + return (resource: IResource, log: Logger) => { + const tracerProvider = new NodeTracerProvider({ + resource, + spanProcessors: [new BatchSpanProcessor(new TraceExporter({ resourceFilter: /.*/ }))], + }); + + tracerProvider.register(); + + const meterProvider = OpenTelemetryClient.createMeterProvider(resource, { + exporter: new MetricExporter(), + exportTimeoutMillis: config.otelExportTimeoutMs, + exportIntervalMillis: config.otelCollectIntervalMs, + }); - const loggerProvider = await registerOtelLoggerProvider(resource, config.logsCollectorUrl); + return new OpenTelemetryClient(resource, meterProvider, tracerProvider, undefined, log); + }; + } + + public static getCustomClientFactory(config: TelemetryClientConfig): OpenTelemetryClientFactory { + return (resource: IResource, log: Logger) => { + const tracerProvider = new NodeTracerProvider({ + resource, + spanProcessors: config.tracesCollectorUrl + ? [new BatchSpanProcessor(new OTLPTraceExporter({ url: config.tracesCollectorUrl.href }))] + : [], + }); + + tracerProvider.register(); + + const meterProvider = OpenTelemetryClient.createMeterProvider(resource, { + exporter: config.metricsCollectorUrl + ? new OTLPMetricExporter({ url: config.metricsCollectorUrl.href }) + : undefined, + exportTimeoutMillis: config.otelExportTimeoutMs, + exportIntervalMillis: config.otelCollectIntervalMs, + }); + + const loggerProvider = registerOtelLoggerProvider(resource, config.logsCollectorUrl); + + return new OpenTelemetryClient(resource, meterProvider, tracerProvider, loggerProvider, log); + }; + } + + public static async createAndStart(config: TelemetryClientConfig, log: Logger): Promise { + const resource = await getOtelResource(); + const factory = config.useGcloudObservability + ? OpenTelemetryClient.getGcloudClientFactory(config) + : OpenTelemetryClient.getCustomClientFactory(config); - const service = new OpenTelemetryClient(resource, meterProvider, tracerProvider, loggerProvider, log); + const service = factory(resource, log); service.start(); return service; diff --git a/yarn-project/telemetry-client/src/otel_logger_provider.ts b/yarn-project/telemetry-client/src/otel_logger_provider.ts index 8690584ec35..6efeaaca334 100644 --- a/yarn-project/telemetry-client/src/otel_logger_provider.ts +++ b/yarn-project/telemetry-client/src/otel_logger_provider.ts @@ -4,11 +4,7 @@ import { CompressionAlgorithm } from '@opentelemetry/otlp-exporter-base'; import { type IResource } from '@opentelemetry/resources'; import { BatchLogRecordProcessor, LoggerProvider } from '@opentelemetry/sdk-logs'; -import { getOtelResource } from './otel_resource.js'; - -export async function registerOtelLoggerProvider(resource?: IResource, otelLogsUrl?: URL) { - resource ??= await getOtelResource(); - +export function registerOtelLoggerProvider(resource: IResource, otelLogsUrl?: URL) { const loggerProvider = new LoggerProvider({ resource }); if (!otelLogsUrl) { // If no URL provided, return it disconnected. diff --git a/yarn-project/telemetry-client/src/otel_resource.ts b/yarn-project/telemetry-client/src/otel_resource.ts index 9b777d09aeb..6a41d95c992 100644 --- a/yarn-project/telemetry-client/src/otel_resource.ts +++ b/yarn-project/telemetry-client/src/otel_resource.ts @@ -1,4 +1,4 @@ -// import { gcpDetector } from '@opentelemetry/resource-detector-gcp'; +import { GcpDetectorSync } from '@google-cloud/opentelemetry-resource-util'; import { type IResource, detectResourcesSync, @@ -17,8 +17,8 @@ export async function getOtelResource(): Promise { envDetectorSync, processDetectorSync, serviceInstanceIdDetectorSync, - // gcpDetector, aztecDetector, + new GcpDetectorSync(), ], }); diff --git a/yarn-project/telemetry-client/src/start.ts b/yarn-project/telemetry-client/src/start.ts index d33866c6c1b..fe8a40dabec 100644 --- a/yarn-project/telemetry-client/src/start.ts +++ b/yarn-project/telemetry-client/src/start.ts @@ -9,8 +9,8 @@ export * from './config.js'; export async function createAndStartTelemetryClient(config: TelemetryClientConfig): Promise { const log = createLogger('telemetry:client'); - if (config.metricsCollectorUrl) { - log.info('Using OpenTelemetry client'); + if (config.metricsCollectorUrl || config.useGcloudObservability) { + log.info(`Using OpenTelemetry client ${config.useGcloudObservability ? 'with GCP' : 'with custom collector'}`); return await OpenTelemetryClient.createAndStart(config, log); } else { log.info('Using NoopTelemetryClient'); diff --git a/yarn-project/telemetry-client/src/vendor/otel-pino-stream.ts b/yarn-project/telemetry-client/src/vendor/otel-pino-stream.ts index 3f1361df7a7..88610f15e76 100644 --- a/yarn-project/telemetry-client/src/vendor/otel-pino-stream.ts +++ b/yarn-project/telemetry-client/src/vendor/otel-pino-stream.ts @@ -21,6 +21,7 @@ import { millisToHrTime } from '@opentelemetry/core'; import { Writable } from 'stream'; import { registerOtelLoggerProvider } from '../otel_logger_provider.js'; +import { getOtelResource } from '../otel_resource.js'; /* eslint-disable @typescript-eslint/ban-types */ /* eslint-disable camelcase */ @@ -274,7 +275,8 @@ export class OTelPinoStream extends Writable { // nodejs loop, as opposed to in a worker as pino recommends. export default async function (options: OTelPinoStreamOptions) { const url = process.env.OTEL_EXPORTER_OTLP_LOGS_ENDPOINT; + const resource = await getOtelResource(); // We re-register here because this runs on a worker thread - await registerOtelLoggerProvider(undefined, url ? new URL(url) : undefined); + registerOtelLoggerProvider(resource, url ? new URL(url) : undefined); return new OTelPinoStream(options); } diff --git a/yarn-project/yarn.lock b/yarn-project/yarn.lock index 7918c3b8281..81d027a8b50 100644 --- a/yarn-project/yarn.lock +++ b/yarn-project/yarn.lock @@ -1276,6 +1276,9 @@ __metadata: dependencies: "@aztec/circuit-types": "workspace:^" "@aztec/foundation": "workspace:^" + "@google-cloud/opentelemetry-cloud-monitoring-exporter": "npm:^0.20.0" + "@google-cloud/opentelemetry-cloud-trace-exporter": "npm:^2.4.1" + "@google-cloud/opentelemetry-resource-util": "npm:^2.4.0" "@jest/globals": "npm:^29.5.0" "@opentelemetry/api": "npm:^1.9.0" "@opentelemetry/api-logs": "npm:^0.55.0" @@ -2402,6 +2405,83 @@ __metadata: languageName: node linkType: hard +"@google-cloud/opentelemetry-cloud-monitoring-exporter@npm:^0.20.0": + version: 0.20.0 + resolution: "@google-cloud/opentelemetry-cloud-monitoring-exporter@npm:0.20.0" + dependencies: + "@google-cloud/opentelemetry-resource-util": "npm:^2.4.0" + "@google-cloud/precise-date": "npm:^4.0.0" + google-auth-library: "npm:^9.0.0" + googleapis: "npm:^137.0.0" + peerDependencies: + "@opentelemetry/api": ^1.0.0 + "@opentelemetry/core": ^1.0.0 + "@opentelemetry/resources": ^1.0.0 + "@opentelemetry/sdk-metrics": ^1.0.0 + checksum: 10/b71f9335e32e9f5caf9b3c9511eb7029db5cc1d3d86c8e35f5ea25a47ee39ef8bb1fb635d2f454c933f229351841f08453cb6f89a41f0bd6dab2011c783b2c4b + languageName: node + linkType: hard + +"@google-cloud/opentelemetry-cloud-trace-exporter@npm:^2.4.1": + version: 2.4.1 + resolution: "@google-cloud/opentelemetry-cloud-trace-exporter@npm:2.4.1" + dependencies: + "@google-cloud/opentelemetry-resource-util": "npm:^2.4.0" + "@grpc/grpc-js": "npm:^1.1.8" + "@grpc/proto-loader": "npm:^0.7.0" + google-auth-library: "npm:^9.0.0" + peerDependencies: + "@opentelemetry/api": ^1.0.0 + "@opentelemetry/core": ^1.0.0 + "@opentelemetry/resources": ^1.0.0 + "@opentelemetry/sdk-trace-base": ^1.0.0 + checksum: 10/81dca152f0e84141c76f8a79d7e3b07bee3063cd16a370b53f1ddadf3a0ee6f2547ef8d93cb6007b17780ed568770fe2937af7dbe85a3b66e0e08c364eef44b0 + languageName: node + linkType: hard + +"@google-cloud/opentelemetry-resource-util@npm:^2.4.0": + version: 2.4.0 + resolution: "@google-cloud/opentelemetry-resource-util@npm:2.4.0" + dependencies: + "@opentelemetry/semantic-conventions": "npm:^1.22.0" + gcp-metadata: "npm:^6.0.0" + peerDependencies: + "@opentelemetry/resources": ^1.0.0 + checksum: 10/119116a19549ea1a5c43d50f880959e2fe512860e316a1aa15e3e8088696629995c09c0287e7fcca42baa65fcacc5d3cd74eb3f6b0b9984dc523962d6cb36265 + languageName: node + linkType: hard + +"@google-cloud/precise-date@npm:^4.0.0": + version: 4.0.0 + resolution: "@google-cloud/precise-date@npm:4.0.0" + checksum: 10/7c897bcad6a40efd77df165fca8a57bd3fbb84e6ace848085b16233c1086c32c06258caa88b9c9aaf7d4029ded13d01b57d1b5fd808dc5722cf57626ac4a386b + languageName: node + linkType: hard + +"@grpc/grpc-js@npm:^1.1.8": + version: 1.12.5 + resolution: "@grpc/grpc-js@npm:1.12.5" + dependencies: + "@grpc/proto-loader": "npm:^0.7.13" + "@js-sdsl/ordered-map": "npm:^4.4.2" + checksum: 10/4f8ead236dcab4d94e15e62d65ad2d93732d37f5cc52ffafe67ae00f69eae4a4c97d6d34a1b9eac9f30206468f2d15302ea6649afcba1d38929afa9d1e7c12d5 + languageName: node + linkType: hard + +"@grpc/proto-loader@npm:^0.7.0, @grpc/proto-loader@npm:^0.7.13": + version: 0.7.13 + resolution: "@grpc/proto-loader@npm:0.7.13" + dependencies: + lodash.camelcase: "npm:^4.3.0" + long: "npm:^5.0.0" + protobufjs: "npm:^7.2.5" + yargs: "npm:^17.7.2" + bin: + proto-loader-gen-types: build/bin/proto-loader-gen-types.js + checksum: 10/7e2d842c2061cbaf6450c71da0077263be3bab165454d5c8a3e1ae4d3c6d2915f02fd27da63ff01f05e127b1221acd40705273f5d29303901e60514e852992f4 + languageName: node + linkType: hard + "@hapi/bourne@npm:^3.0.0": version: 3.0.0 resolution: "@hapi/bourne@npm:3.0.0" @@ -2943,6 +3023,13 @@ __metadata: languageName: node linkType: hard +"@js-sdsl/ordered-map@npm:^4.4.2": + version: 4.4.2 + resolution: "@js-sdsl/ordered-map@npm:4.4.2" + checksum: 10/ac64e3f0615ecc015461c9f527f124d2edaa9e68de153c1e270c627e01e83d046522d7e872692fd57a8c514578b539afceff75831c0d8b2a9a7a347fbed35af4 + languageName: node + linkType: hard + "@jsonjoy.com/base64@npm:^1.1.1": version: 1.1.2 resolution: "@jsonjoy.com/base64@npm:1.1.2" @@ -4232,7 +4319,7 @@ __metadata: languageName: node linkType: hard -"@opentelemetry/semantic-conventions@npm:^1.27.0, @opentelemetry/semantic-conventions@npm:^1.28.0": +"@opentelemetry/semantic-conventions@npm:^1.22.0, @opentelemetry/semantic-conventions@npm:^1.27.0, @opentelemetry/semantic-conventions@npm:^1.28.0": version: 1.28.0 resolution: "@opentelemetry/semantic-conventions@npm:1.28.0" checksum: 10/c182a3206769b5d5a8ab89a5c674d046fd789421cef27ea55af179990e314732433c98e5017aa23e99f15fd2b0e13cb129bb6c2282da6860ce9419adf32b2e87 @@ -7281,7 +7368,7 @@ __metadata: languageName: node linkType: hard -"base64-js@npm:^1.0.2, base64-js@npm:^1.3.1": +"base64-js@npm:^1.0.2, base64-js@npm:^1.3.0, base64-js@npm:^1.3.1": version: 1.5.1 resolution: "base64-js@npm:1.5.1" checksum: 10/669632eb3745404c2f822a18fc3a0122d2f9a7a13f7fb8b5823ee19d1d2ff9ee5b52c53367176ea4ad093c332fd5ab4bd0ebae5a8e27917a4105a4cfc86b1005 @@ -7699,6 +7786,13 @@ __metadata: languageName: node linkType: hard +"buffer-equal-constant-time@npm:1.0.1": + version: 1.0.1 + resolution: "buffer-equal-constant-time@npm:1.0.1" + checksum: 10/80bb945f5d782a56f374b292770901065bad21420e34936ecbe949e57724b4a13874f735850dd1cc61f078773c4fb5493a41391e7bda40d1fa388d6bd80daaab + languageName: node + linkType: hard + "buffer-from@npm:^1.0.0": version: 1.1.2 resolution: "buffer-from@npm:1.1.2" @@ -9841,6 +9935,15 @@ __metadata: languageName: node linkType: hard +"ecdsa-sig-formatter@npm:1.0.11, ecdsa-sig-formatter@npm:^1.0.11": + version: 1.0.11 + resolution: "ecdsa-sig-formatter@npm:1.0.11" + dependencies: + safe-buffer: "npm:^5.0.1" + checksum: 10/878e1aab8a42773320bc04c6de420bee21aebd71810e40b1799880a8a1c4594bcd6adc3d4213a0fb8147d4c3f529d8f9a618d7f59ad5a9a41b142058aceda23f + languageName: node + linkType: hard + "ee-first@npm:1.1.1": version: 1.1.1 resolution: "ee-first@npm:1.1.1" @@ -11289,7 +11392,7 @@ __metadata: languageName: node linkType: hard -"gaxios@npm:^6.0.0": +"gaxios@npm:^6.0.0, gaxios@npm:^6.0.3, gaxios@npm:^6.1.1": version: 6.7.1 resolution: "gaxios@npm:6.7.1" dependencies: @@ -11302,7 +11405,7 @@ __metadata: languageName: node linkType: hard -"gcp-metadata@npm:^6.0.0": +"gcp-metadata@npm:^6.0.0, gcp-metadata@npm:^6.1.0": version: 6.1.0 resolution: "gcp-metadata@npm:6.1.0" dependencies: @@ -11581,6 +11684,44 @@ __metadata: languageName: node linkType: hard +"google-auth-library@npm:^9.0.0, google-auth-library@npm:^9.7.0": + version: 9.15.0 + resolution: "google-auth-library@npm:9.15.0" + dependencies: + base64-js: "npm:^1.3.0" + ecdsa-sig-formatter: "npm:^1.0.11" + gaxios: "npm:^6.1.1" + gcp-metadata: "npm:^6.1.0" + gtoken: "npm:^7.0.0" + jws: "npm:^4.0.0" + checksum: 10/fba2db9732bbf1b3a3a2e2b45131ba8e8aba297377f1c104d0b2ab3386bbc1e02047f20b8a7afca1c6308492da1540104618f1c7b5cd539703552e10399c560e + languageName: node + linkType: hard + +"googleapis-common@npm:^7.0.0": + version: 7.2.0 + resolution: "googleapis-common@npm:7.2.0" + dependencies: + extend: "npm:^3.0.2" + gaxios: "npm:^6.0.3" + google-auth-library: "npm:^9.7.0" + qs: "npm:^6.7.0" + url-template: "npm:^2.0.8" + uuid: "npm:^9.0.0" + checksum: 10/4b914be6681f2a5a02bd0954a4a5cee1725d8623cb9d0a7c2fd7132de110e8d5707566cba39784e58147be39e74bc5513ad30fdcdaa6edcbb47ecf687003cb6c + languageName: node + linkType: hard + +"googleapis@npm:^137.0.0": + version: 137.1.0 + resolution: "googleapis@npm:137.1.0" + dependencies: + google-auth-library: "npm:^9.0.0" + googleapis-common: "npm:^7.0.0" + checksum: 10/5d7cee8a7293701a19db3ee515272fbb932c343d1640ca21a3996dc717649817a6d169cd8370ee6e535bd2d48cb70f0b426abe691b22830d588419ebe95b5740 + languageName: node + linkType: hard + "gopd@npm:^1.0.1": version: 1.0.1 resolution: "gopd@npm:1.0.1" @@ -11604,6 +11745,16 @@ __metadata: languageName: node linkType: hard +"gtoken@npm:^7.0.0": + version: 7.1.0 + resolution: "gtoken@npm:7.1.0" + dependencies: + gaxios: "npm:^6.0.0" + jws: "npm:^4.0.0" + checksum: 10/640392261e55c9242137a81a4af8feb053b57061762cedddcbb6a0d62c2314316161808ac2529eea67d06d69fdc56d82361af50f2d840a04a87ea29e124d7382 + languageName: node + linkType: hard + "handle-thing@npm:^2.0.0": version: 2.0.1 resolution: "handle-thing@npm:2.0.1" @@ -13779,6 +13930,27 @@ __metadata: languageName: node linkType: hard +"jwa@npm:^2.0.0": + version: 2.0.0 + resolution: "jwa@npm:2.0.0" + dependencies: + buffer-equal-constant-time: "npm:1.0.1" + ecdsa-sig-formatter: "npm:1.0.11" + safe-buffer: "npm:^5.0.1" + checksum: 10/ab983f6685d99d13ddfbffef9b1c66309a536362a8412d49ba6e687d834a1240ce39290f30ac7dbe241e0ab6c76fee7ff795776ce534e11d148158c9b7193498 + languageName: node + linkType: hard + +"jws@npm:^4.0.0": + version: 4.0.0 + resolution: "jws@npm:4.0.0" + dependencies: + jwa: "npm:^2.0.0" + safe-buffer: "npm:^5.0.1" + checksum: 10/1d15f4cdea376c6bd6a81002bd2cb0bf3d51d83da8f0727947b5ba3e10cf366721b8c0d099bf8c1eb99eb036e2c55e5fd5efd378ccff75a2b4e0bd10002348b9 + languageName: node + linkType: hard + "keygrip@npm:~1.1.0": version: 1.1.0 resolution: "keygrip@npm:1.1.0" @@ -16506,6 +16678,26 @@ __metadata: languageName: node linkType: hard +"protobufjs@npm:^7.2.5": + version: 7.4.0 + resolution: "protobufjs@npm:7.4.0" + dependencies: + "@protobufjs/aspromise": "npm:^1.1.2" + "@protobufjs/base64": "npm:^1.1.2" + "@protobufjs/codegen": "npm:^2.0.4" + "@protobufjs/eventemitter": "npm:^1.1.0" + "@protobufjs/fetch": "npm:^1.1.0" + "@protobufjs/float": "npm:^1.0.2" + "@protobufjs/inquire": "npm:^1.1.0" + "@protobufjs/path": "npm:^1.1.2" + "@protobufjs/pool": "npm:^1.1.0" + "@protobufjs/utf8": "npm:^1.1.0" + "@types/node": "npm:>=13.7.0" + long: "npm:^5.0.0" + checksum: 10/408423506610f70858d7593632f4a6aa4f05796c90fd632be9b9252457c795acc71aa6d3b54bb7f48a890141728fee4ca3906723ccea6c202ad71f21b3879b8b + languageName: node + linkType: hard + "protobufjs@npm:^7.3.0": version: 7.3.2 resolution: "protobufjs@npm:7.3.2" @@ -16712,6 +16904,15 @@ __metadata: languageName: node linkType: hard +"qs@npm:^6.7.0": + version: 6.13.1 + resolution: "qs@npm:6.13.1" + dependencies: + side-channel: "npm:^1.0.6" + checksum: 10/53cf5fdc5f342a9ffd3968f20c8c61624924cf928d86fff525240620faba8ca5cfd6c3f12718cc755561bfc3dc9721bc8924e38f53d8925b03940f0b8a902212 + languageName: node + linkType: hard + "querystring-es3@npm:~0.2.0": version: 0.2.1 resolution: "querystring-es3@npm:0.2.1" @@ -19531,6 +19732,13 @@ __metadata: languageName: node linkType: hard +"url-template@npm:^2.0.8": + version: 2.0.8 + resolution: "url-template@npm:2.0.8" + checksum: 10/fc6a4cf6c3c3c3d7f0a0bb4405c41b81934e583b454e52ace7b2e5d7ed32ec9c2970ff1826d240c5823955fcb13531a1fc4ff6ba4569b1886a2976665353e952 + languageName: node + linkType: hard + "url@npm:~0.11.0": version: 0.11.4 resolution: "url@npm:0.11.4" @@ -19607,7 +19815,7 @@ __metadata: languageName: node linkType: hard -"uuid@npm:^9.0.1": +"uuid@npm:^9.0.0, uuid@npm:^9.0.1": version: 9.0.1 resolution: "uuid@npm:9.0.1" bin: