Skip to content

Commit

Permalink
Create metrics library skeleton and simple daemon for collecting syst…
Browse files Browse the repository at this point in the history
…em metrics

This code

(1) Creates a skeleton for a base metrics library that handles standard initialization in init_metrics(). The dependency on the launcher client is removed.
(2) Creates a systemd service which registers the base library and adds its own system metrics to the provided meter.

Change-Id: Ie9c67ae8593f227db5b9f204b1aa8a82dcbe76e1
  • Loading branch information
msilezin committed Jun 11, 2024
1 parent 65465a3 commit d0420ed
Show file tree
Hide file tree
Showing 23 changed files with 220 additions and 23 deletions.
1 change: 1 addition & 0 deletions .github/workflows/build.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,7 @@ jobs:
- buildconfigs/oak_containers_orchestrator.sh
- buildconfigs/oak_containers_stage1.sh
- buildconfigs/oak_containers_syslogd.sh
- buildconfigs/oak_containers_agent.sh
- buildconfigs/oak_containers_system_image.sh
- buildconfigs/oak_echo_enclave_app.sh
- buildconfigs/oak_echo_raw_enclave_app.sh
Expand Down
1 change: 1 addition & 0 deletions .github/workflows/provenance.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,7 @@ jobs:
- buildconfigs/oak_containers_orchestrator.toml
- buildconfigs/oak_containers_stage1.toml
- buildconfigs/oak_containers_syslogd.toml
- buildconfigs/oak_containers_agent.toml
- buildconfigs/oak_containers_system_image.toml
- buildconfigs/oak_echo_enclave_app.toml
- buildconfigs/oak_echo_raw_enclave_app.toml
Expand Down
2 changes: 2 additions & 0 deletions Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@ members = [
"oak_attestation_verification_test_utils",
"oak_channel",
"oak_client",
"oak_containers_agent",
"oak_containers_hello_world_trusted_app",
"oak_containers_hello_world_untrusted_app",
"oak_containers_launcher",
Expand Down Expand Up @@ -109,6 +110,7 @@ oak_attestation_verification = { path = "./oak_attestation_verification" }
oak_attestation_verification_test_utils = { path = "./oak_attestation_verification_test_utils" }
oak_channel = { path = "./oak_channel" }
oak_client = { path = "./oak_client" }
oak_containers_agent = { path = "./oak_containers_agent" }
oak_containers_orchestrator = { path = "./oak_containers_orchestrator" }
oak_containers_launcher = { path = "./oak_containers_launcher" }
oak_containers_sdk = { path = "./oak_containers_sdk" }
Expand Down
2 changes: 1 addition & 1 deletion FORCE_CI
Original file line number Diff line number Diff line change
@@ -1 +1 @@
34
35
18 changes: 18 additions & 0 deletions buildconfigs/oak_containers_agent.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,18 @@
#!/bin/bash
#
# Build configuration for oak_containers_agent.
#
# Declares the package name, the command used to build it, and the path(s) of
# the artifact(s) that command produces.
# NOTE(review): presumably sourced by the CI build workflow, which lists this
# file among its build configs -- confirm how the variables are consumed.
export PACKAGE_NAME=oak_containers_agent

# Runs the `oak_containers_agent` just recipe inside the
# `systemImageProvenance` Nix development shell.
export BUILD_COMMAND=(
nix
develop
.#systemImageProvenance
--command
just
oak_containers_agent
)

# Output of the build: the patched copy of the agent binary.
export SUBJECT_PATHS=(
oak_containers_agent/target/oak_containers_agent_patched
)
9 changes: 9 additions & 0 deletions buildconfigs/oak_containers_agent.toml
Original file line number Diff line number Diff line change
@@ -0,0 +1,9 @@
# Provenance build configuration for oak_containers_agent.
# The command runs the `oak_containers_agent` just recipe inside the
# `systemImageProvenance` Nix development shell.
command = [
"nix",
"develop",
".#systemImageProvenance",
"--command",
"just",
"oak_containers_agent",
]
# Path of the binary produced by the command above (the patched agent binary).
artifact_path = "./oak_containers_agent/target/oak_containers_agent_patched"
16 changes: 15 additions & 1 deletion justfile
Original file line number Diff line number Diff line change
Expand Up @@ -88,7 +88,7 @@ oak_containers_kernel:
oak_containers_launcher:
env cargo build --release --package='oak_containers_launcher'

oak_containers_system_image: oak_containers_orchestrator oak_containers_syslogd
oak_containers_system_image: oak_containers_agent oak_containers_orchestrator oak_containers_syslogd
echo "Using bazel config flag: $BAZEL_CONFIG_FLAG"
# Copy dependencies into bazel build.
mkdir --parents oak_containers_system_image/target/image_binaries
Expand All @@ -98,6 +98,9 @@ oak_containers_system_image: oak_containers_orchestrator oak_containers_syslogd
cp --preserve=timestamps \
oak_containers_syslogd/target/oak_containers_syslogd_patched \
oak_containers_system_image/target/image_binaries/oak_containers_syslogd
cp --preserve=timestamps \
oak_containers_agent/target/oak_containers_agent_patched \
oak_containers_system_image/target/image_binaries/oak_containers_agent
# Build and compress.
bazel build $BAZEL_CONFIG_FLAG oak_containers_system_image --build_tag_filters=+noci
cp --preserve=timestamps \
Expand Down Expand Up @@ -128,6 +131,17 @@ oak_containers_syslogd:
patchelf --set-interpreter /lib64/ld-linux-x86-64.so.2 --set-rpath "" \
oak_containers_syslogd/target/oak_containers_syslogd_patched

# Build the oak_containers_agent release binary, then create a patched copy
# with the interpreter set to /lib64/ld-linux-x86-64.so.2 and an empty rpath.
oak_containers_agent:
env --chdir=oak_containers_agent \
cargo build --release -Z unstable-options --out-dir=target
# We can't patch the binary in-place, as that would confuse cargo.
# Therefore we copy it to a new location and patch there.
cp \
oak_containers_agent/target/oak_containers_agent \
oak_containers_agent/target/oak_containers_agent_patched
patchelf --set-interpreter /lib64/ld-linux-x86-64.so.2 --set-rpath "" \
oak_containers_agent/target/oak_containers_agent_patched

# Profile the Wasm execution and generate a flamegraph.
profile_wasm:
# If it fails with SIGSEGV, try running again.
Expand Down
10 changes: 6 additions & 4 deletions kokoro/build_binaries_oak_containers.sh
Original file line number Diff line number Diff line change
Expand Up @@ -20,15 +20,16 @@ touch "${KOKORO_ARTIFACTS_DIR}/binaries/git_commit_${KOKORO_GIT_COMMIT_oak:?}"
# Copy the generated binaries to Placer. The timestamps are used to convey
# the creation time.
#
# System image deps (oak_containers_orchestrator, oak_containers_syslogd)
# are tracked to monitor their reproducibility. They are expected to be
# imported transiently into google3 for the sake of provenance verification
# (i.e., do Kokoro and GitHub produce identical results).
# System image deps (oak_containers_orchestrator, oak_containers_syslogd,
# oak_containers_agent) are tracked to monitor their reproducibility. They are
# expected to be imported transiently into google3 for the sake of provenance
# verification (i.e., do Kokoro and GitHub produce identical results).
readonly generated_binaries=(
./target/stage1.cpio
./oak_containers_kernel/target/bzImage
./oak_containers_orchestrator/target/oak_containers_orchestrator
./oak_containers_syslogd/target/oak_containers_syslogd_patched
./oak_containers_agent/target/oak_containers_agent_patched
./oak_containers_system_image/target/image.tar.xz
./oak_containers_hello_world_container/target/oak_container_example_oci_filesystem_bundle.tar
./oak_functions_containers_container/target/oak_functions_container_oci_filesystem_bundle.tar
Expand All @@ -39,6 +40,7 @@ readonly binary_names=(
oak_containers_kernel
oak_containers_orchestrator
oak_containers_syslogd
oak_containers_agent
oak_containers_system_image
oak_containers_hello_world_container
oak_functions_container
Expand Down
31 changes: 31 additions & 0 deletions oak_containers_agent/Cargo.toml
Original file line number Diff line number Diff line change
@@ -0,0 +1,31 @@
# Manifest for the oak_containers_agent crate (metrics library + agent daemon).
[package]
name = "oak_containers_agent"
version = "0.1.0"
edition = "2021"
license = "Apache-2.0"

[dependencies]
anyhow = "*"
# Command-line parsing via the derive API (`#[derive(Parser)]`).
clap = { version = "*", features = ["derive"] }
# OpenTelemetry API, SDK and OTLP exporter: default features are disabled and
# only the metrics-related features are enabled.
opentelemetry = { version = "*", default-features = false, features = [
"metrics",
] }
opentelemetry_sdk = { version = "*", default-features = false, features = [
"metrics",
"rt-tokio",
] }
opentelemetry-otlp = { version = "*", default-features = false, features = [
"grpc-tonic",
"metrics",
] }
# Imported by the agent binary (`procfs::{Current, CurrentSI}`).
procfs = "*"
tokio = { version = "*", features = [
"rt-multi-thread",
"macros",
"sync",
"time",
] }
tokio-stream = { version = "*", features = ["net"] }

[build-dependencies]
# Used by build.rs to generate gRPC code from the proto definitions.
oak_grpc_utils = { workspace = true }
12 changes: 12 additions & 0 deletions oak_containers_agent/README.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,12 @@
<!-- Oak Logo Start -->
<!-- An HTML element is intentionally used since GitHub recommends this approach to handle different images in dark/light modes. Ref: https://docs.github.com/en/get-started/writing-on-github/getting-started-with-writing-and-formatting-on-github/basic-writing-and-formatting-syntax#specifying-the-theme-an-image-is-shown-to -->
<!-- markdownlint-disable-next-line MD033 -->
<h1><picture><source media="(prefers-color-scheme: dark)" srcset="/docs/oak-logo/svgs/oak-containers-negative-colour.svg?sanitize=true"><source media="(prefers-color-scheme: light)" srcset="/docs/oak-logo/svgs/oak-containers.svg?sanitize=true"><img alt="Project Oak Containers Logo" src="/docs/oak-logo/svgs/oak-containers.svg?sanitize=true"></picture></h1>
<!-- Oak Logo End -->

# Oak Containers Metrics

This is a simple daemon that exports system-level metrics.

The crate includes a base library that wraps OpenTelemetry to include standard
process metrics for any application importing the library.
31 changes: 31 additions & 0 deletions oak_containers_agent/build.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,31 @@
//
// Copyright 2024 The Project Oak Authors
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

use oak_grpc_utils::{generate_grpc_code, CodegenOptions};

/// Build-script entry point: generates the gRPC code the agent needs from the
/// repository's proto definitions.
///
/// Returns an error if code generation fails.
fn main() -> Result<(), Box<dyn std::error::Error>> {
    // Proto files handed to the generator, written relative to this crate.
    let proto_files = [
        "../proto/containers/interfaces.proto",
        "../proto/crypto/crypto.proto",
        "../proto/session/messages.proto",
    ];
    // NOTE(review): presumably the include/search roots for proto imports --
    // confirm against `generate_grpc_code`'s signature.
    let include_roots = [".."];

    generate_grpc_code(&proto_files, &include_roots, CodegenOptions::default())?;
    Ok(())
}
16 changes: 16 additions & 0 deletions oak_containers_agent/src/lib.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,16 @@
//
// Copyright 2024 The Project Oak Authors
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

pub mod metrics;
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
//
// Copyright 2023 The Project Oak Authors
// Copyright 2024 The Project Oak Authors
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
Expand All @@ -14,16 +14,22 @@
// limitations under the License.
//

use std::{sync::Arc, time::Duration};
#![feature(c_size_t)]

use anyhow::Result;
use clap::Parser;
use opentelemetry::{
global::set_error_handler,
metrics::{AsyncInstrument, Meter, MeterProvider, ObservableCounter, ObservableGauge, Unit},
KeyValue,
};
use procfs::{Current, CurrentSI};
use tokio::time::{self, Duration};

use crate::launcher_client::LauncherClient;
// Command-line arguments of the agent binary, parsed with clap's derive API.
#[derive(Parser, Debug)]
struct Args {
// Address passed to `init_metrics`, where it becomes the OTLP exporter
// endpoint. Falls back to "http://10.0.2.100:8080" when not supplied.
#[arg(default_value = "http://10.0.2.100:8080")]
launcher_addr: String,
}

// It's not dead, it's just asynchronous.
#[allow(dead_code)]
Expand Down Expand Up @@ -51,8 +57,9 @@ pub struct SystemMetrics {
mem_slab: ObservableGauge<u64>,
}

#[allow(dead_code)]
impl SystemMetrics {
fn new(meter: Meter) -> Result<Self> {
fn new(meter: Meter) -> Result<Self, anyhow::Error> {
Ok(Self {
cpu_seconds_total: meter
.u64_observable_counter("cpu_seconds_total")
Expand Down Expand Up @@ -259,11 +266,17 @@ impl SystemMetrics {
}
}

pub fn run(launcher_client: Arc<LauncherClient>) -> Result<SystemMetrics> {
let metrics = opentelemetry_otlp::new_pipeline()
.metrics(opentelemetry_sdk::runtime::Tokio)
.with_exporter(launcher_client.openmetrics_builder())
.with_period(Duration::from_secs(60))
.build()?;
SystemMetrics::new(metrics.meter("oak_containers_orchestrator"))
/// Entry point of the metrics agent daemon.
///
/// Parses the command line, installs a global OpenTelemetry error handler that
/// logs to stderr, initializes the metrics pipeline against the launcher
/// address, registers the system metrics on the `oak_agent` meter and then
/// idles forever so the process stays alive.
///
/// Returns an error (instead of panicking or silently continuing) if the error
/// handler cannot be installed, the metrics pipeline cannot be built, or the
/// system metrics cannot be registered.
#[tokio::main]
async fn main() -> Result<(), Box<dyn std::error::Error>> {
    let args = Args::parse();
    set_error_handler(|err| eprintln!("oak-agent: OTLP error: {}", err))?;

    // Propagate pipeline initialization failures through `main`'s `Result`
    // rather than panicking via `unwrap()`.
    let meter_provider = oak_containers_agent::metrics::init_metrics(args.launcher_addr)?;
    let meter = meter_provider.meter("oak_agent");

    // Fail fast if the instruments cannot be registered: a daemon that keeps
    // running without any metrics is worse than one that exits with an error.
    // The binding is intentionally held (unused) until `main` ends.
    let _system_metrics = SystemMetrics::new(meter)?;

    // keep alive loop
    loop {
        time::sleep(Duration::from_secs(30)).await;
    }
}
29 changes: 29 additions & 0 deletions oak_containers_agent/src/metrics.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,29 @@
//
// Copyright 2024 The Project Oak Authors
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

use opentelemetry::metrics::MetricsError;
use opentelemetry_otlp::{ExportConfig, WithExportConfig};
use opentelemetry_sdk::metrics::SdkMeterProvider;

/// Builds the OTLP metrics pipeline used by the agent and returns its meter
/// provider.
///
/// `launcher_addr` is used as the exporter endpoint; every other export
/// setting keeps its `ExportConfig::default()` value. The exporter is the
/// tonic (gRPC) one and the pipeline runs on the Tokio runtime.
///
/// Returns a `MetricsError` if the pipeline cannot be built.
pub fn init_metrics(launcher_addr: String) -> Result<SdkMeterProvider, MetricsError> {
    // Only the endpoint is overridden; the rest comes from the defaults.
    let config = ExportConfig { endpoint: launcher_addr, ..ExportConfig::default() };
    let exporter = opentelemetry_otlp::new_exporter().tonic().with_export_config(config);

    let pipeline =
        opentelemetry_otlp::new_pipeline().metrics(opentelemetry_sdk::runtime::Tokio);

    Ok(pipeline.with_exporter(exporter).build()?)
}
1 change: 0 additions & 1 deletion oak_containers_orchestrator/BUILD
Original file line number Diff line number Diff line change
Expand Up @@ -33,7 +33,6 @@ rust_library(
"src/launcher_client.rs",
"src/lib.rs",
"src/logging.rs",
"src/metrics.rs",
],
crate_features = ["bazel"],
crate_name = "oak_containers_orchestrator",
Expand Down
1 change: 0 additions & 1 deletion oak_containers_orchestrator/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -36,7 +36,6 @@ opentelemetry-otlp = { version = "*", default-features = false, features = [
p256 = { version = "*", features = ["ecdsa"] }
prost = "*"
prost-types = "*"
procfs = "*"
rand_core = { version = "*", default-features = false, features = [
"getrandom",
] }
Expand Down
1 change: 0 additions & 1 deletion oak_containers_orchestrator/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -41,4 +41,3 @@ pub mod ipc_server;
pub mod key_provisioning;
pub mod launcher_client;
pub mod logging;
pub mod metrics;
2 changes: 0 additions & 2 deletions oak_containers_orchestrator/src/main.rs
Original file line number Diff line number Diff line change
Expand Up @@ -120,8 +120,6 @@ async fn main() -> anyhow::Result<()> {
tokio::fs::create_dir_all(path).await?;
}

let _metrics = oak_containers_orchestrator::metrics::run(launcher_client.clone())?;

// Start application and gRPC servers.
let user = nix::unistd::User::from_name(&args.runtime_user)
.context(format!("error resolving user {}", args.runtime_user))?
Expand Down
1 change: 1 addition & 0 deletions oak_containers_system_image/BUILD
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,7 @@ package(
filegroup(
name = "rust_bins",
srcs = [
"target/image_binaries/oak_containers_agent",
"target/image_binaries/oak_containers_orchestrator",
"target/image_binaries/oak_containers_syslogd",
],
Expand Down
4 changes: 4 additions & 0 deletions oak_containers_system_image/Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -37,6 +37,10 @@ RUN systemctl enable oak-orchestrator
COPY ./target/oak_containers_syslogd_patched /usr/bin/oak_containers_syslogd
RUN systemctl enable oak-syslogd

# Metrics agent
COPY ./target/oak_containers_agent_patched /usr/bin/oak_containers_agent
RUN systemctl enable oak-agent

# Only enable interactive logins if the kernel was booted with "debug" flag.
RUN systemctl disable getty@
RUN systemctl enable root-passwd
Expand Down
1 change: 1 addition & 0 deletions oak_containers_system_image/base_image.Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -45,6 +45,7 @@ RUN systemctl enable systemd-networkd
# They don't exist yet in this image, but the symlinks will be properly created.
RUN systemctl enable oak-orchestrator
RUN systemctl enable oak-syslogd
RUN systemctl enable oak-agent

# Only enable interactive logins if the kernel was booted with "debug" flag.
RUN systemctl disable getty@
Expand Down
3 changes: 3 additions & 0 deletions oak_containers_system_image/build-old.sh
Original file line number Diff line number Diff line change
Expand Up @@ -14,14 +14,17 @@ mkdir --parent target
# build the orchestrator binary
cargo build --package=oak_containers_orchestrator --profile=release-lto --target=x86_64-unknown-linux-musl -Z unstable-options --out-dir=./target
cargo build --package=oak_containers_syslogd --release -Z unstable-options --out-dir=./target
cargo build --package=oak_containers_agent --release -Z unstable-options --out-dir=./target

# We need to patch the binary to set the interpreter to the correct location, but we can't do it in-place, as that would
# confuse cargo. Therefore we copy the binary to a new location and patch that.
cp ./target/oak_containers_syslogd ./target/oak_containers_syslogd_patched
cp ./target/oak_containers_agent ./target/oak_containers_agent_patched

# When built under nix the interpreter points to some Nix-specific location that doesn't exist on a regular Linux host, therefore
# we need to manually patch the binary to set it back to the normal regular location.
patchelf --set-interpreter /lib64/ld-linux-x86-64.so.2 ./target/oak_containers_syslogd_patched
patchelf --set-interpreter /lib64/ld-linux-x86-64.so.2 ./target/oak_containers_agent_patched

# Fix the file permissions that will be loaded into the system image, as Git doesn't track them.
# Unfortunately we can't do it in Dockerfile (with `COPY --chown`), as that requires BuildKit.
Expand Down
Loading

0 comments on commit d0420ed

Please sign in to comment.