From a1633f61381d5c59409c8ff70dad958a67a9d717 Mon Sep 17 00:00:00 2001 From: Dom Del Nano Date: Mon, 7 Aug 2023 14:17:33 -0700 Subject: [PATCH] Ensure `kStaticallyLinkedSource` is attributed when static TLS tracing probes are added (#1655) Summary: Ensure `kStaticallyLinkedSource` is attributed when static TLS tracing probes are added When developing #1652, we noticed that there are situations where our prometheus metrics are annotated with `kStaticallyLinkedSource` instead of the correct tls source. This is due to the following [conditional logic](https://github.com/pixie-io/pixie/blob/44a8338a60e74564d94d474a7ed723717d2ebaaf/src/stirling/source_connectors/socket_tracer/uprobe_manager.cc#L646) using a mutable variable which was returning true even when uprobes were already attached. When tracing a dynamically linked process, the uprobe count will be recorded to a non zero value when the dynamic probes are [attached](https://github.com/pixie-io/pixie/blob/44a8338a60e74564d94d474a7ed723717d2ebaaf/src/stirling/source_connectors/socket_tracer/uprobe_manager.cc#L615). The NodeJS uprobes in [AttachNodeJsOpenSSLUprobes](https://github.com/pixie-io/pixie/blob/44a8338a60e74564d94d474a7ed723717d2ebaaf/src/stirling/source_connectors/socket_tracer/uprobe_manager.cc#L629C16-L629C42) will then reset the `count_or` integer value back to 0 (since dynamically linked applications cannot be NodeJS -- it's statically linked). This causes us to erroneously attempt (and fail) to attach the statically linked probes in addition to setting the tls source to `kStaticallyLinkedSource`. I attempted to refactor the uprobe counting logic and this bug in a single change, but I believe it will increase the scope of this. The OpenSSL probe specs are shared amongst the dynamic, statically linked and NodeJS cases, which causes complications with counting the number of attached (rather than returning the potential count). While we are confident this bug is likely the source of the confusing BoringSSL tracing results, it would be beneficial to have that validation sooner. I will follow up this change with refactoring this code, but would prefer to proceed with this fix to unblock the BoringSSL tracing validation. Relevant Issues: #692 Type of change: /kind bug Test Plan: Printed out the promtheus `conn_stats_bytes` metrics during `openssl_trace_bpf_test` and verified they match the given test case
openssl_trace_bpf_test metric output ``` $ ./scripts/sudo_bazel_run.sh -c dbg src/stirling/source_connectors/socket_tracer:openssl_trace_bpf_test --test_arg='--gtest_filter=OpenSSLTraceTest/*.ssl_capture_curl_client' 2>&1 \| grep '^conn_stats_bytes\\|TypeParam' -- [----------] 1 test from OpenSSLTraceTest/0, where TypeParam = px::stirling::NginxOpenSSL_1_1_0_ContainerWrapper conn_stats_bytes{protocol="kProtocolHTTP",tls_source="kLibSSL_1_1_Source"} 3191 conn_stats_bytes{protocol="kProtocolUnknown",tls_source="kSSLNone"} 3591 conn_stats_bytes{protocol="kProtocolDNS",tls_source="kSSLNone"} 0 conn_stats_bytes{protocol="kProtocolHTTP",tls_source="kSSLNone"} 416 [----------] 1 test from OpenSSLTraceTest/1, where TypeParam = px::stirling::NginxOpenSSL_1_1_1_ContainerWrapper conn_stats_bytes{protocol="kProtocolHTTP",tls_source="kLibSSL_1_1_Source"} 6375 conn_stats_bytes{protocol="kProtocolUnknown",tls_source="kSSLNone"} 6989 conn_stats_bytes{protocol="kProtocolDNS",tls_source="kSSLNone"} 0 conn_stats_bytes{protocol="kProtocolHTTP",tls_source="kSSLNone"} 832 [----------] 1 test from OpenSSLTraceTest/2, where TypeParam = px::stirling::NginxOpenSSL_3_0_8_ContainerWrapper conn_stats_bytes{protocol="kProtocolHTTP",tls_source="kLibSSL_3_Source"} 3184 conn_stats_bytes{protocol="kProtocolHTTP",tls_source="kLibSSL_1_1_Source"} 6375 conn_stats_bytes{protocol="kProtocolUnknown",tls_source="kSSLNone"} 10484 conn_stats_bytes{protocol="kProtocolDNS",tls_source="kSSLNone"} 307 conn_stats_bytes{protocol="kProtocolHTTP",tls_source="kSSLNone"} 1248 [----------] 1 test from OpenSSLTraceTest/3, where TypeParam = px::stirling::Python310ContainerWrapper conn_stats_bytes{protocol="kProtocolHTTP",tls_source="kLibPythonSource"} 3650 conn_stats_bytes{protocol="kProtocolHTTP",tls_source="kLibSSL_3_Source"} 3184 conn_stats_bytes{protocol="kProtocolHTTP",tls_source="kLibSSL_1_1_Source"} 6375 conn_stats_bytes{protocol="kProtocolUnknown",tls_source="kSSLNone"} 14350 conn_stats_bytes{protocol="kProtocolDNS",tls_source="kSSLNone"} 307 conn_stats_bytes{protocol="kProtocolHTTP",tls_source="kSSLNone"} 1664 [----------] 1 test from OpenSSLTraceTest/4, where TypeParam = px::stirling::Node12_3_1ContainerWrapper conn_stats_bytes{protocol="kProtocolHTTP",tls_source="kNodeJSSource"} 1360 conn_stats_bytes{protocol="kProtocolHTTP",tls_source="kLibPythonSource"} 3650 conn_stats_bytes{protocol="kProtocolHTTP",tls_source="kLibSSL_3_Source"} 3184 conn_stats_bytes{protocol="kProtocolHTTP",tls_source="kLibSSL_1_1_Source"} 6375 conn_stats_bytes{protocol="kProtocolUnknown",tls_source="kSSLNone"} 18367 conn_stats_bytes{protocol="kProtocolDNS",tls_source="kSSLNone"} 307 conn_stats_bytes{protocol="kProtocolHTTP",tls_source="kSSLNone"} 133152 [----------] 1 test from OpenSSLTraceTest/5, where TypeParam = px::stirling::Node14_18_1AlpineContainerWrapper conn_stats_bytes{protocol="kProtocolHTTP",tls_source="kNodeJSSource"} 2743 conn_stats_bytes{protocol="kProtocolHTTP",tls_source="kLibPythonSource"} 3650 conn_stats_bytes{protocol="kProtocolHTTP",tls_source="kLibSSL_3_Source"} 3184 conn_stats_bytes{protocol="kProtocolHTTP",tls_source="kLibSSL_1_1_Source"} 6375 conn_stats_bytes{protocol="kProtocolUnknown",tls_source="kSSLNone"} 22319 conn_stats_bytes{protocol="kProtocolDNS",tls_source="kSSLNone"} 307 conn_stats_bytes{protocol="kProtocolHTTP",tls_source="kSSLNone"} 155820 ```
--------- Signed-off-by: Dom Del Nano --- .../source_connectors/socket_tracer/uprobe_manager.cc | 8 ++------ 1 file changed, 2 insertions(+), 6 deletions(-) diff --git a/src/stirling/source_connectors/socket_tracer/uprobe_manager.cc b/src/stirling/source_connectors/socket_tracer/uprobe_manager.cc index 3e1268d640d..c44dd10958a 100644 --- a/src/stirling/source_connectors/socket_tracer/uprobe_manager.cc +++ b/src/stirling/source_connectors/socket_tracer/uprobe_manager.cc @@ -644,15 +644,11 @@ int UProbeManager::DeployOpenSSLUProbes(const absl::flat_hash_set& pid // Attach uprobes to statically linked applications only if no other probes have been attached. if (FLAGS_stirling_trace_static_tls_binaries && count_or.ok() && count_or.ValueOrDie() == 0) { - // Optimisitcally update the SSL lib source since the probes can trigger - // before the BPF map is updated. This value is cleaned up when the upid is - // terminated, so if attachment fails it will be deleted prior to the pid being - // reused. - PX_UNUSED(openssl_source_map_->SetValue(pid.pid(), kStaticallyLinkedSource)); count_or = AttachOpenSSLUProbesOnStaticBinary(pid.pid()); - if (count_or.ok()) { + if (count_or.ok() && count_or.ValueOrDie() > 0) { uprobe_count += count_or.ValueOrDie(); + PX_UNUSED(openssl_source_map_->SetValue(pid.pid(), kStaticallyLinkedSource)); VLOG(1) << absl::Substitute( "Attaching OpenSSL uprobes on executable statically linked OpenSSL library"