Skip to content

Commit

Permalink
Update cgroup resolver to try all possible base paths (pixie-io#1981)
Browse files Browse the repository at this point in the history
Summary: Update cgroup resolver to try all possible base paths

For certain environments, like OVH cloud's managed kubernetes service,
the auto discovery mechanism fails due to a confusion between cgroup v1
and v2. This is fixed by attempting all cgroup base paths rather than
trying the first to match and failing if it doesn't work. This is the
second iteration of this change (pixie-io#1978 was the earlier attempt).

Relevant Issues: N/A

Type of change: /kind bug

Test Plan: Community user deployed this change and verified their OVH
MKS cluster is working now

---------

Signed-off-by: Dom Del Nano <[email protected]>
GitOrigin-RevId: ff83032
  • Loading branch information
ddelnano authored and cosmic-copybara committed Aug 20, 2024
1 parent 73c5641 commit 32fa98c
Show file tree
Hide file tree
Showing 3 changed files with 96 additions and 61 deletions.
140 changes: 82 additions & 58 deletions src/shared/metadata/cgroup_path_resolver.cc
Original file line number Diff line number Diff line change
Expand Up @@ -27,12 +27,14 @@
#include "src/common/fs/fs_wrapper.h"
#include "src/shared/metadata/cgroup_path_resolver.h"

DEFINE_bool(force_cgroup2_mode, true, "Flag to force assume cgroup2 fs for testing purposes");
DEFINE_bool(test_only_force_cgroup2_mode, false,
"Flag to force assume cgroup2 fs for testing purposes");

namespace px {
namespace md {

StatusOr<std::string> CGroupBasePath(std::string_view sysfs_path) {
StatusOr<std::vector<std::string>> CGroupBasePaths(std::string_view sysfs_path) {
std::vector<std::string> base_paths;
// Different hosts may mount different cgroup dirs. Try a couple for robustness.
const std::vector<std::string> cgroup_dirs = {"cpu,cpuacct", "cpu", "pids"};

Expand All @@ -43,7 +45,7 @@ StatusOr<std::string> CGroupBasePath(std::string_view sysfs_path) {
std::string base_path = absl::StrCat(sysfs_path, "/cgroup/", cgroup_dir);

if (fs::Exists(base_path)) {
return base_path;
base_paths.push_back(base_path);
}
}

Expand All @@ -52,12 +54,18 @@ StatusOr<std::string> CGroupBasePath(std::string_view sysfs_path) {
auto fs_status = statfs(cgv2_base_path.c_str(), &info);
bool cgroupv2 = (fs_status == 0) && (info.f_type == CGROUP2_SUPER_MAGIC);

if (cgroupv2 || FLAGS_force_cgroup2_mode) {
return cgv2_base_path;
if (cgroupv2 || FLAGS_test_only_force_cgroup2_mode) {
if (FLAGS_test_only_force_cgroup2_mode) {
return std::vector{cgv2_base_path};
}
base_paths.push_back(cgv2_base_path);
}
// (TODO): This check for cgroup2FS is eventually to be moved above the cgroupv1 check.

return error::NotFound("Could not find CGroup base path");
if (base_paths.empty()) {
return error::NotFound("Could not find CGroup base path");
}
return base_paths;
}

StatusOr<std::string> FindSelfCGroupProcs(std::string_view base_path) {
Expand Down Expand Up @@ -137,17 +145,32 @@ StatusOr<CGroupTemplateSpec> CreateCGroupTemplateSpecFromPath(std::string_view p
}

StatusOr<CGroupTemplateSpec> AutoDiscoverCGroupTemplate(std::string_view sysfs_path) {
PX_ASSIGN_OR_RETURN(std::string base_path, CGroupBasePath(sysfs_path));
LOG(INFO) << "Auto-discovered CGroup base path: " << base_path;

PX_ASSIGN_OR_RETURN(std::string self_cgroup_procs, FindSelfCGroupProcs(base_path));
LOG(INFO) << "Auto-discovered example path: " << self_cgroup_procs;

PX_ASSIGN_OR_RETURN(CGroupTemplateSpec cgroup_path_template,
CreateCGroupTemplateSpecFromPath(self_cgroup_procs));
LOG(INFO) << "Auto-discovered template: " << cgroup_path_template.templated_path;
PX_ASSIGN_OR_RETURN(std::vector<std::string> base_paths, CGroupBasePaths(sysfs_path));
for (const auto& base_path : base_paths) {
LOG(INFO) << "Auto-discovered CGroup base path: " << base_path;

auto self_cgroup_procs_status = FindSelfCGroupProcs(base_path);
if (!self_cgroup_procs_status.ok()) {
LOG(WARNING) << "Could not find self in cgroup procs. Trying next base path.";
continue;
}
auto self_cgroup_procs = self_cgroup_procs_status.ConsumeValueOrDie();
LOG(INFO) << "Auto-discovered example path: " << self_cgroup_procs;

auto cgroup_path_template_status = CreateCGroupTemplateSpecFromPath(self_cgroup_procs);
if (!cgroup_path_template_status.ok()) {
LOG(WARNING) << absl::Substitute(
"Failed to create cgroup template spec from path $0. Trying next base path.",
self_cgroup_procs);
continue;
}
auto cgroup_path_template = cgroup_path_template_status.ConsumeValueOrDie();
LOG(INFO) << "Auto-discovered template: " << cgroup_path_template.templated_path;

return cgroup_path_template;
return cgroup_path_template;
}
return error::NotFound("Unable to auto discover cgroup template from $0",
absl::StrJoin(base_paths, ", "));
}

StatusOr<std::unique_ptr<CGroupPathResolver>> CGroupPathResolver::Create(
Expand Down Expand Up @@ -222,51 +245,52 @@ Status LegacyCGroupPathResolver::Init(std::string_view sysfs_path) {
// $2 = container runtime
// These template parameters are resolved by calls to PodPath.
// Different hosts may mount different cgroup dirs. Try a couple for robustness.
PX_ASSIGN_OR_RETURN(std::string cgroup_dir, CGroupBasePath(sysfs_path));

// Attempt assuming naming scheme #1.
std::string cgroup_kubepods_base_path = absl::Substitute("$0/kubepods", cgroup_dir);
if (fs::Exists(cgroup_kubepods_base_path)) {
cgroup_kubepod_guaranteed_path_template_ =
absl::StrCat(cgroup_kubepods_base_path, "/pod$0/$1/cgroup.procs");
cgroup_kubepod_besteffort_path_template_ =
absl::StrCat(cgroup_kubepods_base_path, "/besteffort/pod$0/$1/cgroup.procs");
cgroup_kubepod_burstable_path_template_ =
absl::StrCat(cgroup_kubepods_base_path, "/burstable/pod$0/$1/cgroup.procs");
cgroup_kubepod_convert_dashes_ = false;
return Status::OK();
}
PX_ASSIGN_OR_RETURN(std::vector<std::string> cgroup_dirs, CGroupBasePaths(sysfs_path));

// Attempt assuming naming scheme #3.
// Must be before the scheme below, since there have been systems that have both paths,
// but this must take priority.
cgroup_kubepods_base_path = absl::Substitute("$0/system.slice/containerd.service", cgroup_dir);
if (fs::Exists(cgroup_kubepods_base_path)) {
cgroup_kubepod_guaranteed_path_template_ =
absl::StrCat(cgroup_kubepods_base_path, "/kubepods-pod$0.slice:$2:$1/cgroup.procs");
cgroup_kubepod_besteffort_path_template_ = absl::StrCat(
cgroup_kubepods_base_path, "/kubepods-besteffort-pod$0.slice:$2:$1/cgroup.procs");
cgroup_kubepod_burstable_path_template_ = absl::StrCat(
cgroup_kubepods_base_path, "/kubepods-burstable-pod$0.slice:$2:$1/cgroup.procs");
cgroup_kubepod_convert_dashes_ = true;
return Status::OK();
}
for (const auto& cgroup_dir : cgroup_dirs) {
// Attempt assuming naming scheme #1.
std::string cgroup_kubepods_base_path = absl::Substitute("$0/kubepods", cgroup_dir);
if (fs::Exists(cgroup_kubepods_base_path)) {
cgroup_kubepod_guaranteed_path_template_ =
absl::StrCat(cgroup_kubepods_base_path, "/pod$0/$1/cgroup.procs");
cgroup_kubepod_besteffort_path_template_ =
absl::StrCat(cgroup_kubepods_base_path, "/besteffort/pod$0/$1/cgroup.procs");
cgroup_kubepod_burstable_path_template_ =
absl::StrCat(cgroup_kubepods_base_path, "/burstable/pod$0/$1/cgroup.procs");
cgroup_kubepod_convert_dashes_ = false;
return Status::OK();
}

// Attempt assuming naming scheme #2.
cgroup_kubepods_base_path = absl::Substitute("$0/kubepods.slice", cgroup_dir);
if (fs::Exists(cgroup_kubepods_base_path)) {
cgroup_kubepod_guaranteed_path_template_ =
absl::StrCat(cgroup_kubepods_base_path, "/kubepods-pod$0.slice/$2-$1.scope/cgroup.procs");
cgroup_kubepod_besteffort_path_template_ = absl::StrCat(
cgroup_kubepods_base_path,
"/kubepods-besteffort.slice/kubepods-besteffort-pod$0.slice/$2-$1.scope/cgroup.procs");
cgroup_kubepod_burstable_path_template_ = absl::StrCat(
cgroup_kubepods_base_path,
"/kubepods-burstable.slice/kubepods-burstable-pod$0.slice/$2-$1.scope/cgroup.procs");
cgroup_kubepod_convert_dashes_ = true;
return Status::OK();
}
// Attempt assuming naming scheme #3.
// Must be before the scheme below, since there have been systems that have both paths,
// but this must take priority.
cgroup_kubepods_base_path = absl::Substitute("$0/system.slice/containerd.service", cgroup_dir);
if (fs::Exists(cgroup_kubepods_base_path)) {
cgroup_kubepod_guaranteed_path_template_ =
absl::StrCat(cgroup_kubepods_base_path, "/kubepods-pod$0.slice:$2:$1/cgroup.procs");
cgroup_kubepod_besteffort_path_template_ = absl::StrCat(
cgroup_kubepods_base_path, "/kubepods-besteffort-pod$0.slice:$2:$1/cgroup.procs");
cgroup_kubepod_burstable_path_template_ = absl::StrCat(
cgroup_kubepods_base_path, "/kubepods-burstable-pod$0.slice:$2:$1/cgroup.procs");
cgroup_kubepod_convert_dashes_ = true;
return Status::OK();
}

// Attempt assuming naming scheme #2.
cgroup_kubepods_base_path = absl::Substitute("$0/kubepods.slice", cgroup_dir);
if (fs::Exists(cgroup_kubepods_base_path)) {
cgroup_kubepod_guaranteed_path_template_ =
absl::StrCat(cgroup_kubepods_base_path, "/kubepods-pod$0.slice/$2-$1.scope/cgroup.procs");
cgroup_kubepod_besteffort_path_template_ = absl::StrCat(
cgroup_kubepods_base_path,
"/kubepods-besteffort.slice/kubepods-besteffort-pod$0.slice/$2-$1.scope/cgroup.procs");
cgroup_kubepod_burstable_path_template_ = absl::StrCat(
cgroup_kubepods_base_path,
"/kubepods-burstable.slice/kubepods-burstable-pod$0.slice/$2-$1.scope/cgroup.procs");
cgroup_kubepod_convert_dashes_ = true;
return Status::OK();
}
}
return error::NotFound("Could not find kubepods slice under sysfs ($0)", sysfs_path);
}

Expand Down
2 changes: 1 addition & 1 deletion src/shared/metadata/cgroup_path_resolver.h
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,7 @@
#include "src/common/system/system.h"
#include "src/shared/metadata/k8s_objects.h"

DECLARE_bool(force_cgroup2_mode);
DECLARE_bool(test_only_force_cgroup2_mode);

namespace px {
namespace md {
Expand Down
15 changes: 13 additions & 2 deletions src/shared/metadata/cgroup_path_resolver_test.cc
Original file line number Diff line number Diff line change
Expand Up @@ -316,7 +316,8 @@ TEST(LegacyCGroupPathResolverTest, StandardFormat) {
ContainerType::kContainerd));
}

TEST(LeagcyCGroupPathResolverTest, Cgroup2Format) {
TEST(LegacyCGroupPathResolverTest, Cgroup2Format) {
PX_SET_FOR_SCOPE(FLAGS_test_only_force_cgroup2_mode, true);
ASSERT_OK_AND_ASSIGN(
auto path_resolver,
LegacyCGroupPathResolver::Create(GetSysFsPathFromTestDataFile(
Expand All @@ -326,7 +327,6 @@ TEST(LeagcyCGroupPathResolverTest, Cgroup2Format) {
"cgroup.procs",
"testdata/sysfs3")));

FLAGS_force_cgroup2_mode = true;
EXPECT_EQ(
GetPathToTestDataFile(
"testdata/sysfs3/cgroup/kubepods.slice/kubepods-besteffort.slice/"
Expand Down Expand Up @@ -379,5 +379,16 @@ TEST(CGroupPathResolver, Cgroup2Format) {
"docker-a7638fe3934b37419cc56bca73465a02b354ba6e98e10272542d84eb2014dd62.scope/cgroup.procs");
}

/**
* TODO(ddelnano): Refactor the cgroup resolver code so that logic within AutoDiscoverCGroupPath
* and CGroupBasePaths can be tested. OVH's managed k8s failed to work with our logic because
* it enables cgroup v1 and v2 while the PEM existed in a v2 cgroup. Ideally our tests would
* cover the following scenarios for both LegacyCGroupPathResolver and CGroupPathResolver:
* 1. cgroup1 only
* 2. cgroup2 only
* 3. cgroup1+cgroup2 w/ cgroup1 failing (gh#XXX bug)
* 4. cgroup1+cgroup2 w/ cgroup1 succeeding
*/

} // namespace md
} // namespace px

0 comments on commit 32fa98c

Please sign in to comment.