From b295d2d34b57edfb130cfc084561b10741adc958 Mon Sep 17 00:00:00 2001 From: "Maciej \"Iwan\" Iwanowski" Date: Fri, 12 Feb 2021 21:25:39 +0100 Subject: [PATCH 1/2] Fixes #2792 - sched_getaffinity does not return number of online CPUs --- container/libcontainer/handler.go | 51 +++----------------------- container/libcontainer/handler_test.go | 15 ++------ 2 files changed, 9 insertions(+), 57 deletions(-) diff --git a/container/libcontainer/handler.go b/container/libcontainer/handler.go index 436379b762..4c18bd3bf0 100644 --- a/container/libcontainer/handler.go +++ b/container/libcontainer/handler.go @@ -29,14 +29,13 @@ import ( "strings" "time" - "github.com/google/cadvisor/container" - info "github.com/google/cadvisor/info/v1" - "golang.org/x/sys/unix" - "github.com/opencontainers/runc/libcontainer" "github.com/opencontainers/runc/libcontainer/cgroups" - fs2 "github.com/opencontainers/runc/libcontainer/cgroups/fs2" + "github.com/opencontainers/runc/libcontainer/cgroups/fs2" "k8s.io/klog/v2" + + "github.com/google/cadvisor/container" + info "github.com/google/cadvisor/info/v1" ) var ( @@ -758,16 +757,6 @@ func (h *Handler) GetProcesses() ([]int, error) { return pids, nil } -func minUint32(x, y uint32) uint32 { - if x < y { - return x - } - return y -} - -// var to allow unit tests to stub it out -var numCpusFunc = getNumberOnlineCPUs - // Convert libcontainer stats to info.ContainerStats. func setCPUStats(s *cgroups.Stats, ret *info.ContainerStats, withPerCPU bool) { ret.Cpu.Usage.User = s.CpuStats.CpuUsage.UsageInUsermode @@ -785,37 +774,7 @@ func setCPUStats(s *cgroups.Stats, ret *info.ContainerStats, withPerCPU bool) { // cpuacct subsystem. return } - - numPossible := uint32(len(s.CpuStats.CpuUsage.PercpuUsage)) - // Note that as of https://patchwork.kernel.org/patch/8607101/ (kernel v4.7), - // the percpu usage information includes extra zero values for all additional - // possible CPUs. This is to allow statistic collection after CPU-hotplug. - // We intentionally ignore these extra zeroes. - numActual, err := numCpusFunc() - if err != nil { - klog.Errorf("unable to determine number of actual cpus; defaulting to maximum possible number: errno %v", err) - numActual = numPossible - } - if numActual > numPossible { - // The real number of cores should never be greater than the number of - // datapoints reported in cpu usage. - klog.Errorf("PercpuUsage had %v cpus, but the actual number is %v; ignoring extra CPUs", numPossible, numActual) - } - numActual = minUint32(numPossible, numActual) - ret.Cpu.Usage.PerCpu = make([]uint64, numActual) - - for i := uint32(0); i < numActual; i++ { - ret.Cpu.Usage.PerCpu[i] = s.CpuStats.CpuUsage.PercpuUsage[i] - } - -} - -func getNumberOnlineCPUs() (uint32, error) { - var availableCPUs unix.CPUSet - if err := unix.SchedGetaffinity(0, &availableCPUs); err != nil { - return 0, err - } - return uint32(availableCPUs.Count()), nil + ret.Cpu.Usage.PerCpu = s.CpuStats.CpuUsage.PercpuUsage } func setDiskIoStats(s *cgroups.Stats, ret *info.ContainerStats) { diff --git a/container/libcontainer/handler_test.go b/container/libcontainer/handler_test.go index 21b2ff473b..1046c6bfab 100644 --- a/container/libcontainer/handler_test.go +++ b/container/libcontainer/handler_test.go @@ -96,18 +96,11 @@ const nanosecondsInSeconds = 1000000000 // https://github.com/containerd/cgroups/pull/12 const clockTicks = 100 -func TestMorePossibleCPUs(t *testing.T) { - realNumCPUs := uint32(8) - numCpusFunc = func() (uint32, error) { - return realNumCPUs, nil - } - possibleCPUs := uint32(31) - - perCPUUsage := make([]uint64, possibleCPUs) - for i := uint32(0); i < realNumCPUs; i++ { +func TestSetCPUStats(t *testing.T) { + perCPUUsage := make([]uint64, 31) + for i := uint32(0); i < 31; i++ { perCPUUsage[i] = 8562955455524 } - s := &cgroups.Stats{ CpuStats: cgroups.CpuStats{ CpuUsage: cgroups.CpuUsage{ @@ -124,7 +117,7 @@ func TestMorePossibleCPUs(t *testing.T) { expected := info.ContainerStats{ Cpu: info.CpuStats{ Usage: info.CpuUsage{ - PerCpu: perCPUUsage[0:realNumCPUs], + PerCpu: perCPUUsage, User: s.CpuStats.CpuUsage.UsageInUsermode, System: s.CpuStats.CpuUsage.UsageInKernelmode, Total: 33802947350272, From 51bf0b2c7c03913cce5d20e3abd8ea85289a8999 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Pawe=C5=82=20Szulik?= Date: Fri, 5 Feb 2021 16:21:28 +0100 Subject: [PATCH 2/2] Fix incorrect CPU topology on single NUMA and multi socket platform. MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: Paweł Szulik --- utils/sysinfo/sysinfo.go | 32 ++--- utils/sysinfo/sysinfo_test.go | 245 ++++++++++++++++++++++++---------- 2 files changed, 193 insertions(+), 84 deletions(-) diff --git a/utils/sysinfo/sysinfo.go b/utils/sysinfo/sysinfo.go index 9ef62b8580..eade34195e 100644 --- a/utils/sysinfo/sysinfo.go +++ b/utils/sysinfo/sysinfo.go @@ -383,7 +383,7 @@ func getCoresInfo(sysFs sysfs.SysFs, cpuDirs []string) ([]info.Core, error) { for _, cpuDir := range cpuDirs { cpuID, err := getMatchedInt(cpuDirRegExp, cpuDir) if err != nil { - return nil, fmt.Errorf("Unexpected format of CPU directory, cpuDirRegExp %s, cpuDir: %s", cpuDirRegExp, cpuDir) + return nil, fmt.Errorf("unexpected format of CPU directory, cpuDirRegExp %s, cpuDir: %s", cpuDirRegExp, cpuDir) } if !sysFs.IsCPUOnline(cpuDir) { continue @@ -401,9 +401,22 @@ func getCoresInfo(sysFs sysfs.SysFs, cpuDirs []string) ([]info.Core, error) { return nil, err } + rawPhysicalPackageID, err := sysFs.GetCPUPhysicalPackageID(cpuDir) + if os.IsNotExist(err) { + klog.Warningf("Cannot read physical package id for %s, physical_package_id file does not exist, err: %s", cpuDir, err) + continue + } else if err != nil { + return nil, err + } + + physicalPackageID, err := strconv.Atoi(rawPhysicalPackageID) + if err != nil { + return nil, err + } + coreIDx := -1 for id, core := range cores { - if core.Id == physicalID { + if core.Id == physicalID && core.SocketID == physicalPackageID { coreIDx = id } } @@ -414,25 +427,14 @@ func getCoresInfo(sysFs sysfs.SysFs, cpuDirs []string) ([]info.Core, error) { desiredCore := &cores[coreIDx] desiredCore.Id = physicalID + desiredCore.SocketID = physicalPackageID + if len(desiredCore.Threads) == 0 { desiredCore.Threads = []int{cpuID} } else { desiredCore.Threads = append(desiredCore.Threads, cpuID) } - rawPhysicalPackageID, err := sysFs.GetCPUPhysicalPackageID(cpuDir) - if os.IsNotExist(err) { - klog.Warningf("Cannot read physical package id for %s, physical_package_id file does not exist, err: %s", cpuDir, err) - continue - } else if err != nil { - return nil, err - } - - physicalPackageID, err := strconv.Atoi(rawPhysicalPackageID) - if err != nil { - return nil, err - } - desiredCore.SocketID = physicalPackageID } return cores, nil } diff --git a/utils/sysinfo/sysinfo_test.go b/utils/sysinfo/sysinfo_test.go index 25d18bc799..674b5a6c0e 100644 --- a/utils/sysinfo/sysinfo_test.go +++ b/utils/sysinfo/sysinfo_test.go @@ -104,71 +104,62 @@ func TestGetHugePagesInfoWithWrongNrHugePageValue(t *testing.T) { } func TestGetNodesInfo(t *testing.T) { - fakeSys := &fakesysfs.FakeSysFs{} - c := sysfs.CacheInfo{ - Size: 32 * 1024, - Type: "unified", - Level: 3, - Cpus: 2, - } - fakeSys.SetCacheInfo(c) - - nodesPaths := []string{ - "/fakeSysfs/devices/system/node/node0", - "/fakeSysfs/devices/system/node/node1", - } - fakeSys.SetNodesPaths(nodesPaths, nil) - - cpusPaths := map[string][]string{ - "/fakeSysfs/devices/system/node/node0": { - "/fakeSysfs/devices/system/node/node0/cpu0", - "/fakeSysfs/devices/system/node/node0/cpu1", - }, - "/fakeSysfs/devices/system/node/node1": { - "/fakeSysfs/devices/system/node/node0/cpu2", - "/fakeSysfs/devices/system/node/node0/cpu3", - }, - } - fakeSys.SetCPUsPaths(cpusPaths, nil) - - coreThread := map[string]string{ - "/fakeSysfs/devices/system/node/node0/cpu0": "0", - "/fakeSysfs/devices/system/node/node0/cpu1": "0", - "/fakeSysfs/devices/system/node/node0/cpu2": "1", - "/fakeSysfs/devices/system/node/node0/cpu3": "1", - } - fakeSys.SetCoreThreads(coreThread, nil) - - memTotal := "MemTotal: 32817192 kB" - fakeSys.SetMemory(memTotal, nil) - - hugePages := []os.FileInfo{ - &fakesysfs.FileInfo{EntryName: "hugepages-2048kB"}, - } - fakeSys.SetHugePages(hugePages, nil) - - hugePageNr := map[string]string{ - "/fakeSysfs/devices/system/node/node0/hugepages/hugepages-2048kB/nr_hugepages": "1", - "/fakeSysfs/devices/system/node/node1/hugepages/hugepages-2048kB/nr_hugepages": "1", - } - fakeSys.SetHugePagesNr(hugePageNr, nil) - - physicalPackageIDs := map[string]string{ - "/fakeSysfs/devices/system/node/node0/cpu0": "0", - "/fakeSysfs/devices/system/node/node0/cpu1": "0", - "/fakeSysfs/devices/system/node/node0/cpu2": "1", - "/fakeSysfs/devices/system/node/node0/cpu3": "1", - } - fakeSys.SetPhysicalPackageIDs(physicalPackageIDs, nil) - - nodes, cores, err := GetNodesInfo(fakeSys) - assert.Nil(t, err) - assert.Equal(t, 2, len(nodes)) - assert.Equal(t, 4, cores) - - nodesJSON, err := json.Marshal(nodes) - assert.Nil(t, err) - expectedNodes := ` + testCases := []struct { + cache sysfs.CacheInfo + nodesPaths []string + cpusPaths map[string][]string + coresThreads map[string]string + memTotal string + hugePages []os.FileInfo + hugePageNr map[string]string + physicalPackageIDs map[string]string + nodes int + cores int + expectedNodes string + }{ + { + sysfs.CacheInfo{ + Size: 32 * 1024, + Type: "unified", + Level: 3, + Cpus: 2, + }, + []string{ + "/fakeSysfs/devices/system/node/node0", + "/fakeSysfs/devices/system/node/node1"}, + map[string][]string{ + "/fakeSysfs/devices/system/node/node0": { + "/fakeSysfs/devices/system/node/node0/cpu0", + "/fakeSysfs/devices/system/node/node0/cpu1", + }, + "/fakeSysfs/devices/system/node/node1": { + "/fakeSysfs/devices/system/node/node0/cpu2", + "/fakeSysfs/devices/system/node/node0/cpu3", + }, + }, + map[string]string{ + "/fakeSysfs/devices/system/node/node0/cpu0": "0", + "/fakeSysfs/devices/system/node/node0/cpu1": "0", + "/fakeSysfs/devices/system/node/node0/cpu2": "1", + "/fakeSysfs/devices/system/node/node0/cpu3": "1", + }, + "MemTotal: 32817192 kB", + []os.FileInfo{ + &fakesysfs.FileInfo{EntryName: "hugepages-2048kB"}, + }, + map[string]string{ + "/fakeSysfs/devices/system/node/node0/hugepages/hugepages-2048kB/nr_hugepages": "1", + "/fakeSysfs/devices/system/node/node1/hugepages/hugepages-2048kB/nr_hugepages": "1", + }, + map[string]string{ + "/fakeSysfs/devices/system/node/node0/cpu0": "0", + "/fakeSysfs/devices/system/node/node0/cpu1": "0", + "/fakeSysfs/devices/system/node/node0/cpu2": "1", + "/fakeSysfs/devices/system/node/node0/cpu3": "1", + }, + 2, + 4, + ` [ { "node_id": 0, @@ -227,8 +218,124 @@ func TestGetNodesInfo(t *testing.T) { ] } ] - ` - assert.JSONEq(t, expectedNodes, string(nodesJSON)) + `, + }, + { + sysfs.CacheInfo{ + Size: 32 * 1024, + Type: "unified", + Level: 3, + Cpus: 6, + }, + []string{ + "/fakeSysfs/devices/system/node/node0"}, + map[string][]string{ + "/fakeSysfs/devices/system/node/node0": { + "/fakeSysfs/devices/system/node/node0/cpu0", + "/fakeSysfs/devices/system/node/node0/cpu1", + "/fakeSysfs/devices/system/node/node0/cpu2", + "/fakeSysfs/devices/system/node/node0/cpu3", + "/fakeSysfs/devices/system/node/node0/cpu4", + "/fakeSysfs/devices/system/node/node0/cpu5", + }, + }, + map[string]string{ + "/fakeSysfs/devices/system/node/node0/cpu0": "0", + "/fakeSysfs/devices/system/node/node0/cpu1": "0", + "/fakeSysfs/devices/system/node/node0/cpu2": "1", + "/fakeSysfs/devices/system/node/node0/cpu3": "1", + "/fakeSysfs/devices/system/node/node0/cpu4": "2", + "/fakeSysfs/devices/system/node/node0/cpu5": "2", + }, + "MemTotal: 32817192 kB", + []os.FileInfo{ + &fakesysfs.FileInfo{EntryName: "hugepages-2048kB"}, + }, + map[string]string{ + "/fakeSysfs/devices/system/node/node0/hugepages/hugepages-2048kB/nr_hugepages": "1", + }, + map[string]string{ + "/fakeSysfs/devices/system/node/node0/cpu0": "0", + "/fakeSysfs/devices/system/node/node0/cpu1": "0", + "/fakeSysfs/devices/system/node/node0/cpu2": "1", + "/fakeSysfs/devices/system/node/node0/cpu3": "1", + "/fakeSysfs/devices/system/node/node0/cpu4": "2", + "/fakeSysfs/devices/system/node/node0/cpu5": "2", + }, + 1, + 6, + ` + [ + { + "node_id": 0, + "memory": 33604804608, + "hugepages": [ + { + "page_size": 2048, + "num_pages": 1 + } + ], + "cores": [ + { + "core_id": 0, + "thread_ids": [ + 0, + 1 + ], + "caches": null, + "socket_id": 0 + }, + { + "core_id": 1, + "thread_ids": [ + 2, + 3 + ], + "caches": null, + "socket_id": 1 + }, + { + "core_id": 2, + "thread_ids": [ + 4, + 5 + ], + "caches": null, + "socket_id": 2 + } + ], + "caches": [ + { + "size": 32768, + "type": "unified", + "level": 3 + } + ] + } + ] + `, + }, + } + + for _, test := range testCases { + fakeSys := &fakesysfs.FakeSysFs{} + fakeSys.SetCacheInfo(test.cache) + fakeSys.SetNodesPaths(test.nodesPaths, nil) + fakeSys.SetCPUsPaths(test.cpusPaths, nil) + fakeSys.SetCoreThreads(test.coresThreads, nil) + fakeSys.SetMemory(test.memTotal, nil) + fakeSys.SetHugePages(test.hugePages, nil) + fakeSys.SetHugePagesNr(test.hugePageNr, nil) + fakeSys.SetPhysicalPackageIDs(test.physicalPackageIDs, nil) + nodes, cores, err := GetNodesInfo(fakeSys) + assert.Nil(t, err) + assert.Equal(t, test.nodes, len(nodes)) + assert.Equal(t, test.cores, cores) + + nodesJSON, err := json.Marshal(nodes) + assert.Nil(t, err) + assert.JSONEq(t, test.expectedNodes, string(nodesJSON)) + } } func TestGetNodesInfoWithOfflineCPUs(t *testing.T) { @@ -996,7 +1103,7 @@ func TestGetNodesWhenPhysicalPackageIDMissingForOneCPU(t *testing.T) { assert.Nil(t, err) assert.Equal(t, 1, len(nodes)) - assert.Equal(t, 2, cores) + assert.Equal(t, 1, cores) sort.Slice(nodes, func(i, j int) bool { return nodes[i].Id < nodes[j].Id @@ -1019,7 +1126,7 @@ func TestGetNodesWhenPhysicalPackageIDMissingForOneCPU(t *testing.T) { { "core_id":0, "thread_ids":[ - 0, 1 + 0 ], "caches":null, "socket_id":0