Skip to content

Commit

Permalink
Add kepler dockerfile to test new build process (#1112)
Browse files Browse the repository at this point in the history
* chore(build): Update go version to 1.20 in go.mod

Signed-off-by: Vimal Kumar <[email protected]>

* chore(lint): Fix lint errors

Signed-off-by: Vimal Kumar <[email protected]>

* WIP chore(build): Add Dockerfile to build kepler

Signed-off-by: Vimal Kumar <[email protected]>

---------

Signed-off-by: Vimal Kumar <[email protected]>
  • Loading branch information
vimalk78 authored Dec 11, 2023
1 parent e2c9397 commit f020c41
Show file tree
Hide file tree
Showing 10 changed files with 107 additions and 72 deletions.
34 changes: 34 additions & 0 deletions Dockerfile
Original file line number Diff line number Diff line change
@@ -0,0 +1,34 @@
# Build stage: compile kepler inside the project's builder image.
FROM quay.io/sustainable_computing_io/kepler_builder:ubi-9-libbpf-1.2.0 AS builder

WORKDIR /workspace

COPY . .

# Build with ATTACHER_TAG=libbpf; the resulting binary is picked up from
# /workspace/_output/bin/kepler by the runtime stage below.
RUN ATTACHER_TAG=libbpf make build

# Runtime stage: minimal UBI 9 image carrying only the binaries and data files.
FROM registry.access.redhat.com/ubi9-minimal:9.2

# NOTE(review): presumably read by the NVIDIA container runtime to expose all
# GPUs to the container - confirm.
ENV NVIDIA_VISIBLE_DEVICES=all

# Update, install and clean in a single layer so the package cache produced by
# the update step does not persist in an earlier image layer.
RUN INSTALL_PKGS=" \
    libbpf \
    " && \
    microdnf -y update && \
    microdnf install -y $INSTALL_PKGS && \
    microdnf clean all

# Executables taken from the builder stage.
COPY --from=builder /workspace/_output/bin/kepler /usr/bin/kepler
COPY --from=builder /libbpf-source/linux-5.14.0-333.el9/tools/bpf/bpftool/bpftool /usr/bin/bpftool
COPY --from=builder /usr/bin/cpuid /usr/bin/cpuid

# Data and BPF asset directories, created in one layer.
RUN mkdir -p /var/lib/kepler/data /var/lib/kepler/bpfassets
COPY --from=builder /workspace/data/cpus.yaml /var/lib/kepler/data/cpus.yaml
# Trailing slash makes the destination unambiguously a directory.
COPY --from=builder /workspace/bpfassets/libbpf/bpf.o /var/lib/kepler/bpfassets/

# Copy the model weights.
COPY --from=builder /workspace/data/model_weight/acpi_AbsPowerModel.json /var/lib/kepler/data/acpi_AbsPowerModel.json
COPY --from=builder /workspace/data/model_weight/acpi_DynPowerModel.json /var/lib/kepler/data/acpi_DynPowerModel.json
COPY --from=builder /workspace/data/model_weight/rapl_AbsPowerModel.json /var/lib/kepler/data/rapl_AbsPowerModel.json
COPY --from=builder /workspace/data/model_weight/rapl_DynPowerModel.json /var/lib/kepler/data/rapl_DynPowerModel.json

ENTRYPOINT ["/usr/bin/kepler"]
2 changes: 1 addition & 1 deletion go.mod
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
module github.com/sustainable-computing-io/kepler

go 1.18
go 1.20

require (
github.com/NVIDIA/go-nvml v0.11.6-0
Expand Down
30 changes: 16 additions & 14 deletions pkg/collector/metric/node_metric.go
Original file line number Diff line number Diff line change
Expand Up @@ -39,6 +39,8 @@ const (
OTHER = "other"
PLATFORM = "platform"
FREQUENCY = "frequency"
DYN = "_DYN"
IDLE = "_IDLE"
)

var (
Expand Down Expand Up @@ -228,46 +230,46 @@ func (ne *NodeMetrics) ToEstimatorValues(featuresName []string, shouldNormalize
case config.GpuUsageMetric: // for GPU resource usage
featureValues = append(featureValues, normalize(ne.ResourceUsage[config.GpuUsageMetric], shouldNormalize))

case PKG + "_DYN": // for dynamic PKG power consumption
case PKG + DYN: // for dynamic PKG power consumption
featureValues = append(featureValues, normalize(float64(ne.GetSumDeltaDynEnergyFromAllSources(PKG)), shouldNormalize))

case CORE + "_DYN": // for dynamic CORE power consumption
case CORE + DYN: // for dynamic CORE power consumption
featureValues = append(featureValues, normalize(float64(ne.GetSumDeltaDynEnergyFromAllSources(CORE)), shouldNormalize))

case DRAM + "_DYN": // for dynamic DRAM power consumption
case DRAM + DYN: // for dynamic DRAM power consumption
featureValues = append(featureValues, normalize(float64(ne.GetSumDeltaDynEnergyFromAllSources(DRAM)), shouldNormalize))

case UNCORE + "_DYN": // for dynamic UNCORE power consumption
case UNCORE + DYN: // for dynamic UNCORE power consumption
featureValues = append(featureValues, normalize(float64(ne.GetSumDeltaDynEnergyFromAllSources(UNCORE)), shouldNormalize))

case OTHER + "_DYN": // for dynamic OTHER power consumption
case OTHER + DYN: // for dynamic OTHER power consumption
featureValues = append(featureValues, normalize(float64(ne.GetSumDeltaDynEnergyFromAllSources(OTHER)), shouldNormalize))

case PLATFORM + "_DYN": // for dynamic PLATFORM power consumption
case PLATFORM + DYN: // for dynamic PLATFORM power consumption
featureValues = append(featureValues, normalize(float64(ne.GetSumDeltaDynEnergyFromAllSources(PLATFORM)), shouldNormalize))

case GPU + "_DYN": // for dynamic GPU power consumption
case GPU + DYN: // for dynamic GPU power consumption
featureValues = append(featureValues, normalize(float64(ne.GetSumDeltaDynEnergyFromAllSources(GPU)), shouldNormalize))

case PKG + "_IDLE": // for idle PKG power consumption
case PKG + IDLE: // for idle PKG power consumption
featureValues = append(featureValues, normalize(float64(ne.GetSumDeltaIdleEnergyFromAllSources(PKG)), shouldNormalize))

case CORE + "_IDLE": // for idle CORE power consumption
case CORE + IDLE: // for idle CORE power consumption
featureValues = append(featureValues, normalize(float64(ne.GetSumDeltaIdleEnergyFromAllSources(CORE)), shouldNormalize))

case DRAM + "_IDLE": // for idle DRAM power consumption
case DRAM + IDLE: // for idle DRAM power consumption
featureValues = append(featureValues, normalize(float64(ne.GetSumDeltaIdleEnergyFromAllSources(DRAM)), shouldNormalize))

case UNCORE + "_IDLE": // for idle UNCORE power consumption
case UNCORE + IDLE: // for idle UNCORE power consumption
featureValues = append(featureValues, normalize(float64(ne.GetSumDeltaIdleEnergyFromAllSources(UNCORE)), shouldNormalize))

case OTHER + "_IDLE": // for idle OTHER power consumption
case OTHER + IDLE: // for idle OTHER power consumption
featureValues = append(featureValues, normalize(float64(ne.GetSumDeltaIdleEnergyFromAllSources(OTHER)), shouldNormalize))

case PLATFORM + "_IDLE": // for idle PLATFORM power consumption
case PLATFORM + IDLE: // for idle PLATFORM power consumption
featureValues = append(featureValues, normalize(float64(ne.GetSumDeltaIdleEnergyFromAllSources(PLATFORM)), shouldNormalize))

case GPU + "_IDLE": // for idle GPU power consumption
case GPU + IDLE: // for idle GPU power consumption
featureValues = append(featureValues, normalize(float64(ne.GetSumDeltaIdleEnergyFromAllSources(GPU)), shouldNormalize))

default:
Expand Down
8 changes: 4 additions & 4 deletions pkg/libvirt/resolve_vm.go
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,7 @@ package libvirt

import (
"fmt"
"io/ioutil"
"os"
"path/filepath"
)

Expand All @@ -32,7 +32,7 @@ func getThreadIDsForPID(pid, fullPath string) []string {

procDir := fmt.Sprintf(fullPath, pid)

files, err := ioutil.ReadDir(procDir)
files, err := os.ReadDir(procDir)
if err != nil {
return nil
}
Expand All @@ -51,7 +51,7 @@ func GetCurrentVMPID(path ...string) (map[string]string, error) {
path = []string{libvirtPath, procPath}
}

files, err := ioutil.ReadDir(path[0])
files, err := os.ReadDir(path[0])
if err != nil {
return nil, err
}
Expand All @@ -63,7 +63,7 @@ func GetCurrentVMPID(path ...string) (map[string]string, error) {

if filepath.Ext(file.Name()) == ".pid" {
filePath := filepath.Join(path[0], file.Name())
content, err := ioutil.ReadFile(filePath)
content, err := os.ReadFile(filePath)
if err != nil {
fmt.Printf("Error reading %s: %v\n", filePath, err)
continue
Expand Down
3 changes: 1 addition & 2 deletions pkg/libvirt/resolve_vm_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,6 @@ limitations under the License.
package libvirt

import (
"io/ioutil"
"os"
"path/filepath"
"reflect"
Expand All @@ -34,7 +33,7 @@ func createMockLibvirtDir(directory string) {
}

for _, file := range mockFiles {
err := ioutil.WriteFile(filepath.Join(directory, file.name), []byte(file.content), 0644)
err := os.WriteFile(filepath.Join(directory, file.name), []byte(file.content), 0644)
if err != nil {
panic(err)
}
Expand Down
7 changes: 4 additions & 3 deletions pkg/model/benchmark_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -81,10 +81,11 @@ func benchmarkNtesting(b *testing.B, containerNumber int) {
nodeMetrics.UpdateDynEnergy()
b.ReportAllocs()
containersMetrics := map[string]*collector_metric.ContainerMetrics{}
const CONTAINER = "container"
for n := 0; n < containerNumber; n++ {
containersMetrics["container"+strconv.Itoa(n)] = collector_metric.NewContainerMetrics("container"+strconv.Itoa(n), "podA", "test", "container"+strconv.Itoa(n))
containersMetrics["container"+strconv.Itoa(n)].BPFStats[config.CoreUsageMetric] = &types.UInt64Stat{}
_ = containersMetrics["container"+strconv.Itoa(n)].BPFStats[config.CoreUsageMetric].AddNewDelta(30000)
containersMetrics[CONTAINER+strconv.Itoa(n)] = collector_metric.NewContainerMetrics(CONTAINER+strconv.Itoa(n), "podA", "test", CONTAINER+strconv.Itoa(n))
containersMetrics[CONTAINER+strconv.Itoa(n)].BPFStats[config.CoreUsageMetric] = &types.UInt64Stat{}
_ = containersMetrics[CONTAINER+strconv.Itoa(n)].BPFStats[config.CoreUsageMetric].AddNewDelta(30000)
}
nodeMetrics.AddNodeResUsageFromContainerResUsage(containersMetrics)
b.ResetTimer()
Expand Down
28 changes: 14 additions & 14 deletions pkg/model/container_energy.go
Original file line number Diff line number Diff line change
Expand Up @@ -72,18 +72,18 @@ func createContainerPowerModelConfig(powerSourceTarget string, containerFeatureN
// NodeFeatureNames contains the metrics that represent the node resource utilization plus the dynamic and idle power consumption
modelConfig.NodeFeatureNames = modelConfig.ContainerFeatureNames
modelConfig.NodeFeatureNames = append(modelConfig.NodeFeatureNames, []string{
collector_metric.PKG + "_DYN", // for dynamic PKG power consumption
collector_metric.CORE + "_DYN", // for dynamic CORE power consumption
collector_metric.DRAM + "_DYN", // for dynamic DRAM power consumption
collector_metric.UNCORE + "_DYN", // for dynamic UNCORE power consumption
collector_metric.OTHER + "_DYN", // for dynamic OTHER power consumption
collector_metric.GPU + "_DYN", // for dynamic GPU power consumption
collector_metric.PKG + "_IDLE", // for idle PKG power consumption
collector_metric.CORE + "_IDLE", // for idle CORE power consumption
collector_metric.DRAM + "_IDLE", // for idle DRAM power consumption
collector_metric.UNCORE + "_IDLE", // for idle UNCORE power consumption
collector_metric.OTHER + "_IDLE", // for idle OTHER power consumption
collector_metric.GPU + "_IDLE", // for idle GPU power consumption
collector_metric.PKG + collector_metric.DYN, // for dynamic PKG power consumption
collector_metric.CORE + collector_metric.DYN, // for dynamic CORE power consumption
collector_metric.DRAM + collector_metric.DYN, // for dynamic DRAM power consumption
collector_metric.UNCORE + collector_metric.DYN, // for dynamic UNCORE power consumption
collector_metric.OTHER + collector_metric.DYN, // for dynamic OTHER power consumption
collector_metric.GPU + collector_metric.DYN, // for dynamic GPU power consumption
collector_metric.PKG + collector_metric.IDLE, // for idle PKG power consumption
collector_metric.CORE + collector_metric.IDLE, // for idle CORE power consumption
collector_metric.DRAM + collector_metric.IDLE, // for idle DRAM power consumption
collector_metric.UNCORE + collector_metric.IDLE, // for idle UNCORE power consumption
collector_metric.OTHER + collector_metric.IDLE, // for idle OTHER power consumption
collector_metric.GPU + collector_metric.IDLE, // for idle GPU power consumption
}...)
} else if powerSourceTarget == config.ContainerPlatformPowerKey {
platformUsageMetric := config.CoreUsageMetric
Expand All @@ -96,8 +96,8 @@ func createContainerPowerModelConfig(powerSourceTarget string, containerFeatureN
}
modelConfig.NodeFeatureNames = modelConfig.ContainerFeatureNames
modelConfig.NodeFeatureNames = append(modelConfig.NodeFeatureNames, []string{
collector_metric.PLATFORM + "_DYN", // for dynamic PLATFORM power consumption
collector_metric.PLATFORM + "_IDLE", // for idle PLATFORM power consumption
collector_metric.PLATFORM + collector_metric.DYN, // for dynamic PLATFORM power consumption
collector_metric.PLATFORM + collector_metric.IDLE, // for idle PLATFORM power consumption
}...)
}
}
Expand Down
36 changes: 18 additions & 18 deletions pkg/model/estimator/local/ratio_model_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -108,24 +108,24 @@ var _ = Describe("Test Ratio Unit", func() {
config.GpuUsageMetric, // for GPU resource usage
},
NodeFeatureNames: []string{
config.CoreUsageMetric, // for PKG resource usage
config.CoreUsageMetric, // for CORE resource usage
config.DRAMUsageMetric, // for DRAM resource usage
config.GeneralUsageMetric, // for UNCORE resource usage
config.GeneralUsageMetric, // for OTHER resource usage
config.GpuUsageMetric, // for GPU resource usage
collector_metric.PKG + "_DYN", // for dynamic PKG power consumption
collector_metric.CORE + "_DYN", // for dynamic CORE power consumption
collector_metric.DRAM + "_DYN", // for dynamic PKG power consumption
collector_metric.UNCORE + "_DYN", // for dynamic UNCORE power consumption
collector_metric.OTHER + "_DYN", // for dynamic OTHER power consumption
collector_metric.GPU + "_DYN", // for dynamic GPU power consumption
collector_metric.PKG + "_IDLE", // for idle PKG power consumption
collector_metric.CORE + "_IDLE", // for idle CORE power consumption
collector_metric.DRAM + "_IDLE", // for idle PKG power consumption
collector_metric.UNCORE + "_IDLE", // for idle UNCORE power consumption
collector_metric.OTHER + "_IDLE", // for idle OTHER power consumption
collector_metric.GPU + "_IDLE", // for idle GPU power consumption
config.CoreUsageMetric, // for PKG resource usage
config.CoreUsageMetric, // for CORE resource usage
config.DRAMUsageMetric, // for DRAM resource usage
config.GeneralUsageMetric, // for UNCORE resource usage
config.GeneralUsageMetric, // for OTHER resource usage
config.GpuUsageMetric, // for GPU resource usage
collector_metric.PKG + collector_metric.DYN, // for dynamic PKG power consumption
collector_metric.CORE + collector_metric.DYN, // for dynamic CORE power consumption
collector_metric.DRAM + collector_metric.DYN, // for dynamic DRAM power consumption
collector_metric.UNCORE + collector_metric.DYN, // for dynamic UNCORE power consumption
collector_metric.OTHER + collector_metric.DYN, // for dynamic OTHER power consumption
collector_metric.GPU + collector_metric.DYN, // for dynamic GPU power consumption
collector_metric.PKG + collector_metric.IDLE, // for idle PKG power consumption
collector_metric.CORE + collector_metric.IDLE, // for idle CORE power consumption
collector_metric.DRAM + collector_metric.IDLE, // for idle DRAM power consumption
collector_metric.UNCORE + collector_metric.IDLE, // for idle UNCORE power consumption
collector_metric.OTHER + collector_metric.IDLE, // for idle OTHER power consumption
collector_metric.GPU + collector_metric.IDLE, // for idle GPU power consumption
},
}
model.ResetSampleIdx()
Expand Down
28 changes: 14 additions & 14 deletions pkg/model/process_power.go
Original file line number Diff line number Diff line change
Expand Up @@ -71,18 +71,18 @@ func createProcessPowerModelConfig(powerSourceTarget string, processFeatureNames
// NodeFeatureNames contains the metrics that represent the node resource utilization plus the dynamic and idle power consumption
modelConfig.NodeFeatureNames = modelConfig.ContainerFeatureNames
modelConfig.NodeFeatureNames = append(modelConfig.NodeFeatureNames, []string{
collector_metric.PKG + "_DYN", // for dynamic PKG power consumption
collector_metric.CORE + "_DYN", // for dynamic CORE power consumption
collector_metric.DRAM + "_DYN", // for dynamic DRAM power consumption
collector_metric.UNCORE + "_DYN", // for dynamic UNCORE power consumption
collector_metric.OTHER + "_DYN", // for dynamic OTHER power consumption
collector_metric.GPU + "_DYN", // for dynamic GPU power consumption
collector_metric.PKG + "_IDLE", // for idle PKG power consumption
collector_metric.CORE + "_IDLE", // for idle CORE power consumption
collector_metric.DRAM + "_IDLE", // for idle DRAM power consumption
collector_metric.UNCORE + "_IDLE", // for idle UNCORE power consumption
collector_metric.OTHER + "_IDLE", // for idle OTHER power consumption
collector_metric.GPU + "_IDLE", // for idle GPU power consumption
collector_metric.PKG + collector_metric.DYN, // for dynamic PKG power consumption
collector_metric.CORE + collector_metric.DYN, // for dynamic CORE power consumption
collector_metric.DRAM + collector_metric.DYN, // for dynamic DRAM power consumption
collector_metric.UNCORE + collector_metric.DYN, // for dynamic UNCORE power consumption
collector_metric.OTHER + collector_metric.DYN, // for dynamic OTHER power consumption
collector_metric.GPU + collector_metric.DYN, // for dynamic GPU power consumption
collector_metric.PKG + collector_metric.IDLE, // for idle PKG power consumption
collector_metric.CORE + collector_metric.IDLE, // for idle CORE power consumption
collector_metric.DRAM + collector_metric.IDLE, // for idle DRAM power consumption
collector_metric.UNCORE + collector_metric.IDLE, // for idle UNCORE power consumption
collector_metric.OTHER + collector_metric.IDLE, // for idle OTHER power consumption
collector_metric.GPU + collector_metric.IDLE, // for idle GPU power consumption
}...)
} else if powerSourceTarget == config.ProcessPlatformPowerKey {
platformUsageMetric := config.CoreUsageMetric
Expand All @@ -95,8 +95,8 @@ func createProcessPowerModelConfig(powerSourceTarget string, processFeatureNames
}
modelConfig.NodeFeatureNames = modelConfig.ContainerFeatureNames
modelConfig.NodeFeatureNames = append(modelConfig.NodeFeatureNames, []string{
collector_metric.PLATFORM + "_DYN", // for dynamic PLATFORM power consumption
collector_metric.PLATFORM + "_IDLE", // for idle PLATFORM power consumption
collector_metric.PLATFORM + collector_metric.DYN, // for dynamic PLATFORM power consumption
collector_metric.PLATFORM + collector_metric.IDLE, // for idle PLATFORM power consumption
}...)
}
}
Expand Down
3 changes: 1 addition & 2 deletions pkg/power/platform/source/redfish_util.go
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,6 @@ import (
"encoding/json"
"fmt"
"io"
"io/ioutil"
"net/http"
"strings"
"time"
Expand Down Expand Up @@ -70,7 +69,7 @@ func getRedfishModel(access RedfishAccessInfo, endpoint string, model interface{
return err
}
defer func() {
if _, err := io.Copy(ioutil.Discard, resp.Body); err != nil {
if _, err := io.Copy(io.Discard, resp.Body); err != nil {
klog.V(0).Infof("Failed to discard response body: %v", err)
}
resp.Body.Close()
Expand Down

0 comments on commit f020c41

Please sign in to comment.