Skip to content

Commit

Permalink
fix bug in metrics
Browse files Browse the repository at this point in the history
  • Loading branch information
archlitchi committed Jun 30, 2023
1 parent 899dcf0 commit 651d543
Show file tree
Hide file tree
Showing 3 changed files with 10 additions and 4 deletions.
6 changes: 6 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -148,3 +148,9 @@ Fix device memory calculation error after container crashloop
Fix env cuda_oversubscribe not set properly when MemoryScaling < 1

Fix MemoryScaling not working when set < 1

## v2.2.15

Move shared-memory from /tmp/xxx.cache to /usr/local/vgpu/xxx.cache inside container

Add Deviceidx to scheduler monitor APIs (31993)
2 changes: 1 addition & 1 deletion charts/vgpu/values.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@
nameOverride: ""
fullnameOverride: ""
imagePullSecrets: []
version: "v2.2.14"
version: "v2.2.15"

#Nvidia GPU Parameters
resourceName: "nvidia.com/gpu"
Expand Down
6 changes: 3 additions & 3 deletions cmd/scheduler/metrics.go
Original file line number Diff line number Diff line change
Expand Up @@ -149,17 +149,17 @@ func (cc ClusterManagerCollector) Collect(ch chan<- prometheus.Metric) {
ctrvGPUDeviceAllocatedDesc := prometheus.NewDesc(
"vGPUPodsDeviceAllocated",
"vGPU Allocated from pods",
[]string{"namespace", "nodename", "podname", "containeridx", "deviceuuid", "deviceidx", "deviceusedcore"}, nil,
[]string{"namespace", "nodename", "podname", "containeridx", "deviceuuid", "deviceusedcore"}, nil,
)
ctrvGPUdeviceAllocatedMemoryPercentageDesc := prometheus.NewDesc(
"vGPUMemoryPercentage",
"vGPU memory percentage allocated from a container",
[]string{"namespace", "nodename", "podname", "containeridx", "deviceuuid", "deviceidx"}, nil,
[]string{"namespace", "nodename", "podname", "containeridx", "deviceuuid"}, nil,
)
ctrvGPUdeviceAllocateCorePercentageDesc := prometheus.NewDesc(
"vGPUCorePercentage",
"vGPU core allocated from a container",
[]string{"namespace", "nmodename", "podname", "containeridx", "deviceuuid", "deviceidx"}, nil,
[]string{"namespace", "nmodename", "podname", "containeridx", "deviceuuid"}, nil,
)
schedpods, _ := sher.GetScheduledPods()
for _, val := range schedpods {
Expand Down

0 comments on commit 651d543

Please sign in to comment.