Skip to content

Commit

Permalink
Refactor FlavorAssigner (#3536)
Browse files Browse the repository at this point in the history
  • Loading branch information
gabesaba authored Nov 14, 2024
1 parent 9624ed8 commit c655645
Show file tree
Hide file tree
Showing 5 changed files with 47 additions and 53 deletions.
5 changes: 5 additions & 0 deletions pkg/resources/requests.go
Original file line number Diff line number Diff line change
Expand Up @@ -92,6 +92,11 @@ func ResourceQuantity(name corev1.ResourceName, v int64) resource.Quantity {
}
}

func ResourceQuantityString(name corev1.ResourceName, v int64) string {
rq := ResourceQuantity(name, v)
return rq.String()
}

func (req Requests) CountIn(capacity Requests) int32 {
var result *int32
for rName, rValue := range req {
Expand Down
56 changes: 23 additions & 33 deletions pkg/scheduler/flavorassigner/flavorassigner.go
Original file line number Diff line number Diff line change
Expand Up @@ -603,47 +603,37 @@ func flavorSelector(spec *corev1.PodSpec, allowedKeys sets.Set[string]) nodeaffi
func (a *FlavorAssigner) fitsResourceQuota(log logr.Logger, fr resources.FlavorResource, val int64, rQuota cache.ResourceQuota) (granularMode, bool, *Status) {
var status Status
var borrow bool
used := a.cq.ResourceNode.Usage[fr]
mode := noFit
if val <= rQuota.Nominal {
// The request can be satisfied by the nominal quota, assuming quota is
// reclaimed from the cohort or assuming all active workloads in the
// ClusterQueue are preempted.
mode = preempt
}

if a.canPreemptWhileBorrowing() {
// when preemption with borrowing is enabled, we can succeed to admit the
// workload if preemption is used.
if (rQuota.BorrowingLimit == nil || val <= rQuota.Nominal+*rQuota.BorrowingLimit) && val <= a.cq.PotentialAvailable(fr) {
mode = preempt
borrow = val > rQuota.Nominal
}
}
if rQuota.BorrowingLimit != nil && used+val > rQuota.Nominal+*rQuota.BorrowingLimit {
status.append(fmt.Sprintf("borrowing limit for %s in flavor %s exceeded", fr.Resource, fr.Flavor))
return mode, borrow, &status
}
available := a.cq.Available(fr)
maxCapacity := a.cq.PotentialAvailable(fr)

if a.oracle.IsReclaimPossible(log, a.cq, *a.wl, fr, val) {
mode = reclaim
// No Fit
if val > maxCapacity {
status.append(fmt.Sprintf("insufficient quota for %s in flavor %s, request > maximum capacity (%s > %s)",
fr.Resource, fr.Flavor, resources.ResourceQuantityString(fr.Resource, val), resources.ResourceQuantityString(fr.Resource, maxCapacity)))
return noFit, false, &status
}

lack := val - a.cq.Available(fr)
if lack <= 0 {
return fit, used+val > rQuota.Nominal, nil
// Fit
if val <= available {
return fit, a.cq.ResourceNode.Usage[fr]+val > rQuota.Nominal, nil
}

lackQuantity := resources.ResourceQuantity(fr.Resource, lack)
msg := fmt.Sprintf("insufficient unused quota in cohort for %s in flavor %s, %s more needed", fr.Resource, fr.Flavor, &lackQuantity)
if !a.cq.HasParent() {
if mode == noFit {
msg = fmt.Sprintf("insufficient quota for %s in flavor %s in ClusterQueue", fr.Resource, fr.Flavor)
} else {
msg = fmt.Sprintf("insufficient unused quota for %s in flavor %s, %s more needed", fr.Resource, fr.Flavor, &lackQuantity)
// Check if preemption is possible
mode := noFit
if val <= rQuota.Nominal {
mode = preempt
if a.oracle.IsReclaimPossible(log, a.cq, *a.wl, fr, val) {
mode = reclaim
}
} else if a.canPreemptWhileBorrowing() {
mode = preempt
borrow = true
}
status.append(msg)

status.append(fmt.Sprintf("insufficient unused quota for %s in flavor %s, %s more needed",
fr.Resource, fr.Flavor, resources.ResourceQuantityString(fr.Resource, val-available)))

return mode, borrow, &status
}

Expand Down
33 changes: 16 additions & 17 deletions pkg/scheduler/flavorassigner/flavorassigner_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,6 @@ limitations under the License.
package flavorassigner

import (
"fmt"
"testing"

"github.com/go-logr/logr"
Expand Down Expand Up @@ -254,7 +253,7 @@ func TestAssignFlavors(t *testing.T) {
},
Status: &Status{
reasons: []string{
"insufficient quota for memory in flavor b_one in ClusterQueue",
"insufficient quota for memory in flavor b_one, request > maximum capacity (10Mi > 1Mi)",
},
},
Count: 1,
Expand Down Expand Up @@ -366,9 +365,9 @@ func TestAssignFlavors(t *testing.T) {
},
Status: &Status{
reasons: []string{
"insufficient unused quota in cohort for cpu in flavor one, 1 more needed",
"insufficient unused quota in cohort for memory in flavor two, 5Mi more needed",
"insufficient unused quota in cohort for example.com/gpu in flavor b_one, 1 more needed",
"insufficient quota for cpu in flavor one, request > maximum capacity (3 > 2)",
"insufficient unused quota for memory in flavor two, 5Mi more needed",
"insufficient unused quota for example.com/gpu in flavor b_one, 1 more needed",
},
},
Count: 1,
Expand Down Expand Up @@ -407,8 +406,8 @@ func TestAssignFlavors(t *testing.T) {
},
Status: &Status{
reasons: []string{
"insufficient quota for cpu in flavor one in ClusterQueue",
"insufficient quota for memory in flavor two in ClusterQueue",
"insufficient quota for cpu in flavor one, request > maximum capacity (3 > 2)",
"insufficient quota for memory in flavor two, request > maximum capacity (10Mi > 5Mi)",
},
},
Count: 1,
Expand Down Expand Up @@ -836,7 +835,7 @@ func TestAssignFlavors(t *testing.T) {
corev1.ResourceCPU: resource.MustParse("2"),
},
Status: &Status{
reasons: []string{"insufficient unused quota in cohort for cpu in flavor one, 1 more needed"},
reasons: []string{"insufficient quota for cpu in flavor one, request > maximum capacity (2 > 1)"},
},
Count: 1,
}},
Expand Down Expand Up @@ -882,7 +881,7 @@ func TestAssignFlavors(t *testing.T) {
},

Status: &Status{
reasons: []string{"borrowing limit for cpu in flavor one exceeded"},
reasons: []string{"insufficient unused quota for cpu in flavor one, 1 more needed"},
},
Count: 1,
}},
Expand Down Expand Up @@ -963,7 +962,7 @@ func TestAssignFlavors(t *testing.T) {
corev1.ResourceCPU: resource.MustParse("2"),
},
Status: &Status{
reasons: []string{"insufficient unused quota in cohort for cpu in flavor one, 2 more needed"},
reasons: []string{"insufficient unused quota for cpu in flavor one, 2 more needed"},
},
Count: 1,
}},
Expand Down Expand Up @@ -1080,7 +1079,7 @@ func TestAssignFlavors(t *testing.T) {
},
Status: &Status{
reasons: []string{
"insufficient quota for cpu in flavor one in ClusterQueue",
"insufficient quota for cpu in flavor one, request > maximum capacity (12 > 4)",
"insufficient unused quota for cpu in flavor tainted, 3 more needed",
},
},
Expand Down Expand Up @@ -1176,7 +1175,7 @@ func TestAssignFlavors(t *testing.T) {
corev1.ResourcePods: resource.MustParse("3"),
},
Status: &Status{
reasons: []string{fmt.Sprintf("insufficient quota for %s in flavor default in ClusterQueue", corev1.ResourcePods)},
reasons: []string{"insufficient quota for pods in flavor default, request > maximum capacity (3 > 2)"},
},
Count: 3,
}},
Expand Down Expand Up @@ -1504,7 +1503,7 @@ func TestAssignFlavors(t *testing.T) {
corev1.ResourceCPU: {Name: "one", Mode: Preempt, TriedFlavorIdx: 0},
},
Status: &Status{
reasons: []string{"insufficient unused quota in cohort for cpu in flavor one, 10 more needed"},
reasons: []string{"insufficient unused quota for cpu in flavor one, 10 more needed"},
},
Requests: corev1.ResourceList{
corev1.ResourceCPU: resource.MustParse("12"),
Expand Down Expand Up @@ -1561,7 +1560,7 @@ func TestAssignFlavors(t *testing.T) {
corev1.ResourceCPU: {Name: "one", Mode: Preempt, TriedFlavorIdx: 0},
},
Status: &Status{
reasons: []string{"insufficient unused quota in cohort for cpu in flavor one, 10 more needed"},
reasons: []string{"insufficient unused quota for cpu in flavor one, 10 more needed"},
},
Requests: corev1.ResourceList{
corev1.ResourceCPU: resource.MustParse("12"),
Expand Down Expand Up @@ -1660,7 +1659,7 @@ func TestAssignFlavors(t *testing.T) {
{
Name: "main",
Status: &Status{
reasons: []string{"insufficient unused quota in cohort for cpu in flavor one, 11 more needed"},
reasons: []string{"insufficient quota for cpu in flavor one, request > maximum capacity (12 > 11)"},
},
Requests: corev1.ResourceList{
corev1.ResourceCPU: resource.MustParse("12"),
Expand Down Expand Up @@ -1813,7 +1812,7 @@ func TestAssignFlavors(t *testing.T) {
corev1.ResourcePods: resource.MustParse("1"),
},
Status: &Status{
reasons: []string{"insufficient unused quota in cohort for cpu in flavor one, 1 more needed"},
reasons: []string{"insufficient unused quota for cpu in flavor one, 1 more needed"},
},
Count: 1,
}},
Expand Down Expand Up @@ -1861,7 +1860,7 @@ func TestAssignFlavors(t *testing.T) {
corev1.ResourceCPU: {Name: "one", Mode: Preempt, TriedFlavorIdx: 0},
},
Status: &Status{
reasons: []string{"insufficient unused quota in cohort for cpu in flavor one, 10 more needed"},
reasons: []string{"insufficient unused quota for cpu in flavor one, 10 more needed"},
},
Requests: corev1.ResourceList{
corev1.ResourceCPU: resource.MustParse("12"),
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -176,7 +176,7 @@ var _ = ginkgo.Describe("Scheduler", func() {
Type: kueue.WorkloadQuotaReserved,
Status: metav1.ConditionFalse,
Reason: "Pending",
Message: "couldn't assign flavors to pod set main: insufficient unused quota in cohort for cpu in flavor default, 2 more needed",
Message: "couldn't assign flavors to pod set main: insufficient quota for cpu in flavor default, request > maximum capacity (10 > 8)",
}, util.IgnoreConditionTimestampsAndObservedGeneration),
))
}, util.Timeout, util.Interval).Should(gomega.Succeed())
Expand Down
4 changes: 2 additions & 2 deletions test/integration/scheduler/scheduler_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -2421,7 +2421,7 @@ var _ = ginkgo.Describe("Scheduler", func() {
Type: kueue.WorkloadQuotaReserved,
Status: metav1.ConditionFalse,
Reason: "Pending",
Message: "couldn't assign flavors to pod set main: insufficient unused quota in cohort for memory in flavor on-demand, 1Gi more needed",
Message: "couldn't assign flavors to pod set main: insufficient unused quota for memory in flavor on-demand, 1Gi more needed",
}, util.IgnoreConditionTimestampsAndObservedGeneration),
))
}, util.Timeout, util.Interval).Should(gomega.Succeed())
Expand Down Expand Up @@ -2463,7 +2463,7 @@ var _ = ginkgo.Describe("Scheduler", func() {
Type: kueue.WorkloadQuotaReserved,
Status: metav1.ConditionFalse,
Reason: "Pending",
Message: "couldn't assign flavors to pod set main: insufficient unused quota in cohort for memory in flavor on-demand, 1Gi more needed",
Message: "couldn't assign flavors to pod set main: insufficient unused quota for memory in flavor on-demand, 1Gi more needed",
}, util.IgnoreConditionTimestampsAndObservedGeneration),
))
}, util.Timeout, util.Interval).Should(gomega.Succeed())
Expand Down

0 comments on commit c655645

Please sign in to comment.