From c6556458c39d103d260ca8c0bbeac15d9bea5352 Mon Sep 17 00:00:00 2001 From: gabesaba <15304068+gabesaba@users.noreply.github.com> Date: Thu, 14 Nov 2024 17:48:52 +0100 Subject: [PATCH] Refactor FlavorAssigner (#3536) --- pkg/resources/requests.go | 5 ++ .../flavorassigner/flavorassigner.go | 56 ++++++++----------- .../flavorassigner/flavorassigner_test.go | 33 ++++++----- .../fairsharing/fair_sharing_test.go | 2 +- test/integration/scheduler/scheduler_test.go | 4 +- 5 files changed, 47 insertions(+), 53 deletions(-) diff --git a/pkg/resources/requests.go b/pkg/resources/requests.go index 3cf4d60a93..2bc8566dd5 100644 --- a/pkg/resources/requests.go +++ b/pkg/resources/requests.go @@ -92,6 +92,11 @@ func ResourceQuantity(name corev1.ResourceName, v int64) resource.Quantity { } } +func ResourceQuantityString(name corev1.ResourceName, v int64) string { + rq := ResourceQuantity(name, v) + return rq.String() +} + func (req Requests) CountIn(capacity Requests) int32 { var result *int32 for rName, rValue := range req { diff --git a/pkg/scheduler/flavorassigner/flavorassigner.go b/pkg/scheduler/flavorassigner/flavorassigner.go index 76231deda1..cd727d225f 100644 --- a/pkg/scheduler/flavorassigner/flavorassigner.go +++ b/pkg/scheduler/flavorassigner/flavorassigner.go @@ -603,47 +603,37 @@ func flavorSelector(spec *corev1.PodSpec, allowedKeys sets.Set[string]) nodeaffi func (a *FlavorAssigner) fitsResourceQuota(log logr.Logger, fr resources.FlavorResource, val int64, rQuota cache.ResourceQuota) (granularMode, bool, *Status) { var status Status var borrow bool - used := a.cq.ResourceNode.Usage[fr] - mode := noFit - if val <= rQuota.Nominal { - // The request can be satisfied by the nominal quota, assuming quota is - // reclaimed from the cohort or assuming all active workloads in the - // ClusterQueue are preempted. - mode = preempt - } - if a.canPreemptWhileBorrowing() { - // when preemption with borrowing is enabled, we can succeed to admit the - // workload if preemption is used. - if (rQuota.BorrowingLimit == nil || val <= rQuota.Nominal+*rQuota.BorrowingLimit) && val <= a.cq.PotentialAvailable(fr) { - mode = preempt - borrow = val > rQuota.Nominal - } - } - if rQuota.BorrowingLimit != nil && used+val > rQuota.Nominal+*rQuota.BorrowingLimit { - status.append(fmt.Sprintf("borrowing limit for %s in flavor %s exceeded", fr.Resource, fr.Flavor)) - return mode, borrow, &status - } + available := a.cq.Available(fr) + maxCapacity := a.cq.PotentialAvailable(fr) - if a.oracle.IsReclaimPossible(log, a.cq, *a.wl, fr, val) { - mode = reclaim + // No Fit + if val > maxCapacity { + status.append(fmt.Sprintf("insufficient quota for %s in flavor %s, request > maximum capacity (%s > %s)", + fr.Resource, fr.Flavor, resources.ResourceQuantityString(fr.Resource, val), resources.ResourceQuantityString(fr.Resource, maxCapacity))) + return noFit, false, &status } - lack := val - a.cq.Available(fr) - if lack <= 0 { - return fit, used+val > rQuota.Nominal, nil + // Fit + if val <= available { + return fit, a.cq.ResourceNode.Usage[fr]+val > rQuota.Nominal, nil } - lackQuantity := resources.ResourceQuantity(fr.Resource, lack) - msg := fmt.Sprintf("insufficient unused quota in cohort for %s in flavor %s, %s more needed", fr.Resource, fr.Flavor, &lackQuantity) - if !a.cq.HasParent() { - if mode == noFit { - msg = fmt.Sprintf("insufficient quota for %s in flavor %s in ClusterQueue", fr.Resource, fr.Flavor) - } else { - msg = fmt.Sprintf("insufficient unused quota for %s in flavor %s, %s more needed", fr.Resource, fr.Flavor, &lackQuantity) + // Check if preemption is possible + mode := noFit + if val <= rQuota.Nominal { + mode = preempt + if a.oracle.IsReclaimPossible(log, a.cq, *a.wl, fr, val) { + mode = reclaim } + } else if a.canPreemptWhileBorrowing() { + mode = preempt + borrow = true } - status.append(msg) + + status.append(fmt.Sprintf("insufficient unused quota for %s in flavor %s, %s more needed", + fr.Resource, fr.Flavor, resources.ResourceQuantityString(fr.Resource, val-available))) + return mode, borrow, &status } diff --git a/pkg/scheduler/flavorassigner/flavorassigner_test.go b/pkg/scheduler/flavorassigner/flavorassigner_test.go index fce7041c41..eb60f73a31 100644 --- a/pkg/scheduler/flavorassigner/flavorassigner_test.go +++ b/pkg/scheduler/flavorassigner/flavorassigner_test.go @@ -17,7 +17,6 @@ limitations under the License. package flavorassigner import ( - "fmt" "testing" "github.com/go-logr/logr" @@ -254,7 +253,7 @@ func TestAssignFlavors(t *testing.T) { }, Status: &Status{ reasons: []string{ - "insufficient quota for memory in flavor b_one in ClusterQueue", + "insufficient quota for memory in flavor b_one, request > maximum capacity (10Mi > 1Mi)", }, }, Count: 1, @@ -366,9 +365,9 @@ func TestAssignFlavors(t *testing.T) { }, Status: &Status{ reasons: []string{ - "insufficient unused quota in cohort for cpu in flavor one, 1 more needed", - "insufficient unused quota in cohort for memory in flavor two, 5Mi more needed", - "insufficient unused quota in cohort for example.com/gpu in flavor b_one, 1 more needed", + "insufficient quota for cpu in flavor one, request > maximum capacity (3 > 2)", + "insufficient unused quota for memory in flavor two, 5Mi more needed", + "insufficient unused quota for example.com/gpu in flavor b_one, 1 more needed", }, }, Count: 1, @@ -407,8 +406,8 @@ func TestAssignFlavors(t *testing.T) { }, Status: &Status{ reasons: []string{ - "insufficient quota for cpu in flavor one in ClusterQueue", - "insufficient quota for memory in flavor two in ClusterQueue", + "insufficient quota for cpu in flavor one, request > maximum capacity (3 > 2)", + "insufficient quota for memory in flavor two, request > maximum capacity (10Mi > 5Mi)", }, }, Count: 1, @@ -836,7 +835,7 @@ func TestAssignFlavors(t *testing.T) { corev1.ResourceCPU: resource.MustParse("2"), }, Status: &Status{ - reasons: []string{"insufficient unused quota in cohort for cpu in flavor one, 1 more needed"}, + reasons: []string{"insufficient quota for cpu in flavor one, request > maximum capacity (2 > 1)"}, }, Count: 1, }}, @@ -882,7 +881,7 @@ func TestAssignFlavors(t *testing.T) { }, Status: &Status{ - reasons: []string{"borrowing limit for cpu in flavor one exceeded"}, + reasons: []string{"insufficient unused quota for cpu in flavor one, 1 more needed"}, }, Count: 1, }}, @@ -963,7 +962,7 @@ func TestAssignFlavors(t *testing.T) { corev1.ResourceCPU: resource.MustParse("2"), }, Status: &Status{ - reasons: []string{"insufficient unused quota in cohort for cpu in flavor one, 2 more needed"}, + reasons: []string{"insufficient unused quota for cpu in flavor one, 2 more needed"}, }, Count: 1, }}, @@ -1080,7 +1079,7 @@ func TestAssignFlavors(t *testing.T) { }, Status: &Status{ reasons: []string{ - "insufficient quota for cpu in flavor one in ClusterQueue", + "insufficient quota for cpu in flavor one, request > maximum capacity (12 > 4)", "insufficient unused quota for cpu in flavor tainted, 3 more needed", }, }, @@ -1176,7 +1175,7 @@ func TestAssignFlavors(t *testing.T) { corev1.ResourcePods: resource.MustParse("3"), }, Status: &Status{ - reasons: []string{fmt.Sprintf("insufficient quota for %s in flavor default in ClusterQueue", corev1.ResourcePods)}, + reasons: []string{"insufficient quota for pods in flavor default, request > maximum capacity (3 > 2)"}, }, Count: 3, }}, @@ -1504,7 +1503,7 @@ func TestAssignFlavors(t *testing.T) { corev1.ResourceCPU: {Name: "one", Mode: Preempt, TriedFlavorIdx: 0}, }, Status: &Status{ - reasons: []string{"insufficient unused quota in cohort for cpu in flavor one, 10 more needed"}, + reasons: []string{"insufficient unused quota for cpu in flavor one, 10 more needed"}, }, Requests: corev1.ResourceList{ corev1.ResourceCPU: resource.MustParse("12"), @@ -1561,7 +1560,7 @@ func TestAssignFlavors(t *testing.T) { corev1.ResourceCPU: {Name: "one", Mode: Preempt, TriedFlavorIdx: 0}, }, Status: &Status{ - reasons: []string{"insufficient unused quota in cohort for cpu in flavor one, 10 more needed"}, + reasons: []string{"insufficient unused quota for cpu in flavor one, 10 more needed"}, }, Requests: corev1.ResourceList{ corev1.ResourceCPU: resource.MustParse("12"), @@ -1660,7 +1659,7 @@ func TestAssignFlavors(t *testing.T) { { Name: "main", Status: &Status{ - reasons: []string{"insufficient unused quota in cohort for cpu in flavor one, 11 more needed"}, + reasons: []string{"insufficient quota for cpu in flavor one, request > maximum capacity (12 > 11)"}, }, Requests: corev1.ResourceList{ corev1.ResourceCPU: resource.MustParse("12"), @@ -1813,7 +1812,7 @@ func TestAssignFlavors(t *testing.T) { corev1.ResourcePods: resource.MustParse("1"), }, Status: &Status{ - reasons: []string{"insufficient unused quota in cohort for cpu in flavor one, 1 more needed"}, + reasons: []string{"insufficient unused quota for cpu in flavor one, 1 more needed"}, }, Count: 1, }}, @@ -1861,7 +1860,7 @@ func TestAssignFlavors(t *testing.T) { corev1.ResourceCPU: {Name: "one", Mode: Preempt, TriedFlavorIdx: 0}, }, Status: &Status{ - reasons: []string{"insufficient unused quota in cohort for cpu in flavor one, 10 more needed"}, + reasons: []string{"insufficient unused quota for cpu in flavor one, 10 more needed"}, }, Requests: corev1.ResourceList{ corev1.ResourceCPU: resource.MustParse("12"), diff --git a/test/integration/scheduler/fairsharing/fair_sharing_test.go b/test/integration/scheduler/fairsharing/fair_sharing_test.go index ad075c0c5e..fe910ede2f 100644 --- a/test/integration/scheduler/fairsharing/fair_sharing_test.go +++ b/test/integration/scheduler/fairsharing/fair_sharing_test.go @@ -176,7 +176,7 @@ var _ = ginkgo.Describe("Scheduler", func() { Type: kueue.WorkloadQuotaReserved, Status: metav1.ConditionFalse, Reason: "Pending", - Message: "couldn't assign flavors to pod set main: insufficient unused quota in cohort for cpu in flavor default, 2 more needed", + Message: "couldn't assign flavors to pod set main: insufficient quota for cpu in flavor default, request > maximum capacity (10 > 8)", }, util.IgnoreConditionTimestampsAndObservedGeneration), )) }, util.Timeout, util.Interval).Should(gomega.Succeed()) diff --git a/test/integration/scheduler/scheduler_test.go b/test/integration/scheduler/scheduler_test.go index b3da06d631..36264e9163 100644 --- a/test/integration/scheduler/scheduler_test.go +++ b/test/integration/scheduler/scheduler_test.go @@ -2421,7 +2421,7 @@ var _ = ginkgo.Describe("Scheduler", func() { Type: kueue.WorkloadQuotaReserved, Status: metav1.ConditionFalse, Reason: "Pending", - Message: "couldn't assign flavors to pod set main: insufficient unused quota in cohort for memory in flavor on-demand, 1Gi more needed", + Message: "couldn't assign flavors to pod set main: insufficient unused quota for memory in flavor on-demand, 1Gi more needed", }, util.IgnoreConditionTimestampsAndObservedGeneration), )) }, util.Timeout, util.Interval).Should(gomega.Succeed()) @@ -2463,7 +2463,7 @@ var _ = ginkgo.Describe("Scheduler", func() { Type: kueue.WorkloadQuotaReserved, Status: metav1.ConditionFalse, Reason: "Pending", - Message: "couldn't assign flavors to pod set main: insufficient unused quota in cohort for memory in flavor on-demand, 1Gi more needed", + Message: "couldn't assign flavors to pod set main: insufficient unused quota for memory in flavor on-demand, 1Gi more needed", }, util.IgnoreConditionTimestampsAndObservedGeneration), )) }, util.Timeout, util.Interval).Should(gomega.Succeed())