diff --git a/pkg/controllers/provisioning/scheduling/queue.go b/pkg/controllers/provisioning/scheduling/queue.go index d2a98a822f..c1a506211c 100644 --- a/pkg/controllers/provisioning/scheduling/queue.go +++ b/pkg/controllers/provisioning/scheduling/queue.go @@ -22,6 +22,7 @@ import ( v1 "k8s.io/api/core/v1" "k8s.io/apimachinery/pkg/types" + "sigs.k8s.io/karpenter/pkg/utils/pod" "sigs.k8s.io/karpenter/pkg/utils/resources" ) @@ -96,6 +97,15 @@ func byCPUAndMemoryDescending(pods []*v1.Pod) func(i int, j int) bool { return true } + // anti-affinity pods should be sorted before normal pods + if affinityCmp := pod.PodAffinityCmp(lhsPod, rhsPod); affinityCmp != 0 { + return affinityCmp > 0 + } + + if len(lhsPod.Spec.TopologySpreadConstraints) != len(rhsPod.Spec.TopologySpreadConstraints) { + return len(lhsPod.Spec.TopologySpreadConstraints) > len(rhsPod.Spec.TopologySpreadConstraints) + } + // If all else is equal, give a consistent ordering. This reduces the number of NominatePod events as we // de-duplicate those based on identical content. diff --git a/pkg/utils/pod/scheduling.go b/pkg/utils/pod/scheduling.go index 139df8e634..3daf108282 100644 --- a/pkg/utils/pod/scheduling.go +++ b/pkg/utils/pod/scheduling.go @@ -198,3 +198,38 @@ func HasPodAntiAffinity(pod *corev1.Pod) bool { (len(pod.Spec.Affinity.PodAntiAffinity.RequiredDuringSchedulingIgnoredDuringExecution) != 0 || len(pod.Spec.Affinity.PodAntiAffinity.PreferredDuringSchedulingIgnoredDuringExecution) != 0) } + +// PodAffinityCmp compares two pods based on their affinity +func PodAffinityCmp(lhsPod *corev1.Pod, rhsPod *corev1.Pod) int { + if lhsPod.Spec.Affinity != nil && rhsPod.Spec.Affinity != nil { + if HasRequiredPodAntiAffinity(lhsPod) && !HasRequiredPodAntiAffinity(rhsPod) { + return 1 + } else if !HasRequiredPodAntiAffinity(lhsPod) && HasRequiredPodAntiAffinity(rhsPod) { + return -1 + } else if HasRequiredPodAntiAffinity(lhsPod) && HasRequiredPodAntiAffinity(rhsPod) { + return PodAntiAffinityCmp(lhsPod, rhsPod) + } + } + + return 0 +} + +// PodAntiAffinityCmp compares two pods based on their the size of their anti-affinity constraints +func PodAntiAffinityCmp(lhsPod *corev1.Pod, rhsPod *corev1.Pod) int { + lPodAntiAffinity := lhsPod.Spec.Affinity.PodAntiAffinity + rPodAntiAffinity := rhsPod.Spec.Affinity.PodAntiAffinity + if len(lPodAntiAffinity.RequiredDuringSchedulingIgnoredDuringExecution) > len(rPodAntiAffinity.RequiredDuringSchedulingIgnoredDuringExecution) { + return 1 + } else if len(lPodAntiAffinity.RequiredDuringSchedulingIgnoredDuringExecution) < + len(rPodAntiAffinity.RequiredDuringSchedulingIgnoredDuringExecution) { + return -1 + } + + if len(lPodAntiAffinity.PreferredDuringSchedulingIgnoredDuringExecution) > len(rPodAntiAffinity.PreferredDuringSchedulingIgnoredDuringExecution) { + return 1 + } else if len(lPodAntiAffinity.PreferredDuringSchedulingIgnoredDuringExecution) < len(rPodAntiAffinity.PreferredDuringSchedulingIgnoredDuringExecution) { + return -1 + } + + return 0 +}