Skip to content

Commit

Permalink
sched/fair: Tighten prefer_spread feature
Browse files Browse the repository at this point in the history
This patch tightens the prefer_spread feature by doing the
following.

(1) While picking the busiest group in update_sd_pick_busiest(),
if the current group and busiest group are classified as same,
use the number of runnable tasks to break the tie. Use group
utilization as the next tie breaker. Otherwise we may end up
selecting the group with more utilization but with just 1 task.

(2) Ignore average load checks when the load balancing CPU is
idle and prefer_spread is set.

(3) Allow no-hz idle balance CPUs to pull the tasks when the
sched domain is not over-utilized but prefer_spread is set.

(4) There are cases in calculate_imbalance() that skip the imbalance
override check, due to which tasks are not getting pulled. Move this
check outside of calculate_imbalance() and set the imbalance to
half of the group load.

(5) When the weighted CPU load is 0, find_busiest_queue() can't
find the busiest rq. Fix this as well.

Change-Id: I93d1a62cbd4be34af993ae664a398aa868d29a0c
Signed-off-by: Pavankumar Kondeti <[email protected]>
Signed-off-by: engstk <[email protected]>
  • Loading branch information
Pavankumar Kondeti authored and engstk committed Jul 13, 2020
1 parent 9b708b3 commit 1e0659a
Showing 1 changed file with 30 additions and 21 deletions.
51 changes: 30 additions & 21 deletions kernel/sched/fair.c
Original file line number Diff line number Diff line change
Expand Up @@ -9765,10 +9765,19 @@ static bool update_sd_pick_busiest(struct lb_env *env,
if (sgs->group_type < busiest->group_type)
return false;

if (env->prefer_spread && env->idle != CPU_NOT_IDLE &&
(sgs->sum_nr_running > busiest->sum_nr_running) &&
(sgs->group_util > busiest->group_util))
return true;
/*
* This sg and busiest are classified as same. When prefer_spread
* is true, we want to maximize the chance of pulling tasks, so
* prefer to pick the sg with more runnable tasks and break ties
* with utilization.
*/
if (env->prefer_spread) {
if (sgs->sum_nr_running < busiest->sum_nr_running)
return false;
if (sgs->sum_nr_running > busiest->sum_nr_running)
return true;
return sgs->group_util > busiest->group_util;
}

if (sgs->avg_load <= busiest->avg_load)
return false;
Expand Down Expand Up @@ -9804,10 +9813,6 @@ static bool update_sd_pick_busiest(struct lb_env *env,

asym_packing:

if (env->prefer_spread &&
(sgs->sum_nr_running < busiest->sum_nr_running))
return false;

/* This is the busiest node in its class. */
if (!(env->sd->flags & SD_ASYM_PACKING))
return true;
Expand Down Expand Up @@ -10278,15 +10283,6 @@ static inline void calculate_imbalance(struct lb_env *env, struct sd_lb_stats *s

return fix_small_imbalance(env, sds);
}

/*
* If we couldn't find any imbalance, then boost the imbalance
* with the group util.
*/
if (env->prefer_spread && !env->imbalance &&
env->idle != CPU_NOT_IDLE &&
busiest->sum_nr_running > busiest->group_weight)
env->imbalance = busiest->group_util;
}

/******* find_busiest_group() helpers end here *********************/
Expand Down Expand Up @@ -10322,7 +10318,7 @@ static struct sched_group *find_busiest_group(struct lb_env *env)
int cpu_local, cpu_busiest;
unsigned long capacity_local, capacity_busiest;

if (env->idle != CPU_NEWLY_IDLE)
if (env->idle != CPU_NEWLY_IDLE && !env->prefer_spread)
goto out_balanced;

if (!sds.local || !sds.busiest)
Expand Down Expand Up @@ -10371,9 +10367,13 @@ static struct sched_group *find_busiest_group(struct lb_env *env)
/*
* When dst_cpu is idle, prevent SMP nice and/or asymmetric group
* capacities from resulting in underutilization due to avg_load.
*
* When prefer_spread is enabled, force the balance even when
* busiest group has some capacity but loaded with more than 1
* task.
*/
if (env->idle != CPU_NOT_IDLE && group_has_capacity(env, local) &&
busiest->group_no_capacity)
(busiest->group_no_capacity || env->prefer_spread))
goto force_balance;

/* Misfit tasks should be dealt with regardless of the avg load */
Expand Down Expand Up @@ -10419,6 +10419,14 @@ static struct sched_group *find_busiest_group(struct lb_env *env)
/* Looks like there is an imbalance. Compute it */
env->src_grp_type = busiest->group_type;
calculate_imbalance(env, &sds);

/*
* If we couldn't find any imbalance, then boost the imbalance
* based on the group util.
*/
if (!env->imbalance && env->prefer_spread)
env->imbalance = (busiest->group_util >> 1);

trace_sched_load_balance_stats(sds.busiest->cpumask[0],
busiest->group_type, busiest->avg_load,
busiest->load_per_task, sds.local->cpumask[0],
Expand Down Expand Up @@ -10528,7 +10536,7 @@ static struct rq *find_busiest_queue(struct lb_env *env,
* to: wl_i * capacity_j > wl_j * capacity_i; where j is
* our previous maximum.
*/
if (wl * busiest_capacity > busiest_load * capacity) {
if (wl * busiest_capacity >= busiest_load * capacity) {
busiest_load = wl;
busiest_capacity = capacity;
busiest = rq;
Expand Down Expand Up @@ -10674,7 +10682,8 @@ static int load_balance(int this_cpu, struct rq *this_rq,
.loop = 0,
};

env.prefer_spread = (prefer_spread_on_idle(this_cpu) &&
env.prefer_spread = (idle != CPU_NOT_IDLE &&
prefer_spread_on_idle(this_cpu) &&
!((sd->flags & SD_ASYM_CPUCAPACITY) &&
!cpumask_test_cpu(this_cpu,
&asym_cap_sibling_cpus)));
Expand Down

0 comments on commit 1e0659a

Please sign in to comment.