diff --git a/include/linux/sched/topology.h b/include/linux/sched/topology.h
index 56cffe42abbc4b..816df6cc444e1c 100644
--- a/include/linux/sched/topology.h
+++ b/include/linux/sched/topology.h
@@ -81,6 +81,7 @@ struct sched_domain_shared {
 	atomic_t	ref;
 	atomic_t	nr_busy_cpus;
 	int		has_idle_cores;
+	int		nr_idle_scan;
 };
 
 struct sched_domain {
diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
index a68482d6653558..72c8bf228cc02b 100644
--- a/kernel/sched/fair.c
+++ b/kernel/sched/fair.c
@@ -6324,6 +6324,7 @@ static int select_idle_cpu(struct task_struct *p, struct sched_domain *sd, bool
 {
 	struct cpumask *cpus = this_cpu_cpumask_var_ptr(select_idle_mask);
 	int i, cpu, idle_cpu = -1, nr = INT_MAX;
+	struct sched_domain_shared *sd_share;
 	struct rq *this_rq = this_rq();
 	int this = smp_processor_id();
 	struct sched_domain *this_sd;
@@ -6363,6 +6364,17 @@ static int select_idle_cpu(struct task_struct *p, struct sched_domain *sd, bool
 		time = cpu_clock(this);
 	}
 
+	if (sched_feat(SIS_UTIL)) {
+		sd_share = rcu_dereference(per_cpu(sd_llc_shared, target));
+		if (sd_share) {
+			/* because !--nr is the condition to stop scan */
+			nr = READ_ONCE(sd_share->nr_idle_scan) + 1;
+			/* overloaded LLC is unlikely to have idle cpu/core */
+			if (nr == 1)
+				return -1;
+		}
+	}
+
 	for_each_cpu_wrap(cpu, cpus, target + 1) {
 		if (has_idle_core) {
 			i = select_idle_core(p, cpu, cpus, &idle_cpu);
@@ -9253,6 +9265,59 @@ find_idlest_group(struct sched_domain *sd, struct task_struct *p, int this_cpu)
 	return idlest;
 }
 
+static inline void update_idle_cpu_scan(struct lb_env *env,
+					unsigned long sum_util)
+{
+	struct sched_domain_shared *sd_share;
+	int nr_scan, nr_llc, x, y;
+
+	if (!sched_feat(SIS_UTIL))
+		return;
+	/*
+	 * Update the number of CPUs to scan in LLC domain, which could
+	 * be used as a hint in select_idle_cpu(). The update of this hint
+	 * occurs during periodic load balancing, rather than frequent
+	 * newidle balance.
+	 */
+	nr_llc = per_cpu(sd_llc_size, env->dst_cpu);
+	if (env->idle == CPU_NEWLY_IDLE ||
+	    env->sd->span_weight != nr_llc)
+		return;
+
+	sd_share = rcu_dereference(per_cpu(sd_llc_shared, env->dst_cpu));
+	if (!sd_share)
+		return;
+
+	/*
+	 * The number of CPUs to search drops as sum_util increases, when
+	 * sum_util hits 85% or above, the scan stops.
+	 * The reason to choose 85% as the threshold is because this is the
+	 * imbalance_pct(117) when a LLC sched group is overloaded.
+	 *
+	 * let y = 1 - (x/0.85)^2                                       [1]
+	 *
+	 * y is the ratio of CPUs to be scanned in the LLC
+	 * domain, x is the ratio of sum_util compared to the
+	 * CPU capacity:
+	 *
+	 * x = sum_util / (nr_llc * SCHED_CAPACITY_SCALE)               [2]
+	 *
+	 * Scale x by SCHED_CAPACITY_SCALE and [2] becomes:
+	 *
+	 * x = sum_util / nr_llc                                        [3]
+	 *
+	 * and [1] becomes:
+	 *
+	 * y = (SCHED_CAPACITY_SCALE - (x*x)/740) / SCHED_CAPACITY_SCALE [4]
+	 *
+	 * then the number of CPUs to scan is nr_scan = nr_llc * y
+	 */
+	x = sum_util / nr_llc;
+	y = SCHED_CAPACITY_SCALE - (x * x) / 740;
+	nr_scan = max(0, (nr_llc * y) / SCHED_CAPACITY_SCALE);
+	WRITE_ONCE(sd_share->nr_idle_scan, nr_scan);
+}
+
 /**
  * update_sd_lb_stats - Update sched_domain's statistics for load balancing.
  * @env: The load balancing environment.
@@ -9265,6 +9330,7 @@ static inline void update_sd_lb_stats(struct lb_env *env, struct sd_lb_stats *sd
 	struct sched_group *sg = env->sd->groups;
 	struct sg_lb_stats *local = &sds->local_stat;
 	struct sg_lb_stats tmp_sgs;
+	unsigned long sum_util = 0;
 	int sg_status = 0;
 
 	do {
@@ -9297,6 +9363,7 @@ static inline void update_sd_lb_stats(struct lb_env *env, struct sd_lb_stats *sd
 		sds->total_load += sgs->group_load;
 		sds->total_capacity += sgs->group_capacity;
 
+		sum_util += sgs->group_util;
 		sg = sg->next;
 	} while (sg != env->sd->groups);
 
@@ -9322,6 +9389,8 @@ static inline void update_sd_lb_stats(struct lb_env *env, struct sd_lb_stats *sd
 		WRITE_ONCE(rd->overutilized, SG_OVERUTILIZED);
 		trace_sched_overutilized_tp(rd, SG_OVERUTILIZED);
 	}
+
+	update_idle_cpu_scan(env, sum_util);
 }
 
 #define NUMA_IMBALANCE_MIN 2
diff --git a/kernel/sched/features.h b/kernel/sched/features.h
index 1cf435bbcd9cad..3334a1b93fc6be 100644
--- a/kernel/sched/features.h
+++ b/kernel/sched/features.h
@@ -61,6 +61,7 @@ SCHED_FEAT(TTWU_QUEUE, true)
  * When doing wakeups, attempt to limit superfluous scans of the LLC domain.
  */
 SCHED_FEAT(SIS_PROP, true)
+SCHED_FEAT(SIS_UTIL, true)
 
 /*
  * Issue a WARN when we do multiple update_rq_clock() calls
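
A note on the scan-depth arithmetic in update_idle_cpu_scan() above: 740 is roughly (0.85 * SCHED_CAPACITY_SCALE)^2 / SCHED_CAPACITY_SCALE, which is how the real-valued form in [1] becomes the integer form in [4]. The standalone userspace sketch below (not part of the patch; the 16-CPU LLC width is a made-up example) evaluates the same fixed-point expressions and tabulates nr_scan as utilization rises, showing it reaching zero right around the 85% threshold:

/*
 * Standalone illustration of the SIS_UTIL scan-depth formula,
 * nr_scan = nr_llc * (1 - (x/0.85)^2), using the same fixed-point
 * arithmetic as update_idle_cpu_scan(). NR_LLC = 16 is an assumption
 * made for this example only.
 */
#include <stdio.h>

#define SCHED_CAPACITY_SCALE	1024
#define NR_LLC			16	/* hypothetical LLC width */

static int nr_idle_scan(unsigned long sum_util)
{
	int x, y, nr_scan;

	x = sum_util / NR_LLC;				/* [3] */
	/* 740 ~= (0.85 * SCHED_CAPACITY_SCALE)^2 / SCHED_CAPACITY_SCALE */
	y = SCHED_CAPACITY_SCALE - (x * x) / 740;	/* [4] */
	nr_scan = (NR_LLC * y) / SCHED_CAPACITY_SCALE;

	return nr_scan > 0 ? nr_scan : 0;		/* clamp like max(0, ...) */
}

int main(void)
{
	int pct;

	for (pct = 0; pct <= 100; pct += 10) {
		unsigned long sum_util =
			(unsigned long)NR_LLC * SCHED_CAPACITY_SCALE * pct / 100;

		printf("util %3d%% -> nr_scan %2d\n", pct, nr_idle_scan(sum_util));
	}
	return 0;
}

With these assumptions, 0% utilization yields nr_scan = 16, 50% yields 10, 80% yields 1, and 85% and above yield 0, which is the point where select_idle_cpu() gives up scanning entirely.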
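On the consumer side, select_idle_cpu() folds the hint into its existing !--nr termination check: it reads nr_idle_scan + 1, so a hint of 0 yields nr == 1, which is treated as "LLC overloaded, skip the scan". A simplified userspace mock of that pattern follows; scan_llc_for_idle(), cpu_is_idle() and NR_CPUS_MOCK are invented for illustration and are not kernel interfaces:

/*
 * Mock of how select_idle_cpu() consumes the nr_idle_scan hint:
 * decrement a budget before probing each CPU and bail out when
 * the budget is exhausted.
 */
#include <stdio.h>
#include <stdbool.h>

#define NR_CPUS_MOCK	8

static bool cpu_is_idle(int cpu)
{
	/* pretend only CPU 6 is idle */
	return cpu == 6;
}

/* Return an idle CPU, or -1 if the scan budget runs out first. */
static int scan_llc_for_idle(int nr_idle_scan_hint)
{
	/* "+ 1" because !--nr below is the condition that stops the scan */
	int nr = nr_idle_scan_hint + 1;
	int cpu;

	/* a hint of 0 means the LLC looked overloaded: skip scanning */
	if (nr == 1)
		return -1;

	for (cpu = 0; cpu < NR_CPUS_MOCK; cpu++) {
		if (!--nr)
			return -1;
		if (cpu_is_idle(cpu))
			return cpu;
	}
	return -1;
}

int main(void)
{
	printf("hint 0 -> %d\n", scan_llc_for_idle(0));	/* -1: scan skipped */
	printf("hint 4 -> %d\n", scan_llc_for_idle(4));	/* -1: budget exhausted */
	printf("hint 8 -> %d\n", scan_llc_for_idle(8));	/*  6: idle CPU found */
	return 0;
}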