Revert "XANMOD: fair: Remove all energy efficiency functions"
This reverts commit a237421.
xanmod committed Dec 31, 2022
1 parent 0006b63 commit bf5f7d5
Showing 1 changed file with 201 additions and 0 deletions.
kernel/sched/fair.c
@@ -6983,6 +6983,199 @@ eenv_pd_max_util(struct energy_env *eenv, struct cpumask *pd_cpus,
	return min(max_util, eenv->cpu_cap);
}

/*
 * compute_energy(): Use the Energy Model to estimate the energy that @pd would
 * consume for a given utilization landscape @eenv. When @dst_cpu < 0, the task
 * contribution is ignored.
 */
static inline unsigned long
compute_energy(struct energy_env *eenv, struct perf_domain *pd,
	       struct cpumask *pd_cpus, struct task_struct *p, int dst_cpu)
{
	unsigned long max_util = eenv_pd_max_util(eenv, pd_cpus, p, dst_cpu);
	unsigned long busy_time = eenv->pd_busy_time;

	if (dst_cpu >= 0)
		busy_time = min(eenv->pd_cap, busy_time + eenv->task_busy_time);

	return em_cpu_energy(pd->em_pd, max_util, busy_time, eenv->cpu_cap);
}
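
/*
 * For intuition (illustrative numbers only, not part of this commit): the
 * Energy Model estimate scales the cost of the performance state needed for
 * @max_util by the domain's busy time over its capacity, roughly
 * cost * busy_time / cpu_cap.  With a state cost of 400, busy_time = 600 and
 * cpu_cap = 1024, that is about 400 * 600 / 1024 ~= 234 abstract energy
 * units; adding a task's busy time raises the estimate proportionally.
 */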

/*
 * find_energy_efficient_cpu(): Find most energy-efficient target CPU for the
 * waking task. find_energy_efficient_cpu() looks for the CPU with maximum
 * spare capacity in each performance domain and uses it as a potential
 * candidate to execute the task. Then, it uses the Energy Model to figure
 * out which of the CPU candidates is the most energy-efficient.
 *
 * The rationale for this heuristic is as follows. In a performance domain,
 * all the most energy efficient CPU candidates (according to the Energy
 * Model) are those for which we'll request a low frequency. When there are
 * several CPUs for which the frequency request will be the same, we don't
 * have enough data to break the tie between them, because the Energy Model
 * only includes active power costs. With this model, if we assume that
 * frequency requests follow utilization (e.g. using schedutil), the CPU with
 * the maximum spare capacity in a performance domain is guaranteed to be among
 * the best candidates of the performance domain.
 *
 * In practice, it could be preferable from an energy standpoint to pack
 * small tasks on a CPU in order to let other CPUs go in deeper idle states,
 * but that could also hurt our chances to go cluster idle, and we have no
 * ways to tell with the current Energy Model if this is actually a good
 * idea or not. So, find_energy_efficient_cpu() basically favors
 * cluster-packing, and spreading inside a cluster. That should at least be
 * a good thing for latency, and this is consistent with the idea that most
 * of the energy savings of EAS come from the asymmetry of the system, and
 * not so much from breaking the tie between identical CPUs. That's also the
 * reason why EAS is enabled in the topology code only for systems where
 * SD_ASYM_CPUCAPACITY is set.
 *
 * NOTE: Forkees are not accepted in the energy-aware wake-up path because
 * they don't have any useful utilization data yet and it's not possible to
 * forecast their impact on energy consumption. Consequently, they will be
 * placed by find_idlest_cpu() on the least loaded CPU, which might turn out
 * to be energy-inefficient in some use-cases. The alternative would be to
 * bias new tasks towards specific types of CPUs first, or to try to infer
 * their util_avg from the parent task, but those heuristics could hurt
 * other use-cases too. So, until someone finds a better way to solve this,
 * let's keep things simple by re-using the existing slow path.
 */
static int find_energy_efficient_cpu(struct task_struct *p, int prev_cpu)
{
	struct cpumask *cpus = this_cpu_cpumask_var_ptr(select_rq_mask);
	unsigned long prev_delta = ULONG_MAX, best_delta = ULONG_MAX;
	struct root_domain *rd = this_rq()->rd;
	int cpu, best_energy_cpu, target = -1;
	struct sched_domain *sd;
	struct perf_domain *pd;
	struct energy_env eenv;

	rcu_read_lock();
	pd = rcu_dereference(rd->pd);
	if (!pd || READ_ONCE(rd->overutilized))
		goto unlock;

	/*
	 * Energy-aware wake-up happens on the lowest sched_domain starting
	 * from sd_asym_cpucapacity spanning over this_cpu and prev_cpu.
	 */
	sd = rcu_dereference(*this_cpu_ptr(&sd_asym_cpucapacity));
	while (sd && !cpumask_test_cpu(prev_cpu, sched_domain_span(sd)))
		sd = sd->parent;
	if (!sd)
		goto unlock;
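
	/*
	 * Illustration (hypothetical topology): on a 4+4 big.LITTLE part,
	 * sd_asym_cpucapacity typically points at the domain spanning all
	 * eight CPUs, so the walk above usually stops immediately; it only
	 * climbs when prev_cpu sits outside the starting domain's span.
	 */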

	target = prev_cpu;

	sync_entity_load_avg(&p->se);
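	/*
	 * A task with no estimated utilization cannot change any energy
	 * estimate, so any placement is as good as prev_cpu; bail out and
	 * keep the target set just above.
	 */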
	if (!task_util_est(p))
		goto unlock;

	eenv_task_busy_time(&eenv, p, prev_cpu);

	for (; pd; pd = pd->next) {
		unsigned long cpu_cap, cpu_thermal_cap, util;
		unsigned long cur_delta, max_spare_cap = 0;
		bool compute_prev_delta = false;
		int max_spare_cap_cpu = -1;
		unsigned long base_energy;

		cpumask_and(cpus, perf_domain_span(pd), cpu_online_mask);

		if (cpumask_empty(cpus))
			continue;

		/* Account thermal pressure for the energy estimation */
		cpu = cpumask_first(cpus);
		cpu_thermal_cap = arch_scale_cpu_capacity(cpu);
		cpu_thermal_cap -= arch_scale_thermal_pressure(cpu);
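		/*
		 * E.g. (made-up numbers): a big CPU with capacity 1024 under
		 * a thermal pressure of 224 is treated as an 800-capacity CPU
		 * for the estimates below.
		 */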

		eenv.cpu_cap = cpu_thermal_cap;
		eenv.pd_cap = 0;

		for_each_cpu(cpu, cpus) {
			eenv.pd_cap += cpu_thermal_cap;

			if (!cpumask_test_cpu(cpu, sched_domain_span(sd)))
				continue;

			if (!cpumask_test_cpu(cpu, p->cpus_ptr))
				continue;

			util = cpu_util_next(cpu, p, cpu);
			cpu_cap = capacity_of(cpu);

			/*
			 * Skip CPUs that cannot satisfy the capacity request.
			 * IOW, placing the task there would make the CPU
			 * overutilized. Take uclamp into account to see how
			 * much capacity we can get out of the CPU; this is
			 * aligned with sched_cpu_util().
			 */
			util = uclamp_rq_util_with(cpu_rq(cpu), util, p);
			if (!fits_capacity(util, cpu_cap))
				continue;
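			/*
			 * fits_capacity() keeps ~20% headroom (roughly
			 * util * 1280 < capacity * 1024), so e.g. an
			 * 800-capacity CPU only accepts up to ~640 units of
			 * projected utilization here.
			 */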

			lsub_positive(&cpu_cap, util);

			if (cpu == prev_cpu) {
				/* Always use prev_cpu as a candidate. */
				compute_prev_delta = true;
			} else if (cpu_cap > max_spare_cap) {
				/*
				 * Find the CPU with the maximum spare capacity
				 * in the performance domain.
				 */
				max_spare_cap = cpu_cap;
				max_spare_cap_cpu = cpu;
			}
		}

		if (max_spare_cap_cpu < 0 && !compute_prev_delta)
			continue;

		eenv_pd_busy_time(&eenv, cpus, p);
		/* Compute the 'base' energy of the pd, without @p */
		base_energy = compute_energy(&eenv, pd, cpus, p, -1);

		/* Evaluate the energy impact of using prev_cpu. */
		if (compute_prev_delta) {
			prev_delta = compute_energy(&eenv, pd, cpus, p,
						    prev_cpu);
			/* CPU utilization has changed */
			if (prev_delta < base_energy)
				goto unlock;
			prev_delta -= base_energy;
			best_delta = min(best_delta, prev_delta);
		}

		/* Evaluate the energy impact of using max_spare_cap_cpu. */
		if (max_spare_cap_cpu >= 0) {
			cur_delta = compute_energy(&eenv, pd, cpus, p,
						   max_spare_cap_cpu);
			/* CPU utilization has changed */
			if (cur_delta < base_energy)
				goto unlock;
			cur_delta -= base_energy;
			if (cur_delta < best_delta) {
				best_delta = cur_delta;
				best_energy_cpu = max_spare_cap_cpu;
			}
		}
	}
	rcu_read_unlock();

	if (best_delta < prev_delta)
		target = best_energy_cpu;

	return target;

unlock:
	rcu_read_unlock();

	return target;
}

/*
 * select_task_rq_fair: Select target runqueue for the waking task in domains
 * that have the relevant SD flag set. In practice, this is SD_BALANCE_WAKE,
@@ -7010,6 +7203,14 @@ select_task_rq_fair(struct task_struct *p, int prev_cpu, int wake_flags)
	lockdep_assert_held(&p->pi_lock);
	if (wake_flags & WF_TTWU) {
		record_wakee(p);

		if (sched_energy_enabled()) {
			new_cpu = find_energy_efficient_cpu(p, prev_cpu);
			if (new_cpu >= 0)
				return new_cpu;
			new_cpu = prev_cpu;
		}

		want_affine = !wake_wide(p) && cpumask_test_cpu(cpu, p->cpus_ptr);
	}

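For intuition, the placement rule this revert restores can be modeled outside the kernel. The sketch below is not kernel code: the domain names, costs, capacities and utilizations are made up, and toy_energy() only mimics the cost * busy / capacity shape of the Energy Model estimate. It shows a task whose prev_cpu sits in the big domain migrating to the littles because that yields the smaller energy delta.

/* toy_eas.c - hypothetical userspace sketch of the heuristic above. */
#include <stdio.h>

struct toy_pd {
	const char *name;
	unsigned long cost;	/* cost of the performance state in use */
	unsigned long cap;	/* per-CPU capacity */
	unsigned long busy;	/* current summed utilization */
};

/* Rough shape of the EM estimate: cost scaled by busy time over capacity. */
static unsigned long toy_energy(const struct toy_pd *pd, unsigned long extra)
{
	return pd->cost * (pd->busy + extra) / pd->cap;
}

int main(void)
{
	struct toy_pd pds[] = {
		{ "little", 100, 512,  300 },
		{ "big",    400, 1024, 200 },	/* prev_cpu lives here */
	};
	unsigned long task_util = 120;
	unsigned long prev_delta, best_delta = ~0UL;
	int i, best = 0;

	/* Energy delta per domain, mirroring base_energy/cur_delta above. */
	for (i = 0; i < 2; i++) {
		unsigned long base = toy_energy(&pds[i], 0);
		unsigned long delta = toy_energy(&pds[i], task_util) - base;

		printf("%-6s base=%3lu delta=%3lu\n", pds[i].name, base, delta);
		if (delta < best_delta) {
			best_delta = delta;
			best = i;
		}
	}

	/* Like find_energy_efficient_cpu(): prev_cpu wins unless beaten. */
	prev_delta = toy_energy(&pds[1], task_util) - toy_energy(&pds[1], 0);
	printf("place on: %s\n",
	       best_delta < prev_delta ? pds[best].name : "prev_cpu (big)");
	return 0;
}

With these made-up numbers the deltas come out to 24 for the littles and 47 for the bigs, so the task lands on the little cluster; flip the numbers and it sticks to prev_cpu, which is the prev-stickiness the best_delta < prev_delta comparison encodes.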
