Skip to content
Permalink
Browse files
muqss: Tune CPU selection
While testing MuQSS performance across the MC, LLC and SMT scheduling domains,
I found that it underperforms, at least on my machine (Ryzen 1700). This commit
restores performance there. I have no Intel machine available to test on, but
the change should behave similarly on Intel hardware.

This patch modifies best CPU selection:
    Adds a check that not only verifies the CPU in question is in
    the idle mask, but also checks whether the mask contains its
    SMT sibling.
    The rationale is that SMT siblings share all resources of the
    core, so if another task is already scheduled on the sibling,
    it is not really optimal to schedule more tasks onto the same
    SMT resources.

    The patch also further refines best-CPU selection and adds an
    early-exit threshold.
    The rationale is that there is no need to keep searching once
    the best possible selection has already been found. What counts
    as best depends on the localities processed and is mainly
    targeted at SMT.
  • Loading branch information
Mjasnik authored and damentz committed Jul 10, 2021
1 parent 68e5461 commit 08f6a6f8ef28aef1402902c870643b4925057db0
Showing 1 changed file with 55 additions and 31 deletions.
@@ -1116,54 +1116,78 @@ static void resched_curr(struct rq *rq)
* Other node, other CPU, idle cache, idle threads.
* Other node, other CPU, busy cache, idle threads.
* Other node, other CPU, busy threads.
*/
*/
static int best_mask_cpu(int best_cpu, struct rq *rq, cpumask_t *tmpmask)
{
int best_ranking = CPUIDLE_DIFF_NODE | CPUIDLE_THREAD_BUSY |
CPUIDLE_DIFF_CPU | CPUIDLE_CACHE_BUSY | CPUIDLE_DIFF_CORE |
CPUIDLE_DIFF_CORE_LLC | CPUIDLE_DIFF_THREAD;
int cpu_tmp;

if (cpumask_test_cpu(best_cpu, tmpmask))
goto out;
int cpu_tmp, best_poss_ranking;
struct rq *tmp_rq;

for_each_cpu(cpu_tmp, tmpmask) {
int ranking, locality;
struct rq *tmp_rq;

ranking = 0;
tmp_rq = cpu_rq(cpu_tmp);
tmp_rq = cpu_rq(best_cpu);
if (!(cpumask_test_cpu(best_cpu, tmpmask)
#ifdef CONFIG_SCHED_SMT
&& tmp_rq->siblings_idle(tmp_rq)
#endif
)) {
#ifdef CONFIG_SCHED_SMT
best_poss_ranking = CPUIDLE_DIFF_THREAD;
#elif CONFIG_SCHED_MC
best_poss_ranking = CPUIDLE_DIFF_CORE_LLC;
#else
best_poss_ranking = CPUIDLE_DIFF_CPU;
#endif
for_each_cpu(cpu_tmp, tmpmask) {
int ranking, locality;

locality = rq->cpu_locality[cpu_tmp];
ranking = 0;
tmp_rq = cpu_rq(cpu_tmp);
locality = rq->cpu_locality[cpu_tmp];
#ifdef CONFIG_NUMA
if (locality > LOCALITY_SMP)
ranking |= CPUIDLE_DIFF_NODE;
else
if (locality > LOCALITY_SMP)
ranking |= CPUIDLE_DIFF_NODE;
else
#endif
if (locality > LOCALITY_MC)
ranking |= CPUIDLE_DIFF_CPU;
if (locality > LOCALITY_MC)
ranking |= CPUIDLE_DIFF_CPU;
#ifdef CONFIG_SCHED_MC
else if (locality == LOCALITY_MC_LLC)
ranking |= CPUIDLE_DIFF_CORE_LLC;
else if (locality == LOCALITY_MC)
ranking |= CPUIDLE_DIFF_CORE;
if (!(tmp_rq->cache_idle(tmp_rq)))
ranking |= CPUIDLE_CACHE_BUSY;
else if (locality == LOCALITY_MC_LLC)
ranking |= CPUIDLE_DIFF_CORE_LLC;
else if (locality == LOCALITY_MC)
ranking |= CPUIDLE_DIFF_CORE;
#endif
#ifdef CONFIG_SCHED_SMT
if (locality == LOCALITY_SMT)
ranking |= CPUIDLE_DIFF_THREAD;
else if (locality == LOCALITY_SMT)
ranking |= CPUIDLE_DIFF_THREAD;
#endif
if (ranking < best_ranking
#ifdef CONFIG_SCHED_MC
if (ranking < best_ranking) {
if (!(tmp_rq->cache_idle(tmp_rq)))
ranking |= CPUIDLE_CACHE_BUSY;
#endif
#ifdef CONFIG_SCHED_SMT
if (ranking < best_ranking) {
if (!(tmp_rq->siblings_idle(tmp_rq))) {
ranking |= CPUIDLE_THREAD_BUSY;
if (locality == LOCALITY_SMT)
best_poss_ranking = CPUIDLE_DIFF_CORE_LLC;
}
#endif
if (ranking < best_ranking) {
best_cpu = cpu_tmp;
best_ranking = ranking;
}
#ifdef CONFIG_SCHED_SMT
|| (ranking == best_ranking && (tmp_rq->siblings_idle(tmp_rq)))
}
#endif
) {
best_cpu = cpu_tmp;
best_ranking = ranking;
#ifdef CONFIG_SCHED_MC
}
#endif
if (best_ranking <= best_poss_ranking)
break;
}
}
out:
return best_cpu;
}

0 comments on commit 08f6a6f

Please sign in to comment.