CP-14765 - Use atomic count of VCPUs on PCPU runqueue to avoid taking spinlock

Credit1 SMP load balancing tries to find work on other PCPUs by looking through
their runqueues. It takes the PCPU runqueue spinlock in order to safely process
the list. This causes lock contention on the PCPU runqueue and cache line bouncing.

Use an atomic count of the items on each PCPU runqueue so that VCPUs are not
stolen from a PCPU in an under-committed situation and so that the runqueue
spinlock is not taken needlessly.

This should help performance as it reduces cache line bouncing of the spinlock
itself and helps keep the caches hot. Additionally, it may improve turbo
behaviour on Intel processors, as cores will be able to idle more often.
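
The patch below is the actual change; as a quick illustration of the pattern it
relies on, here is a minimal standalone C sketch (hypothetical names, C11 atomics
and pthread spinlocks instead of Xen's primitives): keep a racy atomic count
beside each runqueue and have the load balancer skip peers with fewer than two
queued tasks before ever touching their lock.

/* Standalone sketch of the check-before-lock pattern; not the Xen code.
 * Build: cc -std=c11 sketch.c -lpthread
 */
#include <pthread.h>
#include <stdatomic.h>
#include <stdbool.h>
#include <stdio.h>

#define NR_CPUS 4

struct runqueue {
    pthread_spinlock_t lock;   /* protects the queue contents            */
    atomic_int count;          /* racy hint: number of queued tasks      */
    /* ... list of queued tasks would live here ... */
};

static struct runqueue runq[NR_CPUS];

static void runq_init(void)
{
    for (int cpu = 0; cpu < NR_CPUS; cpu++) {
        pthread_spin_init(&runq[cpu].lock, PTHREAD_PROCESS_PRIVATE);
        atomic_init(&runq[cpu].count, 0);   /* mirrors the csched_alloc_pdata() hunk */
    }
}

/* In Xen, __runq_insert()/__runq_remove() already run with the runqueue
 * lock held; the sketch takes it here to stay self-contained. */
static void runq_insert(int cpu)
{
    pthread_spin_lock(&runq[cpu].lock);
    /* ... list_add_tail() of the task in the real code ... */
    atomic_fetch_add(&runq[cpu].count, 1);
    pthread_spin_unlock(&runq[cpu].lock);
}

static void runq_remove(int cpu)
{
    pthread_spin_lock(&runq[cpu].lock);
    /* ... list_del_init() of the task in the real code ... */
    atomic_fetch_sub(&runq[cpu].count, 1);
    pthread_spin_unlock(&runq[cpu].lock);
}

/* Returns true if work was stolen from some peer CPU. */
static bool load_balance(int self)
{
    for (int peer = 0; peer < NR_CPUS; peer++) {
        if (peer == self)
            continue;

        /* Cheap racy check first: a peer with fewer than two queued
         * tasks has nothing worth stealing, so don't contend on its
         * lock.  The race is harmless because the lock is still taken
         * before anything is actually migrated. */
        if (atomic_load(&runq[peer].count) < 2)
            continue;

        if (pthread_spin_trylock(&runq[peer].lock) != 0)
            continue;          /* peer is busy; try the next one */

        /* ... re-check under the lock and migrate one task ... */
        pthread_spin_unlock(&runq[peer].lock);
        return true;
    }
    return false;
}

int main(void)
{
    runq_init();
    runq_insert(1);
    runq_insert(1);            /* CPU 1 now looks worth stealing from */
    printf("stole work: %s\n", load_balance(0) ? "yes" : "no");
    runq_remove(1);
    runq_remove(1);
    return 0;
}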
Malcolm Crossley committed Jan 11, 2016
1 parent 221eb49 commit 0f830b9
Showing 2 changed files with 72 additions and 0 deletions.
71 changes: 71 additions & 0 deletions master/sched-credit1-use-per-pcpu-runqueue-count.patch
@@ -0,0 +1,71 @@
diff --git a/xen/common/sched_credit.c b/xen/common/sched_credit.c
index 507e957..bf59499 100644
--- a/xen/common/sched_credit.c
+++ b/xen/common/sched_credit.c
@@ -238,6 +238,7 @@ struct csched_private {

static void csched_tick(void *_cpu);
static void csched_acct(void *dummy);
+DEFINE_PER_CPU(atomic_t, runqueue_count);

static inline int
__vcpu_on_runq(struct csched_vcpu *svc)
@@ -280,6 +281,7 @@ __runq_insert(unsigned int cpu, struct csched_vcpu *svc)
}

list_add_tail(&svc->runq_elem, iter);
+ atomic_inc(&per_cpu(runqueue_count, cpu));
}

static inline void
@@ -287,6 +289,7 @@ __runq_remove(struct csched_vcpu *svc)
{
BUG_ON( !__vcpu_on_runq(svc) );
list_del_init(&svc->runq_elem);
+ atomic_dec(&per_cpu(runqueue_count, svc->vcpu->processor));
}


@@ -555,6 +558,7 @@ csched_alloc_pdata(const struct scheduler *ops, int cpu)
/* Start off idling... */
BUG_ON(!is_idle_vcpu(curr_on_cpu(cpu)));
cpumask_set_cpu(cpu, prv->idlers);
+ atomic_set(&per_cpu(runqueue_count, cpu), 0);

spin_unlock_irqrestore(&prv->lock, flags);

@@ -1576,6 +1580,17 @@ csched_load_balance(struct csched_private *prv, int cpu,
goto next_node;
do
{
+ spinlock_t *lock;
+
+ /*
+ * Ignore peer cpu with only one task on its runqueue,
+ * this will race with adding/removing tasks but the
+ * lock for the runqueue is still taken below
+ */
+ if (atomic_read(&per_cpu(runqueue_count, peer_cpu)) < 2) {
+ peer_cpu = cpumask_cycle(peer_cpu, &workers);
+ continue;
+ }
/*
* Get ahold of the scheduler lock for this peer CPU.
*
@@ -1583,7 +1598,7 @@ csched_load_balance(struct csched_private *prv, int cpu,
* could cause a deadlock if the peer CPU is also load
* balancing and trying to lock this CPU.
*/
- spinlock_t *lock = pcpu_schedule_trylock(peer_cpu);
+ lock = pcpu_schedule_trylock(peer_cpu);

if ( !lock )
{
@@ -1806,6 +1821,7 @@ csched_dump_pcpu(const struct scheduler *ops, int cpu)
runq = &spc->runq;

cpumask_scnprintf(cpustr, sizeof(cpustr), per_cpu(cpu_sibling_mask, cpu));
+ printk(" qcnt %d", atomic_read(&per_cpu(runqueue_count, cpu)));
printk(" sort=%d, sibling=%s, ", spc->runq_sort_last, cpustr);
cpumask_scnprintf(cpustr, sizeof(cpustr), per_cpu(cpu_core_mask, cpu));
printk("core=%s\n", cpustr);
1 change: 1 addition & 0 deletions master/series
@@ -128,6 +128,7 @@ local-cpuid.patch
detect-nehalem-c-state.patch # malcolmc
quirk-hp-gen8-rmrr.patch # malcolmc
quirk-pci-phantom-function-devices.patch # malcolmc
sched-credit1-use-per-pcpu-runqueue-count.patch # malcolmc

# dvrabel - v1 posted
0001-trace-include-timestamp-in-trace-records-added-by-HV.patch
