drm/i915: Replace global breadcrumbs with per-context interrupt tracking

A few years ago, in commit 688e6c7 ("drm/i915: Slaughter the
thundering i915_wait_request herd"), the issue of handling multiple
clients waiting in parallel was brought to our attention. The
requirement was that every client should be woken immediately upon its
request being signaled, without incurring any cpu overhead.

Handling certain fragility of our hw meant that we could not do a
simple check inside the irq handler (some generations required almost
unbounded delays before we could be sure of seqno coherency), and so
request completion checking required delegation.

Before commit 688e6c7, the solution was simple. Every client
waiting on a request would be woken on every interrupt, and each would do
a heavyweight check to see if its request was complete. Commit
688e6c7 introduced an rbtree so that only the earliest waiter on
the global timeline would be woken; it would then wake the next, and so
on. (Along with various complications to handle requests being reordered
along the global timeline, and also a requirement for a kthread to
provide a delegate for fence signaling when there was no process
context.)

The global rbtree depends on knowing the execution timeline (and global
seqno). Without knowing that order, we must instead check all contexts
queued to the HW to see which may have advanced. We trim that list by
only checking queued contexts that are being waited on, but we still
keep a list of all active contexts and their active signalers to
inspect from inside the irq handler. By moving the waiters onto the fence
signal list, we can combine the client wakeup with the dma_fence
signaling (a dramatic reduction in complexity, but one that requires the
HW to be coherent: the seqno must be visible from the cpu before the
interrupt is raised; we keep a timer as a backup just in case).
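
The irq-side walk is roughly the following (a condensed sketch of the
intel_engine_breadcrumbs_irq() introduced by this patch; irq re-arming
and the fence-flag bookkeeping are trimmed, so treat the helper below
as illustrative rather than the verbatim code):

/* Kernel context assumed: <linux/list.h>, <linux/spinlock.h>, i915 types. */
static void breadcrumbs_irq_sketch(struct intel_engine_cs *engine)
{
	struct intel_breadcrumbs *b = &engine->breadcrumbs;
	struct intel_context *ce, *cn;
	struct list_head *pos, *next;
	LIST_HEAD(signal);

	spin_lock(&b->irq_lock);

	/* Only contexts with at least one enabled signaler are tracked. */
	list_for_each_entry_safe(ce, cn, &b->signalers, signal_link) {
		/*
		 * ce->signals is kept in seqno order, so we can stop at
		 * the first request the HW has not yet completed.
		 */
		list_for_each_safe(pos, next, &ce->signals) {
			struct i915_request *rq =
				list_entry(pos, typeof(*rq), signal_link);

			if (!i915_request_completed(rq))
				break;

			/* Defer the actual signaling until after the lock. */
			i915_request_get(rq);
			list_move_tail(&rq->signal_link, &signal);
		}

		if (list_empty(&ce->signals))
			list_del_init(&ce->signal_link);
	}

	spin_unlock(&b->irq_lock);

	list_for_each_safe(pos, next, &signal) {
		struct i915_request *rq =
			list_entry(pos, typeof(*rq), signal_link);

		/* Signals the fence and thereby wakes any attached waiter. */
		dma_fence_signal(&rq->fence);
		i915_request_put(rq);
	}
}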

Having previously fixed all the issues with irq-seqno serialisation (by
inserting delays onto the GPU after each request instead of random delays
on the CPU after each interrupt), we can rely on the seqno state to
perform direct wakeups from the interrupt handler. This allows us to
preserve our single context switch behaviour of the current routine,
with the only downside being that we lose the RT priority sorting of
wakeups. In general, direct wakeup latency of multiple clients is about
the same (about 10% better in most cases) with a reduction in total CPU
time spent in the waiter (about 20-50% depending on gen). Average herd
behaviour is improved, but at the cost of not delegating wakeups on
task_prio.
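
On the waiter side, a client no longer parks in a per-engine rbtree; it
attaches an ordinary dma_fence callback and sleeps, along the lines of
the sketch below (condensed from the reworked i915_request_wait(); the
wait flags and the optional busywait-before-sleep are omitted):

/* Kernel context assumed: <linux/dma-fence.h>, <linux/sched/signal.h>. */
struct request_wait {
	struct dma_fence_cb cb;
	struct task_struct *tsk;
};

static void request_wait_wake(struct dma_fence *fence, struct dma_fence_cb *cb)
{
	struct request_wait *wait = container_of(cb, typeof(*wait), cb);

	wake_up_process(wait->tsk);
}

static long wait_sketch(struct i915_request *rq, long timeout)
{
	const int state = TASK_INTERRUPTIBLE; /* simplified: ignores wait flags */
	struct request_wait wait;

	wait.tsk = current;
	if (dma_fence_add_callback(&rq->fence, &wait.cb, request_wait_wake))
		return timeout; /* fence already signaled */

	for (;;) {
		set_current_state(state);

		if (i915_request_completed(rq))
			break;

		if (signal_pending_state(state, current)) {
			timeout = -ERESTARTSYS;
			break;
		}

		if (!timeout) {
			timeout = -ETIME;
			break;
		}

		timeout = io_schedule_timeout(timeout);
	}
	__set_current_state(TASK_RUNNING);

	dma_fence_remove_callback(&rq->fence, &wait.cb);
	return timeout;
}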

v2: Capture fence signaling state for error state and add comments to
warm even the coldest of hearts.
v3: Check if the request is still active before busywaiting
v4: Reduce the amount of pointer misdirection with list_for_each_safe
and using a local i915_request variable inside the loops
v5: Add a missing pluralisation to a purely informative selftest message.

References: 688e6c7 ("drm/i915: Slaughter the thundering i915_wait_request herd")
Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Cc: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
Reviewed-by: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20190129205230.19056-2-chris@chris-wilson.co.uk
ickle committed Jan 29, 2019
1 parent 3df0bd1 commit 52c0fdb
Showing 23 changed files with 890 additions and 1,481 deletions.
28 changes: 1 addition & 27 deletions drivers/gpu/drm/i915/i915_debugfs.c
@@ -1315,29 +1315,16 @@ static int i915_hangcheck_info(struct seq_file *m, void *unused)
seq_printf(m, "GT active? %s\n", yesno(dev_priv->gt.awake));

for_each_engine(engine, dev_priv, id) {
struct intel_breadcrumbs *b = &engine->breadcrumbs;
struct rb_node *rb;

seq_printf(m, "%s:\n", engine->name);
seq_printf(m, "\tseqno = %x [current %x, last %x], %dms ago\n",
engine->hangcheck.seqno, seqno[id],
intel_engine_last_submit(engine),
jiffies_to_msecs(jiffies -
engine->hangcheck.action_timestamp));
seq_printf(m, "\twaiters? %s, fake irq active? %s\n",
yesno(intel_engine_has_waiter(engine)),
seq_printf(m, "\tfake irq active? %s\n",
yesno(test_bit(engine->id,
&dev_priv->gpu_error.missed_irq_rings)));

spin_lock_irq(&b->rb_lock);
for (rb = rb_first(&b->waiters); rb; rb = rb_next(rb)) {
struct intel_wait *w = rb_entry(rb, typeof(*w), node);

seq_printf(m, "\t%s [%d] waiting for %x\n",
w->tsk->comm, w->tsk->pid, w->seqno);
}
spin_unlock_irq(&b->rb_lock);

seq_printf(m, "\tACTHD = 0x%08llx [current 0x%08llx]\n",
(long long)engine->hangcheck.acthd,
(long long)acthd[id]);
@@ -2021,18 +2008,6 @@ static int i915_swizzle_info(struct seq_file *m, void *data)
return 0;
}

static int count_irq_waiters(struct drm_i915_private *i915)
{
struct intel_engine_cs *engine;
enum intel_engine_id id;
int count = 0;

for_each_engine(engine, i915, id)
count += intel_engine_has_waiter(engine);

return count;
}

static const char *rps_power_to_str(unsigned int power)
{
static const char * const strings[] = {
@@ -2072,7 +2047,6 @@ static int i915_rps_boost_info(struct seq_file *m, void *data)
seq_printf(m, "RPS enabled? %d\n", rps->enabled);
seq_printf(m, "GPU busy? %s [%d requests]\n",
yesno(dev_priv->gt.awake), dev_priv->gt.active_requests);
seq_printf(m, "CPU waiting? %d\n", count_irq_waiters(dev_priv));
seq_printf(m, "Boosts outstanding? %d\n",
atomic_read(&rps->num_waiters));
seq_printf(m, "Interactive? %d\n", READ_ONCE(rps->power.interactive));
3 changes: 3 additions & 0 deletions drivers/gpu/drm/i915/i915_gem_context.c
@@ -327,6 +327,9 @@ intel_context_init(struct intel_context *ce,
struct intel_engine_cs *engine)
{
ce->gem_context = ctx;

INIT_LIST_HEAD(&ce->signal_link);
INIT_LIST_HEAD(&ce->signals);
}

static struct i915_gem_context *
2 changes: 2 additions & 0 deletions drivers/gpu/drm/i915/i915_gem_context.h
@@ -164,6 +164,8 @@ struct i915_gem_context {
struct intel_context {
struct i915_gem_context *gem_context;
struct intel_engine_cs *active;
struct list_head signal_link;
struct list_head signals;
struct i915_vma *state;
struct intel_ring *ring;
u32 *lrc_reg_state;
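
The two list heads above carry the new per-context tracking: ce->signals
holds the context's in-flight requests that have signaling enabled, kept
in seqno order so the irq-side walk can stop early, while ce->signal_link
threads the context onto the engine's breadcrumbs list of active
signalers. Insertion works roughly as follows (a sketch in the spirit of
the i915_request_enable_breadcrumb() added by this patch; the breadcrumbs
irq_lock and irq arming are assumed to be handled by the caller):

static void enable_signaling_sketch(struct i915_request *rq)
{
	struct intel_breadcrumbs *b = &rq->engine->breadcrumbs;
	struct intel_context *ce = rq->hw_context;
	struct list_head *pos;

	/* Walk backwards: requests almost always arrive in seqno order. */
	list_for_each_prev(pos, &ce->signals) {
		struct i915_request *it =
			list_entry(pos, typeof(*it), signal_link);

		if (i915_seqno_passed(rq->fence.seqno, it->fence.seqno))
			break;
	}
	list_add(&rq->signal_link, pos);

	/* Catch the transition from an empty list: start tracking ce. */
	if (pos == &ce->signals)
		list_move_tail(&ce->signal_link, &b->signalers);
}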
83 changes: 8 additions & 75 deletions drivers/gpu/drm/i915/i915_gpu_error.c
@@ -447,9 +447,14 @@ static void error_print_request(struct drm_i915_error_state_buf *m,
if (!erq->seqno)
return;

err_printf(m, "%s pid %d, ban score %d, seqno %8x:%08x, prio %d, emitted %dms, start %08x, head %08x, tail %08x\n",
err_printf(m, "%s pid %d, ban score %d, seqno %8x:%08x%s%s, prio %d, emitted %dms, start %08x, head %08x, tail %08x\n",
prefix, erq->pid, erq->ban_score,
erq->context, erq->seqno, erq->sched_attr.priority,
erq->context, erq->seqno,
test_bit(DMA_FENCE_FLAG_SIGNALED_BIT,
&erq->flags) ? "!" : "",
test_bit(DMA_FENCE_FLAG_ENABLE_SIGNAL_BIT,
&erq->flags) ? "+" : "",
erq->sched_attr.priority,
jiffies_to_msecs(erq->jiffies - epoch),
erq->start, erq->head, erq->tail);
}
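
Here "!" marks a request whose fence has already signaled and "+" one
that has signaling enabled. A hypothetical dump line (all values made up
for illustration) would read roughly:

  active pid 4021, ban score 0, seqno        5:0000001a!+, prio 0, emitted 12ms, start 00001000, head 00000a00, tail 00000a40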
@@ -530,7 +535,6 @@ static void error_print_engine(struct drm_i915_error_state_buf *m,
}
err_printf(m, " seqno: 0x%08x\n", ee->seqno);
err_printf(m, " last_seqno: 0x%08x\n", ee->last_seqno);
err_printf(m, " waiting: %s\n", yesno(ee->waiting));
err_printf(m, " ring->head: 0x%08x\n", ee->cpu_ring_head);
err_printf(m, " ring->tail: 0x%08x\n", ee->cpu_ring_tail);
err_printf(m, " hangcheck timestamp: %dms (%lu%s)\n",
@@ -804,21 +808,6 @@ static void __err_print_to_sgl(struct drm_i915_error_state_buf *m,
error->epoch);
}

if (IS_ERR(ee->waiters)) {
err_printf(m, "%s --- ? waiters [unable to acquire spinlock]\n",
m->i915->engine[i]->name);
} else if (ee->num_waiters) {
err_printf(m, "%s --- %d waiters\n",
m->i915->engine[i]->name,
ee->num_waiters);
for (j = 0; j < ee->num_waiters; j++) {
err_printf(m, " seqno 0x%08x for %s [%d]\n",
ee->waiters[j].seqno,
ee->waiters[j].comm,
ee->waiters[j].pid);
}
}

print_error_obj(m, m->i915->engine[i],
"ringbuffer", ee->ringbuffer);

@@ -1000,8 +989,6 @@ void __i915_gpu_state_free(struct kref *error_ref)
i915_error_object_free(ee->wa_ctx);

kfree(ee->requests);
if (!IS_ERR_OR_NULL(ee->waiters))
kfree(ee->waiters);
}

for (i = 0; i < ARRAY_SIZE(error->active_bo); i++)
@@ -1205,59 +1192,6 @@ static void gen6_record_semaphore_state(struct intel_engine_cs *engine,
I915_READ(RING_SYNC_2(engine->mmio_base));
}

static void error_record_engine_waiters(struct intel_engine_cs *engine,
struct drm_i915_error_engine *ee)
{
struct intel_breadcrumbs *b = &engine->breadcrumbs;
struct drm_i915_error_waiter *waiter;
struct rb_node *rb;
int count;

ee->num_waiters = 0;
ee->waiters = NULL;

if (RB_EMPTY_ROOT(&b->waiters))
return;

if (!spin_trylock_irq(&b->rb_lock)) {
ee->waiters = ERR_PTR(-EDEADLK);
return;
}

count = 0;
for (rb = rb_first(&b->waiters); rb != NULL; rb = rb_next(rb))
count++;
spin_unlock_irq(&b->rb_lock);

waiter = NULL;
if (count)
waiter = kmalloc_array(count,
sizeof(struct drm_i915_error_waiter),
GFP_ATOMIC);
if (!waiter)
return;

if (!spin_trylock_irq(&b->rb_lock)) {
kfree(waiter);
ee->waiters = ERR_PTR(-EDEADLK);
return;
}

ee->waiters = waiter;
for (rb = rb_first(&b->waiters); rb; rb = rb_next(rb)) {
struct intel_wait *w = rb_entry(rb, typeof(*w), node);

strcpy(waiter->comm, w->tsk->comm);
waiter->pid = w->tsk->pid;
waiter->seqno = w->seqno;
waiter++;

if (++ee->num_waiters == count)
break;
}
spin_unlock_irq(&b->rb_lock);
}

static void error_record_engine_registers(struct i915_gpu_state *error,
struct intel_engine_cs *engine,
struct drm_i915_error_engine *ee)
@@ -1293,7 +1227,6 @@ static void error_record_engine_registers(struct i915_gpu_state *error,

intel_engine_get_instdone(engine, &ee->instdone);

ee->waiting = intel_engine_has_waiter(engine);
ee->instpm = I915_READ(RING_INSTPM(engine->mmio_base));
ee->acthd = intel_engine_get_active_head(engine);
ee->seqno = intel_engine_get_seqno(engine);
@@ -1367,6 +1300,7 @@ static void record_request(struct i915_request *request,
{
struct i915_gem_context *ctx = request->gem_context;

erq->flags = request->fence.flags;
erq->context = ctx->hw_id;
erq->sched_attr = request->sched.attr;
erq->ban_score = atomic_read(&ctx->ban_score);
@@ -1542,7 +1476,6 @@ static void gem_record_rings(struct i915_gpu_state *error)
ee->engine_id = i;

error_record_engine_registers(error, engine, ee);
error_record_engine_waiters(engine, ee);
error_record_engine_execlists(engine, ee);

request = i915_gem_find_active_request(engine);
9 changes: 1 addition & 8 deletions drivers/gpu/drm/i915/i915_gpu_error.h
@@ -82,8 +82,6 @@ struct i915_gpu_state {
int engine_id;
/* Software tracked state */
bool idle;
bool waiting;
int num_waiters;
unsigned long hangcheck_timestamp;
struct i915_address_space *vm;
int num_requests;
@@ -147,6 +145,7 @@ struct i915_gpu_state {
struct drm_i915_error_object *default_state;

struct drm_i915_error_request {
unsigned long flags;
long jiffies;
pid_t pid;
u32 context;
@@ -159,12 +158,6 @@
} *requests, execlist[EXECLIST_MAX_PORTS];
unsigned int num_ports;

struct drm_i915_error_waiter {
char comm[TASK_COMM_LEN];
pid_t pid;
u32 seqno;
} *waiters;

struct {
u32 gfx_mode;
union {
82 changes: 11 additions & 71 deletions drivers/gpu/drm/i915/i915_irq.c
@@ -1169,66 +1169,6 @@ static void ironlake_rps_change_irq_handler(struct drm_i915_private *dev_priv)
return;
}

static void notify_ring(struct intel_engine_cs *engine)
{
const u32 seqno = intel_engine_get_seqno(engine);
struct i915_request *rq = NULL;
struct task_struct *tsk = NULL;
struct intel_wait *wait;

if (unlikely(!engine->breadcrumbs.irq_armed))
return;

rcu_read_lock();

spin_lock(&engine->breadcrumbs.irq_lock);
wait = engine->breadcrumbs.irq_wait;
if (wait) {
/*
* We use a callback from the dma-fence to submit
* requests after waiting on our own requests. To
* ensure minimum delay in queuing the next request to
* hardware, signal the fence now rather than wait for
* the signaler to be woken up. We still wake up the
* waiter in order to handle the irq-seqno coherency
* issues (we may receive the interrupt before the
* seqno is written, see __i915_request_irq_complete())
* and to handle coalescing of multiple seqno updates
* and many waiters.
*/
if (i915_seqno_passed(seqno, wait->seqno)) {
struct i915_request *waiter = wait->request;

if (waiter &&
!i915_request_signaled(waiter) &&
intel_wait_check_request(wait, waiter))
rq = i915_request_get(waiter);

tsk = wait->tsk;
}

engine->breadcrumbs.irq_count++;
} else {
if (engine->breadcrumbs.irq_armed)
__intel_engine_disarm_breadcrumbs(engine);
}
spin_unlock(&engine->breadcrumbs.irq_lock);

if (rq) {
spin_lock(&rq->lock);
dma_fence_signal_locked(&rq->fence);
GEM_BUG_ON(!i915_request_completed(rq));
spin_unlock(&rq->lock);

i915_request_put(rq);
}

if (tsk && tsk->state & TASK_NORMAL)
wake_up_process(tsk);

rcu_read_unlock();
}

static void vlv_c0_read(struct drm_i915_private *dev_priv,
struct intel_rps_ei *ei)
{
@@ -1473,20 +1413,20 @@ static void ilk_gt_irq_handler(struct drm_i915_private *dev_priv,
u32 gt_iir)
{
if (gt_iir & GT_RENDER_USER_INTERRUPT)
notify_ring(dev_priv->engine[RCS]);
intel_engine_breadcrumbs_irq(dev_priv->engine[RCS]);
if (gt_iir & ILK_BSD_USER_INTERRUPT)
notify_ring(dev_priv->engine[VCS]);
intel_engine_breadcrumbs_irq(dev_priv->engine[VCS]);
}

static void snb_gt_irq_handler(struct drm_i915_private *dev_priv,
u32 gt_iir)
{
if (gt_iir & GT_RENDER_USER_INTERRUPT)
notify_ring(dev_priv->engine[RCS]);
intel_engine_breadcrumbs_irq(dev_priv->engine[RCS]);
if (gt_iir & GT_BSD_USER_INTERRUPT)
notify_ring(dev_priv->engine[VCS]);
intel_engine_breadcrumbs_irq(dev_priv->engine[VCS]);
if (gt_iir & GT_BLT_USER_INTERRUPT)
notify_ring(dev_priv->engine[BCS]);
intel_engine_breadcrumbs_irq(dev_priv->engine[BCS]);

if (gt_iir & (GT_BLT_CS_ERROR_INTERRUPT |
GT_BSD_CS_ERROR_INTERRUPT |
@@ -1506,7 +1446,7 @@ gen8_cs_irq_handler(struct intel_engine_cs *engine, u32 iir)
tasklet = true;

if (iir & GT_RENDER_USER_INTERRUPT) {
notify_ring(engine);
intel_engine_breadcrumbs_irq(engine);
tasklet |= USES_GUC_SUBMISSION(engine->i915);
}

@@ -1852,7 +1792,7 @@ static void gen6_rps_irq_handler(struct drm_i915_private *dev_priv, u32 pm_iir)

if (HAS_VEBOX(dev_priv)) {
if (pm_iir & PM_VEBOX_USER_INTERRUPT)
notify_ring(dev_priv->engine[VECS]);
intel_engine_breadcrumbs_irq(dev_priv->engine[VECS]);

if (pm_iir & PM_VEBOX_CS_ERROR_INTERRUPT)
DRM_DEBUG("Command parser error, pm_iir 0x%08x\n", pm_iir);
@@ -4276,7 +4216,7 @@ static irqreturn_t i8xx_irq_handler(int irq, void *arg)
I915_WRITE16(IIR, iir);

if (iir & I915_USER_INTERRUPT)
notify_ring(dev_priv->engine[RCS]);
intel_engine_breadcrumbs_irq(dev_priv->engine[RCS]);

if (iir & I915_MASTER_ERROR_INTERRUPT)
i8xx_error_irq_handler(dev_priv, eir, eir_stuck);
@@ -4384,7 +4324,7 @@ static irqreturn_t i915_irq_handler(int irq, void *arg)
I915_WRITE(IIR, iir);

if (iir & I915_USER_INTERRUPT)
notify_ring(dev_priv->engine[RCS]);
intel_engine_breadcrumbs_irq(dev_priv->engine[RCS]);

if (iir & I915_MASTER_ERROR_INTERRUPT)
i9xx_error_irq_handler(dev_priv, eir, eir_stuck);
@@ -4529,10 +4469,10 @@ static irqreturn_t i965_irq_handler(int irq, void *arg)
I915_WRITE(IIR, iir);

if (iir & I915_USER_INTERRUPT)
notify_ring(dev_priv->engine[RCS]);
intel_engine_breadcrumbs_irq(dev_priv->engine[RCS]);

if (iir & I915_BSD_USER_INTERRUPT)
notify_ring(dev_priv->engine[VCS]);
intel_engine_breadcrumbs_irq(dev_priv->engine[VCS]);

if (iir & I915_MASTER_ERROR_INTERRUPT)
i9xx_error_irq_handler(dev_priv, eir, eir_stuck);
