Skip to content

Commit

Permalink
drm/amdgpu: Add check to prevent IH overflow
Browse files Browse the repository at this point in the history
[ Upstream commit e4180c4 ]

Similar to commit <b82175750131>("drm/amdgpu: fix IH overflow on Vega10 v2").
When an ring buffer overflow happens the appropriate bit is set in the WPTR
register which is also written back to memory. But clearing the bit in the
WPTR doesn't trigger another memory writeback.

So what can happen is that we end up processing the buffer overflow over and
over again because the bit is never cleared. Resulting in a random system
lockup because of an infinite loop in an interrupt handler.

Reviewed-by: Christian König <christian.koenig@amd.com>
Signed-off-by: Defang Bo <bodefang@126.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
Signed-off-by: Sasha Levin <sashal@kernel.org>
  • Loading branch information
Defang Bo authored and gregkh committed Mar 7, 2021
1 parent 2962f65 commit 1622ab3
Show file tree
Hide file tree
Showing 3 changed files with 71 additions and 39 deletions.
37 changes: 24 additions & 13 deletions drivers/gpu/drm/amd/amdgpu/cz_ih.c
Expand Up @@ -194,19 +194,30 @@ static u32 cz_ih_get_wptr(struct amdgpu_device *adev,

wptr = le32_to_cpu(*ih->wptr_cpu);

if (REG_GET_FIELD(wptr, IH_RB_WPTR, RB_OVERFLOW)) {
wptr = REG_SET_FIELD(wptr, IH_RB_WPTR, RB_OVERFLOW, 0);
/* When a ring buffer overflow happen start parsing interrupt
* from the last not overwritten vector (wptr + 16). Hopefully
* this should allow us to catchup.
*/
dev_warn(adev->dev, "IH ring buffer overflow (0x%08X, 0x%08X, 0x%08X)\n",
wptr, ih->rptr, (wptr + 16) & ih->ptr_mask);
ih->rptr = (wptr + 16) & ih->ptr_mask;
tmp = RREG32(mmIH_RB_CNTL);
tmp = REG_SET_FIELD(tmp, IH_RB_CNTL, WPTR_OVERFLOW_CLEAR, 1);
WREG32(mmIH_RB_CNTL, tmp);
}
if (!REG_GET_FIELD(wptr, IH_RB_WPTR, RB_OVERFLOW))
goto out;

/* Double check that the overflow wasn't already cleared. */
wptr = RREG32(mmIH_RB_WPTR);

if (!REG_GET_FIELD(wptr, IH_RB_WPTR, RB_OVERFLOW))
goto out;

wptr = REG_SET_FIELD(wptr, IH_RB_WPTR, RB_OVERFLOW, 0);

/* When a ring buffer overflow happen start parsing interrupt
* from the last not overwritten vector (wptr + 16). Hopefully
* this should allow us to catchup.
*/
dev_warn(adev->dev, "IH ring buffer overflow (0x%08X, 0x%08X, 0x%08X)\n",
wptr, ih->rptr, (wptr + 16) & ih->ptr_mask);
ih->rptr = (wptr + 16) & ih->ptr_mask;
tmp = RREG32(mmIH_RB_CNTL);
tmp = REG_SET_FIELD(tmp, IH_RB_CNTL, WPTR_OVERFLOW_CLEAR, 1);
WREG32(mmIH_RB_CNTL, tmp);


out:
return (wptr & ih->ptr_mask);
}

Expand Down
36 changes: 23 additions & 13 deletions drivers/gpu/drm/amd/amdgpu/iceland_ih.c
Expand Up @@ -194,19 +194,29 @@ static u32 iceland_ih_get_wptr(struct amdgpu_device *adev,

wptr = le32_to_cpu(*ih->wptr_cpu);

if (REG_GET_FIELD(wptr, IH_RB_WPTR, RB_OVERFLOW)) {
wptr = REG_SET_FIELD(wptr, IH_RB_WPTR, RB_OVERFLOW, 0);
/* When a ring buffer overflow happen start parsing interrupt
* from the last not overwritten vector (wptr + 16). Hopefully
* this should allow us to catchup.
*/
dev_warn(adev->dev, "IH ring buffer overflow (0x%08X, 0x%08X, 0x%08X)\n",
wptr, ih->rptr, (wptr + 16) & ih->ptr_mask);
ih->rptr = (wptr + 16) & ih->ptr_mask;
tmp = RREG32(mmIH_RB_CNTL);
tmp = REG_SET_FIELD(tmp, IH_RB_CNTL, WPTR_OVERFLOW_CLEAR, 1);
WREG32(mmIH_RB_CNTL, tmp);
}
if (!REG_GET_FIELD(wptr, IH_RB_WPTR, RB_OVERFLOW))
goto out;

/* Double check that the overflow wasn't already cleared. */
wptr = RREG32(mmIH_RB_WPTR);

if (!REG_GET_FIELD(wptr, IH_RB_WPTR, RB_OVERFLOW))
goto out;

wptr = REG_SET_FIELD(wptr, IH_RB_WPTR, RB_OVERFLOW, 0);
/* When a ring buffer overflow happen start parsing interrupt
* from the last not overwritten vector (wptr + 16). Hopefully
* this should allow us to catchup.
*/
dev_warn(adev->dev, "IH ring buffer overflow (0x%08X, 0x%08X, 0x%08X)\n",
wptr, ih->rptr, (wptr + 16) & ih->ptr_mask);
ih->rptr = (wptr + 16) & ih->ptr_mask;
tmp = RREG32(mmIH_RB_CNTL);
tmp = REG_SET_FIELD(tmp, IH_RB_CNTL, WPTR_OVERFLOW_CLEAR, 1);
WREG32(mmIH_RB_CNTL, tmp);


out:
return (wptr & ih->ptr_mask);
}

Expand Down
37 changes: 24 additions & 13 deletions drivers/gpu/drm/amd/amdgpu/tonga_ih.c
Expand Up @@ -196,19 +196,30 @@ static u32 tonga_ih_get_wptr(struct amdgpu_device *adev,

wptr = le32_to_cpu(*ih->wptr_cpu);

if (REG_GET_FIELD(wptr, IH_RB_WPTR, RB_OVERFLOW)) {
wptr = REG_SET_FIELD(wptr, IH_RB_WPTR, RB_OVERFLOW, 0);
/* When a ring buffer overflow happen start parsing interrupt
* from the last not overwritten vector (wptr + 16). Hopefully
* this should allow us to catchup.
*/
dev_warn(adev->dev, "IH ring buffer overflow (0x%08X, 0x%08X, 0x%08X)\n",
wptr, ih->rptr, (wptr + 16) & ih->ptr_mask);
ih->rptr = (wptr + 16) & ih->ptr_mask;
tmp = RREG32(mmIH_RB_CNTL);
tmp = REG_SET_FIELD(tmp, IH_RB_CNTL, WPTR_OVERFLOW_CLEAR, 1);
WREG32(mmIH_RB_CNTL, tmp);
}
if (!REG_GET_FIELD(wptr, IH_RB_WPTR, RB_OVERFLOW))
goto out;

/* Double check that the overflow wasn't already cleared. */
wptr = RREG32(mmIH_RB_WPTR);

if (!REG_GET_FIELD(wptr, IH_RB_WPTR, RB_OVERFLOW))
goto out;

wptr = REG_SET_FIELD(wptr, IH_RB_WPTR, RB_OVERFLOW, 0);

/* When a ring buffer overflow happen start parsing interrupt
* from the last not overwritten vector (wptr + 16). Hopefully
* this should allow us to catchup.
*/

dev_warn(adev->dev, "IH ring buffer overflow (0x%08X, 0x%08X, 0x%08X)\n",
wptr, ih->rptr, (wptr + 16) & ih->ptr_mask);
ih->rptr = (wptr + 16) & ih->ptr_mask;
tmp = RREG32(mmIH_RB_CNTL);
tmp = REG_SET_FIELD(tmp, IH_RB_CNTL, WPTR_OVERFLOW_CLEAR, 1);
WREG32(mmIH_RB_CNTL, tmp);

out:
return (wptr & ih->ptr_mask);
}

Expand Down

0 comments on commit 1622ab3

Please sign in to comment.