Skip to content

Commit

Permalink
drm/amdgpu: Add a new flag to AMDGPU_CTX_OP_QUERY_STATE2
Browse files Browse the repository at this point in the history
Add AMDGPU_CTX_QUERY2_FLAGS_RAS_CE/UE which indicate if any error happened
between previous query and this query.

Signed-off-by: xinhui pan <xinhui.pan@amd.com>
Reviewed-by: Alex Deucher <alexander.deucher@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
  • Loading branch information
xinhui pan authored and alexdeucher committed Mar 19, 2019
1 parent 791c476 commit ae363a2
Show file tree
Hide file tree
Showing 3 changed files with 22 additions and 0 deletions.
17 changes: 17 additions & 0 deletions drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,7 @@
#include <drm/drm_auth.h>
#include "amdgpu.h"
#include "amdgpu_sched.h"
#include "amdgpu_ras.h"

#define to_amdgpu_ctx_entity(e) \
container_of((e), struct amdgpu_ctx_entity, entity)
Expand Down Expand Up @@ -344,6 +345,7 @@ static int amdgpu_ctx_query2(struct amdgpu_device *adev,
{
struct amdgpu_ctx *ctx;
struct amdgpu_ctx_mgr *mgr;
uint32_t ras_counter;

if (!fpriv)
return -EINVAL;
Expand All @@ -368,6 +370,21 @@ static int amdgpu_ctx_query2(struct amdgpu_device *adev,
if (atomic_read(&ctx->guilty))
out->state.flags |= AMDGPU_CTX_QUERY2_FLAGS_GUILTY;

/*query ue count*/
ras_counter = amdgpu_ras_query_error_count(adev, false);
/*ras counter is monotonic increasing*/
if (ras_counter != ctx->ras_counter_ue) {
out->state.flags |= AMDGPU_CTX_QUERY2_FLAGS_RAS_UE;
ctx->ras_counter_ue = ras_counter;
}

/*query ce count*/
ras_counter = amdgpu_ras_query_error_count(adev, true);
if (ras_counter != ctx->ras_counter_ce) {
out->state.flags |= AMDGPU_CTX_QUERY2_FLAGS_RAS_CE;
ctx->ras_counter_ce = ras_counter;
}

mutex_unlock(&mgr->lock);
return 0;
}
Expand Down
2 changes: 2 additions & 0 deletions drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.h
Original file line number Diff line number Diff line change
Expand Up @@ -49,6 +49,8 @@ struct amdgpu_ctx {
enum drm_sched_priority override_priority;
struct mutex lock;
atomic_t guilty;
uint32_t ras_counter_ce;
uint32_t ras_counter_ue;
};

struct amdgpu_ctx_mgr {
Expand Down
3 changes: 3 additions & 0 deletions include/uapi/drm/amdgpu_drm.h
Original file line number Diff line number Diff line change
Expand Up @@ -210,6 +210,9 @@ union drm_amdgpu_bo_list {
#define AMDGPU_CTX_QUERY2_FLAGS_VRAMLOST (1<<1)
/* indicate some job from this context once cause gpu hang */
#define AMDGPU_CTX_QUERY2_FLAGS_GUILTY (1<<2)
/* indicate some errors are detected by RAS */
#define AMDGPU_CTX_QUERY2_FLAGS_RAS_CE (1<<3)
#define AMDGPU_CTX_QUERY2_FLAGS_RAS_UE (1<<4)

/* Context priority level */
#define AMDGPU_CTX_PRIORITY_UNSET -2048
Expand Down

0 comments on commit ae363a2

Please sign in to comment.