Skip to content

Commit

Permalink
drm/amdgpu: add support for IP discovery gc_info table v2
Browse files Browse the repository at this point in the history
commit 5e713c6 upstream.

Used on gfx9 based systems. Fixes incorrect CU counts reported
in the kernel log.

Bug: https://gitlab.freedesktop.org/drm/amd/-/issues/1833
Reviewed-by: Hawking Zhang <Hawking.Zhang@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
Cc: stable@vger.kernel.org
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
  • Loading branch information
alexdeucher authored and gregkh committed Jan 5, 2022
1 parent 97fd2b6 commit 94ba5b0
Show file tree
Hide file tree
Showing 2 changed files with 103 additions and 22 deletions.
76 changes: 54 additions & 22 deletions drivers/gpu/drm/amd/amdgpu/amdgpu_discovery.c
Expand Up @@ -415,38 +415,70 @@ void amdgpu_discovery_harvest_ip(struct amdgpu_device *adev)
}
}

union gc_info {
struct gc_info_v1_0 v1;
struct gc_info_v2_0 v2;
};

int amdgpu_discovery_get_gfx_info(struct amdgpu_device *adev)
{
struct binary_header *bhdr;
struct gc_info_v1_0 *gc_info;
union gc_info *gc_info;

if (!adev->mman.discovery_bin) {
DRM_ERROR("ip discovery uninitialized\n");
return -EINVAL;
}

bhdr = (struct binary_header *)adev->mman.discovery_bin;
gc_info = (struct gc_info_v1_0 *)(adev->mman.discovery_bin +
gc_info = (union gc_info *)(adev->mman.discovery_bin +
le16_to_cpu(bhdr->table_list[GC].offset));

adev->gfx.config.max_shader_engines = le32_to_cpu(gc_info->gc_num_se);
adev->gfx.config.max_cu_per_sh = 2 * (le32_to_cpu(gc_info->gc_num_wgp0_per_sa) +
le32_to_cpu(gc_info->gc_num_wgp1_per_sa));
adev->gfx.config.max_sh_per_se = le32_to_cpu(gc_info->gc_num_sa_per_se);
adev->gfx.config.max_backends_per_se = le32_to_cpu(gc_info->gc_num_rb_per_se);
adev->gfx.config.max_texture_channel_caches = le32_to_cpu(gc_info->gc_num_gl2c);
adev->gfx.config.max_gprs = le32_to_cpu(gc_info->gc_num_gprs);
adev->gfx.config.max_gs_threads = le32_to_cpu(gc_info->gc_num_max_gs_thds);
adev->gfx.config.gs_vgt_table_depth = le32_to_cpu(gc_info->gc_gs_table_depth);
adev->gfx.config.gs_prim_buffer_depth = le32_to_cpu(gc_info->gc_gsprim_buff_depth);
adev->gfx.config.double_offchip_lds_buf = le32_to_cpu(gc_info->gc_double_offchip_lds_buffer);
adev->gfx.cu_info.wave_front_size = le32_to_cpu(gc_info->gc_wave_size);
adev->gfx.cu_info.max_waves_per_simd = le32_to_cpu(gc_info->gc_max_waves_per_simd);
adev->gfx.cu_info.max_scratch_slots_per_cu = le32_to_cpu(gc_info->gc_max_scratch_slots_per_cu);
adev->gfx.cu_info.lds_size = le32_to_cpu(gc_info->gc_lds_size);
adev->gfx.config.num_sc_per_sh = le32_to_cpu(gc_info->gc_num_sc_per_se) /
le32_to_cpu(gc_info->gc_num_sa_per_se);
adev->gfx.config.num_packer_per_sc = le32_to_cpu(gc_info->gc_num_packer_per_sc);

switch (gc_info->v1.header.version_major) {
case 1:
adev->gfx.config.max_shader_engines = le32_to_cpu(gc_info->v1.gc_num_se);
adev->gfx.config.max_cu_per_sh = 2 * (le32_to_cpu(gc_info->v1.gc_num_wgp0_per_sa) +
le32_to_cpu(gc_info->v1.gc_num_wgp1_per_sa));
adev->gfx.config.max_sh_per_se = le32_to_cpu(gc_info->v1.gc_num_sa_per_se);
adev->gfx.config.max_backends_per_se = le32_to_cpu(gc_info->v1.gc_num_rb_per_se);
adev->gfx.config.max_texture_channel_caches = le32_to_cpu(gc_info->v1.gc_num_gl2c);
adev->gfx.config.max_gprs = le32_to_cpu(gc_info->v1.gc_num_gprs);
adev->gfx.config.max_gs_threads = le32_to_cpu(gc_info->v1.gc_num_max_gs_thds);
adev->gfx.config.gs_vgt_table_depth = le32_to_cpu(gc_info->v1.gc_gs_table_depth);
adev->gfx.config.gs_prim_buffer_depth = le32_to_cpu(gc_info->v1.gc_gsprim_buff_depth);
adev->gfx.config.double_offchip_lds_buf = le32_to_cpu(gc_info->v1.gc_double_offchip_lds_buffer);
adev->gfx.cu_info.wave_front_size = le32_to_cpu(gc_info->v1.gc_wave_size);
adev->gfx.cu_info.max_waves_per_simd = le32_to_cpu(gc_info->v1.gc_max_waves_per_simd);
adev->gfx.cu_info.max_scratch_slots_per_cu = le32_to_cpu(gc_info->v1.gc_max_scratch_slots_per_cu);
adev->gfx.cu_info.lds_size = le32_to_cpu(gc_info->v1.gc_lds_size);
adev->gfx.config.num_sc_per_sh = le32_to_cpu(gc_info->v1.gc_num_sc_per_se) /
le32_to_cpu(gc_info->v1.gc_num_sa_per_se);
adev->gfx.config.num_packer_per_sc = le32_to_cpu(gc_info->v1.gc_num_packer_per_sc);
break;
case 2:
adev->gfx.config.max_shader_engines = le32_to_cpu(gc_info->v2.gc_num_se);
adev->gfx.config.max_cu_per_sh = le32_to_cpu(gc_info->v2.gc_num_cu_per_sh);
adev->gfx.config.max_sh_per_se = le32_to_cpu(gc_info->v2.gc_num_sh_per_se);
adev->gfx.config.max_backends_per_se = le32_to_cpu(gc_info->v2.gc_num_rb_per_se);
adev->gfx.config.max_texture_channel_caches = le32_to_cpu(gc_info->v2.gc_num_tccs);
adev->gfx.config.max_gprs = le32_to_cpu(gc_info->v2.gc_num_gprs);
adev->gfx.config.max_gs_threads = le32_to_cpu(gc_info->v2.gc_num_max_gs_thds);
adev->gfx.config.gs_vgt_table_depth = le32_to_cpu(gc_info->v2.gc_gs_table_depth);
adev->gfx.config.gs_prim_buffer_depth = le32_to_cpu(gc_info->v2.gc_gsprim_buff_depth);
adev->gfx.config.double_offchip_lds_buf = le32_to_cpu(gc_info->v2.gc_double_offchip_lds_buffer);
adev->gfx.cu_info.wave_front_size = le32_to_cpu(gc_info->v2.gc_wave_size);
adev->gfx.cu_info.max_waves_per_simd = le32_to_cpu(gc_info->v2.gc_max_waves_per_simd);
adev->gfx.cu_info.max_scratch_slots_per_cu = le32_to_cpu(gc_info->v2.gc_max_scratch_slots_per_cu);
adev->gfx.cu_info.lds_size = le32_to_cpu(gc_info->v2.gc_lds_size);
adev->gfx.config.num_sc_per_sh = le32_to_cpu(gc_info->v2.gc_num_sc_per_se) /
le32_to_cpu(gc_info->v2.gc_num_sh_per_se);
adev->gfx.config.num_packer_per_sc = le32_to_cpu(gc_info->v2.gc_num_packer_per_sc);
break;
default:
dev_err(adev->dev,
"Unhandled GC info table %d.%d\n",
gc_info->v1.header.version_major,
gc_info->v1.header.version_minor);
return -EINVAL;
}
return 0;
}
49 changes: 49 additions & 0 deletions drivers/gpu/drm/amd/include/discovery.h
Expand Up @@ -143,6 +143,55 @@ struct gc_info_v1_0 {
uint32_t gc_num_gl2a;
};

struct gc_info_v1_1 {
struct gpu_info_header header;

uint32_t gc_num_se;
uint32_t gc_num_wgp0_per_sa;
uint32_t gc_num_wgp1_per_sa;
uint32_t gc_num_rb_per_se;
uint32_t gc_num_gl2c;
uint32_t gc_num_gprs;
uint32_t gc_num_max_gs_thds;
uint32_t gc_gs_table_depth;
uint32_t gc_gsprim_buff_depth;
uint32_t gc_parameter_cache_depth;
uint32_t gc_double_offchip_lds_buffer;
uint32_t gc_wave_size;
uint32_t gc_max_waves_per_simd;
uint32_t gc_max_scratch_slots_per_cu;
uint32_t gc_lds_size;
uint32_t gc_num_sc_per_se;
uint32_t gc_num_sa_per_se;
uint32_t gc_num_packer_per_sc;
uint32_t gc_num_gl2a;
uint32_t gc_num_tcp_per_sa;
uint32_t gc_num_sdp_interface;
uint32_t gc_num_tcps;
};

struct gc_info_v2_0 {
struct gpu_info_header header;

uint32_t gc_num_se;
uint32_t gc_num_cu_per_sh;
uint32_t gc_num_sh_per_se;
uint32_t gc_num_rb_per_se;
uint32_t gc_num_tccs;
uint32_t gc_num_gprs;
uint32_t gc_num_max_gs_thds;
uint32_t gc_gs_table_depth;
uint32_t gc_gsprim_buff_depth;
uint32_t gc_parameter_cache_depth;
uint32_t gc_double_offchip_lds_buffer;
uint32_t gc_wave_size;
uint32_t gc_max_waves_per_simd;
uint32_t gc_max_scratch_slots_per_cu;
uint32_t gc_lds_size;
uint32_t gc_num_sc_per_se;
uint32_t gc_num_packer_per_sc;
};

typedef struct harvest_info_header {
uint32_t signature; /* Table Signature */
uint32_t version; /* Table Version */
Expand Down

0 comments on commit 94ba5b0

Please sign in to comment.