Skip to content

Commit 59d36f7

Browse files
authored
Bug Fix - Fix numa error on grace cpu in gpu-copy (#658)
The current GPU copy bandwidth benchmark fails on NVIDIA Grace systems. These systems expose memory-only NUMA nodes, and numa_run_on_node fails for such nodes and halts the benchmark completely. This fix checks whether each NUMA node has any CPU cores assigned; if a node has none, that node is skipped during args creation and the scan continues with the next node.
1 parent 61770b8 commit 59d36f7

File tree

1 file changed

+26
-1
lines changed
  • superbench/benchmarks/micro_benchmarks/gpu_copy_performance

1 file changed

+26
-1
lines changed

superbench/benchmarks/micro_benchmarks/gpu_copy_performance/gpu_copy.cu

Lines changed: 26 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -3,9 +3,9 @@
33

44
// GPU copy benchmark tests dtoh/htod/dtod data transfer bandwidth by GPU SM/DMA.
55

6+
#include <cerrno> // errno
67
#include <cstdio>
78
#include <cstring>
8-
#include <string>
99
#include <vector>
1010

1111
#include <getopt.h>
@@ -313,6 +313,25 @@ int SetGpu(int gpu_id) {
313313
return 0;
314314
}
315315

316+
// Check if its NUMA node has CPUs.
317+
bool HasCPUsForNumaNode(int node) {
318+
struct bitmask *bm = numa_allocate_cpumask();
319+
320+
int numa_err = numa_node_to_cpus(node, bm);
321+
if (numa_err != 0) {
322+
fprintf(stderr, "HasCPUsForNumaNode::numa_node_to_cpus error on node: %d, code: %d, message: %s\n", node, errno,
323+
strerror(errno));
324+
325+
numa_bitmask_free(bm);
326+
return false; // On error
327+
}
328+
329+
// Check if any CPU is assigned to the NUMA node, has_cpus is false for mem only numa nodes
330+
bool has_cpus = (numa_bitmask_weight(bm) > 0);
331+
numa_free_cpumask(bm);
332+
return has_cpus;
333+
}
334+
316335
#if defined(__HIP_PLATFORM_AMD__)
317336
bool UseFineGrained(const SubBenchArgs &args) {
318337
return args.is_src_dev_gpu && args.is_dst_dev_gpu && args.src_gpu_id != args.dst_gpu_id;
@@ -1134,6 +1153,12 @@ int main(int argc, char **argv) {
11341153
// Scan all NUMA nodes
11351154
for (int i = 0; i < numa_count; i++) {
11361155
args.numa_id = i;
1156+
1157+
// Skip NUMA nodes without CPUs (e.g. NVIDIA Grace systems have memory-only NUMA nodes)
1158+
if (!HasCPUsForNumaNode(args.numa_id)) {
1159+
continue;
1160+
}
1161+
11371162
// Scan all GPUs
11381163
for (int j = 0; j < gpu_count; j++) {
11391164
// Host-to-device benchmark

0 commit comments

Comments
 (0)