From b38c98640b98e7da71afe4643a070cc26075127c Mon Sep 17 00:00:00 2001 From: Anoop Kapoor Date: Fri, 17 Oct 2025 13:02:06 -0700 Subject: [PATCH 1/3] @FIR-1031 - GGML: Add TSI Kernel Count for each OP at PERF TABLE --- ggml/include/ggml.h | 5 ++++- ggml/src/ggml-tsavorite/ggml-tsavorite.cpp | 3 +++ ggml/src/ggml.c | 2 ++ src/llama-context.cpp | 8 +++++--- 4 files changed, 14 insertions(+), 4 deletions(-) diff --git a/ggml/include/ggml.h b/ggml/include/ggml.h index d73ee6e0afd5e..d053288051885 100644 --- a/ggml/include/ggml.h +++ b/ggml/include/ggml.h @@ -662,9 +662,10 @@ extern "C" { #if defined(GGML_PERF) || defined(GGML_PERF_RELEASE) || defined(GGML_PERF_DETAIL) int64_t perf_runs; + int64_t tsi_kernel_runs; int64_t perf_time_us; enum ggml_compute_backend_type ggml_compute_backend; - char padding[4]; + char padding[12]; #else char padding[8]; #endif /* GML_PERF-related flag */ @@ -2561,11 +2562,13 @@ extern "C" { struct ggml_perf_backend_subtotals { int64_t total_us; int64_t runs; + int64_t tsi_kernel_count; }; struct ggml_perf_unary_subtotals { int64_t total_us; int64_t runs; + int64_t tsi_kernel_count; }; // internal perf accumulation struct struct ggml_perf_totals { diff --git a/ggml/src/ggml-tsavorite/ggml-tsavorite.cpp b/ggml/src/ggml-tsavorite/ggml-tsavorite.cpp index c2cf6d5b5141b..5ce7c2e4d45a6 100644 --- a/ggml/src/ggml-tsavorite/ggml-tsavorite.cpp +++ b/ggml/src/ggml-tsavorite/ggml-tsavorite.cpp @@ -1225,6 +1225,7 @@ static enum ggml_status ggml_tsavorite_graph_compute(ggml_backend_t backend, val[0] = scale; ctx->kernels[kernel_type].pipeline->_mlir_fptr_3_input[kernel_sub_type](srcP0, srcP1, nodeP, glob_buf); ++device->stats.op_run_count[kernel_type].num_of_kernel_call; + ++node->tsi_kernel_runs; } } } @@ -1258,6 +1259,7 @@ static enum ggml_status ggml_tsavorite_graph_compute(ggml_backend_t backend, // kernel call ctx->kernels[kernel_type].pipeline->_mlir_fptr_2_input[kernel_sub_type](srcP0, srcP1, nodeP); 
++device->stats.op_run_count[kernel_type].num_of_kernel_call; + ++node->tsi_kernel_runs; } } } @@ -1372,6 +1374,7 @@ static enum ggml_status ggml_tsavorite_graph_compute(ggml_backend_t backend, ctx->kernels[kernel_type].pipeline->_mlir_fptr_1_input[kernel_sub_type](srcP0, nodeP); } ++device->stats.op_run_count[kernel_type].num_of_kernel_call; + ++node->tsi_kernel_runs; if (ggml_tsavorite_log_type_val == GGML_TSAVORITE_LOG_DEBUG) { log_data.data_type = GGML_TSAVORITE_TENSOR_NODE; diff --git a/ggml/src/ggml.c b/ggml/src/ggml.c index 960622c9cb8ba..0628c437f2592 100644 --- a/ggml/src/ggml.c +++ b/ggml/src/ggml.c @@ -7249,12 +7249,14 @@ void ggml_perf_accumulate(struct ggml_perf_totals totals[GGML_OP_COUNT], struct if (be >= GGML_COMPUTE_BACKEND_CPU && be < GGML_COMPUTE_BACKEND_COUNT) { totals[op].backend_subtotals[be].total_us += node->perf_time_us; totals[op].backend_subtotals[be].runs += node->perf_runs; + totals[op].backend_subtotals[be].tsi_kernel_count += node->tsi_kernel_runs; } if (op == GGML_OP_UNARY) { enum ggml_unary_op subop = ggml_get_unary_op(node); totals[op].unary_subtotals[subop].total_us += node->perf_time_us; totals[op].unary_subtotals[subop].runs += node->perf_runs; + totals[op].unary_subtotals[subop].tsi_kernel_count += node->tsi_kernel_runs; } } } diff --git a/src/llama-context.cpp b/src/llama-context.cpp index a7f77ae1c64f3..e9a19c9e96c97 100644 --- a/src/llama-context.cpp +++ b/src/llama-context.cpp @@ -2791,7 +2791,7 @@ void ggml_perf_print_totals(struct ggml_perf_totals totals[GGML_OP_COUNT]) { #elif defined(GGML_PERF) || defined(GGML_PERF_DETAIL) void ggml_perf_print_totals(struct ggml_perf_totals totals[GGML_OP_COUNT]) { LLAMA_LOG_TSAVORITE("\n=== GGML Perf Summary ===\n"); - LLAMA_LOG_TSAVORITE(" %-16s %-8s %7s %14s %16s\n", "Op", "Target", "Runs", "Total us", "Avg us"); + LLAMA_LOG_TSAVORITE(" %-16s %-8s %7s %14s %16s %16s\n", "Op", "Target", "Runs", "TSI_KERNEL-RUN", "Total us", "Avg us"); for (int i = 0; i < GGML_OP_COUNT; ++i) { if 
(totals[i].runs > 0) { @@ -2801,10 +2801,11 @@ void ggml_perf_print_totals(struct ggml_perf_totals totals[GGML_OP_COUNT]) { char padded_backend[7] = {0}; // 6 chars + null terminator snprintf(padded_backend, sizeof(padded_backend), "%-6s", backend_name); - LLAMA_LOG_TSAVORITE(" %-16s %-8s %7ld %14ld %16.2f\n", + LLAMA_LOG_TSAVORITE(" %-16s %-8s %7ld %14ld %16ld %16.2f\n", totals[i].op_name ? totals[i].op_name : "UNKNOWN", padded_backend, totals[i].backend_subtotals[b].runs, + totals[i].backend_subtotals[b].tsi_kernel_count, totals[i].backend_subtotals[b].total_us, (double)totals[i].backend_subtotals[b].total_us / totals[i].backend_subtotals[b].runs); } @@ -2826,10 +2827,11 @@ void ggml_perf_print_totals(struct ggml_perf_totals totals[GGML_OP_COUNT]) { char padded_backend[7] = {0}; snprintf(padded_backend, sizeof(padded_backend), "%-6s", backend_name ? backend_name : "UNK"); - LLAMA_LOG_TSAVORITE(" -> %-11s %-8s %7ld %14ld %16.2f\n", + LLAMA_LOG_TSAVORITE(" -> %-11s %-8s %7ld %14ld %16ld %16.2f\n", ggml_unary_op_name((enum ggml_unary_op) j), padded_backend, totals[i].unary_subtotals[j].runs, + totals[i].unary_subtotals[j].tsi_kernel_count, totals[i].unary_subtotals[j].total_us, (double)totals[i].unary_subtotals[j].total_us / totals[i].unary_subtotals[j].runs); } From 20316282f32c7f6b40e06ab5ad019f6add9bf948 Mon Sep 17 00:00:00 2001 From: Anoop Kapoor Date: Fri, 17 Oct 2025 13:06:33 -0700 Subject: [PATCH 2/3] Fixed indentation issue raised by Ashish --- ggml/include/ggml.h | 2 +- ggml/src/ggml-tsavorite/ggml-tsavorite.cpp | 6 +++--- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/ggml/include/ggml.h b/ggml/include/ggml.h index d053288051885..d78fcb747ea3b 100644 --- a/ggml/include/ggml.h +++ b/ggml/include/ggml.h @@ -662,7 +662,7 @@ extern "C" { #if defined(GGML_PERF) || defined(GGML_PERF_RELEASE) || defined(GGML_PERF_DETAIL) int64_t perf_runs; - int64_t tsi_kernel_runs; + int64_t tsi_kernel_runs; int64_t perf_time_us; enum ggml_compute_backend_type
ggml_compute_backend; char padding[12]; diff --git a/ggml/src/ggml-tsavorite/ggml-tsavorite.cpp b/ggml/src/ggml-tsavorite/ggml-tsavorite.cpp index 5ce7c2e4d45a6..0de76aeb3f450 100644 --- a/ggml/src/ggml-tsavorite/ggml-tsavorite.cpp +++ b/ggml/src/ggml-tsavorite/ggml-tsavorite.cpp @@ -1225,7 +1225,7 @@ static enum ggml_status ggml_tsavorite_graph_compute(ggml_backend_t backend, val[0] = scale; ctx->kernels[kernel_type].pipeline->_mlir_fptr_3_input[kernel_sub_type](srcP0, srcP1, nodeP, glob_buf); ++device->stats.op_run_count[kernel_type].num_of_kernel_call; - ++node->tsi_kernel_runs; + ++node->tsi_kernel_runs; } } } @@ -1259,7 +1259,7 @@ static enum ggml_status ggml_tsavorite_graph_compute(ggml_backend_t backend, // kernel call ctx->kernels[kernel_type].pipeline->_mlir_fptr_2_input[kernel_sub_type](srcP0, srcP1, nodeP); ++device->stats.op_run_count[kernel_type].num_of_kernel_call; - ++node->tsi_kernel_runs; + ++node->tsi_kernel_runs; } } } @@ -1374,7 +1374,7 @@ static enum ggml_status ggml_tsavorite_graph_compute(ggml_backend_t backend, ctx->kernels[kernel_type].pipeline->_mlir_fptr_1_input[kernel_sub_type](srcP0, nodeP); } ++device->stats.op_run_count[kernel_type].num_of_kernel_call; - ++node->tsi_kernel_runs; + ++node->tsi_kernel_runs; if (ggml_tsavorite_log_type_val == GGML_TSAVORITE_LOG_DEBUG) { log_data.data_type = GGML_TSAVORITE_TENSOR_NODE; From 219745ee26b37122cc280233e6df2af35f822d73 Mon Sep 17 00:00:00 2001 From: Anoop Kapoor Date: Fri, 17 Oct 2025 13:10:27 -0700 Subject: [PATCH 3/3] fixed indentation issue --- ggml/src/ggml.c | 2 +- src/llama-context.cpp | 6 +++--- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/ggml/src/ggml.c b/ggml/src/ggml.c index 0628c437f2592..79072ef59a41f 100644 --- a/ggml/src/ggml.c +++ b/ggml/src/ggml.c @@ -7256,7 +7256,7 @@ void ggml_perf_accumulate(struct ggml_perf_totals totals[GGML_OP_COUNT], struct enum ggml_unary_op subop = ggml_get_unary_op(node); totals[op].unary_subtotals[subop].total_us +=
node->perf_time_us; totals[op].unary_subtotals[subop].runs += node->perf_runs; - totals[op].unary_subtotals[subop].tsi_kernel_count += node->tsi_kernel_runs; + totals[op].unary_subtotals[subop].tsi_kernel_count += node->tsi_kernel_runs; } } } diff --git a/src/llama-context.cpp b/src/llama-context.cpp index e9a19c9e96c97..24491e9309a5c 100644 --- a/src/llama-context.cpp +++ b/src/llama-context.cpp @@ -2801,11 +2801,11 @@ void ggml_perf_print_totals(struct ggml_perf_totals totals[GGML_OP_COUNT]) { char padded_backend[7] = {0}; // 6 chars + null terminator snprintf(padded_backend, sizeof(padded_backend), "%-6s", backend_name); - LLAMA_LOG_TSAVORITE(" %-16s %-8s %7ld %14ld %16ld %16.2f\n", + LLAMA_LOG_TSAVORITE(" %-16s %-8s %7ld %14ld %16ld %16.2f\n", totals[i].op_name ? totals[i].op_name : "UNKNOWN", padded_backend, totals[i].backend_subtotals[b].runs, - totals[i].backend_subtotals[b].tsi_kernel_count, + totals[i].backend_subtotals[b].tsi_kernel_count, totals[i].backend_subtotals[b].total_us, (double)totals[i].backend_subtotals[b].total_us / totals[i].backend_subtotals[b].runs); } @@ -2831,7 +2831,7 @@ void ggml_perf_print_totals(struct ggml_perf_totals totals[GGML_OP_COUNT]) { ggml_unary_op_name((enum ggml_unary_op) j), padded_backend, totals[i].unary_subtotals[j].runs, - totals[i].unary_subtotals[j].tsi_kernel_count, + totals[i].unary_subtotals[j].tsi_kernel_count, totals[i].unary_subtotals[j].total_us, (double)totals[i].unary_subtotals[j].total_us / totals[i].unary_subtotals[j].runs); }