From b38c98640b98e7da71afe4643a070cc26075127c Mon Sep 17 00:00:00 2001
From: Anoop Kapoor <akapoor@wssw01.tsavoritesi.net>
Date: Fri, 17 Oct 2025 13:02:06 -0700
Subject: [PATCH 1/3] @FIR-1031 - GGML: Add TSI kernel count for each op to
 the perf table

---
 ggml/include/ggml.h                        | 5 ++++-
 ggml/src/ggml-tsavorite/ggml-tsavorite.cpp | 3 +++
 ggml/src/ggml.c                            | 2 ++
 src/llama-context.cpp                      | 8 +++++---
 4 files changed, 14 insertions(+), 4 deletions(-)

diff --git a/ggml/include/ggml.h b/ggml/include/ggml.h
index d73ee6e0afd5e..d053288051885 100644
--- a/ggml/include/ggml.h
+++ b/ggml/include/ggml.h
@@ -662,9 +662,10 @@ extern "C" {
 
 #if defined(GGML_PERF) || defined(GGML_PERF_RELEASE) || defined(GGML_PERF_DETAIL)
         int64_t perf_runs;
+	int64_t tsi_kernel_runs;
         int64_t perf_time_us;
         enum ggml_compute_backend_type ggml_compute_backend;
-        char padding[4];
+        char padding[12];
 #else
         char padding[8];
 #endif /* GML_PERF-related flag */
@@ -2561,11 +2562,13 @@ extern "C" {
 struct ggml_perf_backend_subtotals {
     int64_t total_us;
     int64_t runs;
+    int64_t tsi_kernel_count; // sum of node->tsi_kernel_runs, added up in ggml_perf_accumulate
 };
 
 struct ggml_perf_unary_subtotals {
     int64_t total_us;
     int64_t runs;
+    int64_t tsi_kernel_count; // sum of node->tsi_kernel_runs for GGML_OP_UNARY nodes, per unary sub-op
 };
 // internal perf accumulation struct
 struct ggml_perf_totals {
diff --git a/ggml/src/ggml-tsavorite/ggml-tsavorite.cpp b/ggml/src/ggml-tsavorite/ggml-tsavorite.cpp
index c2cf6d5b5141b..5ce7c2e4d45a6 100644
--- a/ggml/src/ggml-tsavorite/ggml-tsavorite.cpp
+++ b/ggml/src/ggml-tsavorite/ggml-tsavorite.cpp
@@ -1225,6 +1225,7 @@ static enum ggml_status ggml_tsavorite_graph_compute(ggml_backend_t backend,
                         val[0] = scale;
                         ctx->kernels[kernel_type].pipeline->_mlir_fptr_3_input[kernel_sub_type](srcP0, srcP1, nodeP, glob_buf);
                         ++device->stats.op_run_count[kernel_type].num_of_kernel_call;
+			++node->tsi_kernel_runs;
 	            }
 	        }
 	    }
@@ -1258,6 +1259,7 @@ static enum ggml_status ggml_tsavorite_graph_compute(ggml_backend_t backend,
                     // kernel call
                     ctx->kernels[kernel_type].pipeline->_mlir_fptr_2_input[kernel_sub_type](srcP0, srcP1, nodeP);
                     ++device->stats.op_run_count[kernel_type].num_of_kernel_call;
+		    ++node->tsi_kernel_runs;
                 }
             }
         }
@@ -1372,6 +1374,7 @@ static enum ggml_status ggml_tsavorite_graph_compute(ggml_backend_t backend,
             ctx->kernels[kernel_type].pipeline->_mlir_fptr_1_input[kernel_sub_type](srcP0, nodeP);
 	}
         ++device->stats.op_run_count[kernel_type].num_of_kernel_call;
+	++node->tsi_kernel_runs;
 
         if (ggml_tsavorite_log_type_val == GGML_TSAVORITE_LOG_DEBUG) {
           log_data.data_type = GGML_TSAVORITE_TENSOR_NODE;
diff --git a/ggml/src/ggml.c b/ggml/src/ggml.c
index 960622c9cb8ba..0628c437f2592 100644
--- a/ggml/src/ggml.c
+++ b/ggml/src/ggml.c
@@ -7249,12 +7249,14 @@ void ggml_perf_accumulate(struct ggml_perf_totals totals[GGML_OP_COUNT], struct
         if (be >= GGML_COMPUTE_BACKEND_CPU && be < GGML_COMPUTE_BACKEND_COUNT) {
             totals[op].backend_subtotals[be].total_us += node->perf_time_us;
 	    totals[op].backend_subtotals[be].runs     += node->perf_runs;
+	    totals[op].backend_subtotals[be].tsi_kernel_count   += node->tsi_kernel_runs;
         }
 
         if (op == GGML_OP_UNARY) {
             enum ggml_unary_op subop = ggml_get_unary_op(node);
             totals[op].unary_subtotals[subop].total_us += node->perf_time_us;
             totals[op].unary_subtotals[subop].runs     += node->perf_runs;
+	    totals[op].unary_subtotals[subop].tsi_kernel_count   += node->tsi_kernel_runs;
         }
     }
 }
diff --git a/src/llama-context.cpp b/src/llama-context.cpp
index a7f77ae1c64f3..e9a19c9e96c97 100644
--- a/src/llama-context.cpp
+++ b/src/llama-context.cpp
@@ -2791,7 +2791,7 @@ void ggml_perf_print_totals(struct ggml_perf_totals totals[GGML_OP_COUNT]) {
 #elif defined(GGML_PERF) || defined(GGML_PERF_DETAIL)
 void ggml_perf_print_totals(struct ggml_perf_totals totals[GGML_OP_COUNT]) {
     LLAMA_LOG_TSAVORITE("\n=== GGML Perf Summary ===\n");
-    LLAMA_LOG_TSAVORITE("  %-16s %-8s %7s  %14s  %16s\n", "Op", "Target", "Runs", "Total us", "Avg us");
+    LLAMA_LOG_TSAVORITE("  %-16s %-8s %7s  %14s  %16s  %16s\n", "Op", "Target", "Runs", "TSI_KERNEL-RUN", "Total us", "Avg us");
 
     for (int i = 0; i < GGML_OP_COUNT; ++i) {
         if (totals[i].runs > 0) {
@@ -2801,10 +2801,11 @@ void ggml_perf_print_totals(struct ggml_perf_totals totals[GGML_OP_COUNT]) {
                     char padded_backend[7] = {0}; // 6 chars + null terminator
                     snprintf(padded_backend, sizeof(padded_backend), "%-6s", backend_name);
 
-                    LLAMA_LOG_TSAVORITE("  %-16s %-8s %7ld  %14ld  %16.2f\n",
+		    LLAMA_LOG_TSAVORITE("  %-16s %-8s %7ld  %14ld  %16ld  %16.2f\n",
                         totals[i].op_name ? totals[i].op_name : "UNKNOWN",
                         padded_backend,
                         totals[i].backend_subtotals[b].runs,
+			totals[i].backend_subtotals[b].tsi_kernel_count,
                         totals[i].backend_subtotals[b].total_us,
                         (double)totals[i].backend_subtotals[b].total_us / totals[i].backend_subtotals[b].runs);
                 }
@@ -2826,10 +2827,11 @@ void ggml_perf_print_totals(struct ggml_perf_totals totals[GGML_OP_COUNT]) {
                         char padded_backend[7] = {0};
                         snprintf(padded_backend, sizeof(padded_backend), "%-6s", backend_name ? backend_name : "UNK");
 
-                        LLAMA_LOG_TSAVORITE("    -> %-11s %-8s %7ld  %14ld  %16.2f\n",
+                        LLAMA_LOG_TSAVORITE("    -> %-11s %-8s %7ld  %14ld  %16ld  %16.2f\n",
                             ggml_unary_op_name((enum ggml_unary_op) j),
                             padded_backend,
                             totals[i].unary_subtotals[j].runs,
+			    totals[i].unary_subtotals[j].tsi_kernel_count,
                             totals[i].unary_subtotals[j].total_us,
                             (double)totals[i].unary_subtotals[j].total_us / totals[i].unary_subtotals[j].runs);
                     }

From 20316282f32c7f6b40e06ab5ad019f6add9bf948 Mon Sep 17 00:00:00 2001
From: Anoop Kapoor <akapoor@wssw01.tsavoritesi.net>
Date: Fri, 17 Oct 2025 13:06:33 -0700
Subject: [PATCH 2/3] Fixed indentation issue raised by Ashish

---
 ggml/include/ggml.h                        | 2 +-
 ggml/src/ggml-tsavorite/ggml-tsavorite.cpp | 6 +++---
 2 files changed, 4 insertions(+), 4 deletions(-)

diff --git a/ggml/include/ggml.h b/ggml/include/ggml.h
index d053288051885..d78fcb747ea3b 100644
--- a/ggml/include/ggml.h
+++ b/ggml/include/ggml.h
@@ -662,7 +662,7 @@ extern "C" {
 
 #if defined(GGML_PERF) || defined(GGML_PERF_RELEASE) || defined(GGML_PERF_DETAIL)
         int64_t perf_runs;
-	int64_t tsi_kernel_runs;
+        int64_t tsi_kernel_runs;
         int64_t perf_time_us;
         enum ggml_compute_backend_type ggml_compute_backend;
         char padding[12];
diff --git a/ggml/src/ggml-tsavorite/ggml-tsavorite.cpp b/ggml/src/ggml-tsavorite/ggml-tsavorite.cpp
index 5ce7c2e4d45a6..0de76aeb3f450 100644
--- a/ggml/src/ggml-tsavorite/ggml-tsavorite.cpp
+++ b/ggml/src/ggml-tsavorite/ggml-tsavorite.cpp
@@ -1225,7 +1225,7 @@ static enum ggml_status ggml_tsavorite_graph_compute(ggml_backend_t backend,
                         val[0] = scale;
                         ctx->kernels[kernel_type].pipeline->_mlir_fptr_3_input[kernel_sub_type](srcP0, srcP1, nodeP, glob_buf);
                         ++device->stats.op_run_count[kernel_type].num_of_kernel_call;
-			++node->tsi_kernel_runs;
+                        ++node->tsi_kernel_runs;
 	            }
 	        }
 	    }
@@ -1259,7 +1259,7 @@ static enum ggml_status ggml_tsavorite_graph_compute(ggml_backend_t backend,
                     // kernel call
                     ctx->kernels[kernel_type].pipeline->_mlir_fptr_2_input[kernel_sub_type](srcP0, srcP1, nodeP);
                     ++device->stats.op_run_count[kernel_type].num_of_kernel_call;
-		    ++node->tsi_kernel_runs;
+                    ++node->tsi_kernel_runs;
                 }
             }
         }
@@ -1374,7 +1374,7 @@ static enum ggml_status ggml_tsavorite_graph_compute(ggml_backend_t backend,
             ctx->kernels[kernel_type].pipeline->_mlir_fptr_1_input[kernel_sub_type](srcP0, nodeP);
 	}
         ++device->stats.op_run_count[kernel_type].num_of_kernel_call;
-	++node->tsi_kernel_runs;
+        ++node->tsi_kernel_runs;
 
         if (ggml_tsavorite_log_type_val == GGML_TSAVORITE_LOG_DEBUG) {
           log_data.data_type = GGML_TSAVORITE_TENSOR_NODE;

From 219745ee26b37122cc280233e6df2af35f822d73 Mon Sep 17 00:00:00 2001
From: Anoop Kapoor <akapoor@wssw01.tsavoritesi.net>
Date: Fri, 17 Oct 2025 13:10:27 -0700
Subject: [PATCH 3/3] Fixed indentation issue

---
 ggml/src/ggml.c       | 2 +-
 src/llama-context.cpp | 6 +++---
 2 files changed, 4 insertions(+), 4 deletions(-)

diff --git a/ggml/src/ggml.c b/ggml/src/ggml.c
index 0628c437f2592..79072ef59a41f 100644
--- a/ggml/src/ggml.c
+++ b/ggml/src/ggml.c
@@ -7256,7 +7256,7 @@ void ggml_perf_accumulate(struct ggml_perf_totals totals[GGML_OP_COUNT], struct
             enum ggml_unary_op subop = ggml_get_unary_op(node);
             totals[op].unary_subtotals[subop].total_us += node->perf_time_us;
             totals[op].unary_subtotals[subop].runs     += node->perf_runs;
-	    totals[op].unary_subtotals[subop].tsi_kernel_count   += node->tsi_kernel_runs;
+            totals[op].unary_subtotals[subop].tsi_kernel_count   += node->tsi_kernel_runs;
         }
     }
 }
diff --git a/src/llama-context.cpp b/src/llama-context.cpp
index e9a19c9e96c97..24491e9309a5c 100644
--- a/src/llama-context.cpp
+++ b/src/llama-context.cpp
@@ -2801,11 +2801,11 @@ void ggml_perf_print_totals(struct ggml_perf_totals totals[GGML_OP_COUNT]) {
                     char padded_backend[7] = {0}; // 6 chars + null terminator
                     snprintf(padded_backend, sizeof(padded_backend), "%-6s", backend_name);
 
-		    LLAMA_LOG_TSAVORITE("  %-16s %-8s %7ld  %14ld  %16ld  %16.2f\n",
+                    LLAMA_LOG_TSAVORITE("  %-16s %-8s %7ld  %14ld  %16ld  %16.2f\n",
                         totals[i].op_name ? totals[i].op_name : "UNKNOWN",
                         padded_backend,
                         totals[i].backend_subtotals[b].runs,
-			totals[i].backend_subtotals[b].tsi_kernel_count,
+                        totals[i].backend_subtotals[b].tsi_kernel_count,
                         totals[i].backend_subtotals[b].total_us,
                         (double)totals[i].backend_subtotals[b].total_us / totals[i].backend_subtotals[b].runs);
                 }
@@ -2831,7 +2831,7 @@ void ggml_perf_print_totals(struct ggml_perf_totals totals[GGML_OP_COUNT]) {
                             ggml_unary_op_name((enum ggml_unary_op) j),
                             padded_backend,
                             totals[i].unary_subtotals[j].runs,
-			    totals[i].unary_subtotals[j].tsi_kernel_count,
+                            totals[i].unary_subtotals[j].tsi_kernel_count,
                             totals[i].unary_subtotals[j].total_us,
                             (double)totals[i].unary_subtotals[j].total_us / totals[i].unary_subtotals[j].runs);
                     }