Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 4 additions & 1 deletion ggml/include/ggml.h
Original file line number Diff line number Diff line change
Expand Up @@ -662,9 +662,10 @@ extern "C" {

#if defined(GGML_PERF) || defined(GGML_PERF_RELEASE) || defined(GGML_PERF_DETAIL)
int64_t perf_runs;
int64_t tsi_kernel_runs;
int64_t perf_time_us;
enum ggml_compute_backend_type ggml_compute_backend;
char padding[4];
char padding[12];
#else
char padding[8];
#endif /* GGML_PERF-related flag */
Expand Down Expand Up @@ -2561,11 +2562,13 @@ extern "C" {
// Per-backend perf subtotals for a single op.
// ggml_perf_accumulate() adds each node's counters into the entry selected by
// the node's compute backend.
struct ggml_perf_backend_subtotals {
int64_t total_us; // accumulated node->perf_time_us
int64_t runs; // accumulated node->perf_runs
int64_t tsi_kernel_count; // accumulated node->tsi_kernel_runs (bumped once per kernel call by the tsavorite backend)
};

// Perf subtotals for one unary sub-operation.
// ggml_perf_accumulate() fills these only for GGML_OP_UNARY nodes, using the
// entry selected by ggml_get_unary_op(node).
struct ggml_perf_unary_subtotals {
int64_t total_us; // accumulated node->perf_time_us
int64_t runs; // accumulated node->perf_runs
int64_t tsi_kernel_count; // accumulated node->tsi_kernel_runs (bumped once per kernel call by the tsavorite backend)
};
// internal perf accumulation struct
struct ggml_perf_totals {
Expand Down
3 changes: 3 additions & 0 deletions ggml/src/ggml-tsavorite/ggml-tsavorite.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1225,6 +1225,7 @@ static enum ggml_status ggml_tsavorite_graph_compute(ggml_backend_t backend,
val[0] = scale;
ctx->kernels[kernel_type].pipeline->_mlir_fptr_3_input[kernel_sub_type](srcP0, srcP1, nodeP, glob_buf);
++device->stats.op_run_count[kernel_type].num_of_kernel_call;
++node->tsi_kernel_runs;
}
}
}
Expand Down Expand Up @@ -1258,6 +1259,7 @@ static enum ggml_status ggml_tsavorite_graph_compute(ggml_backend_t backend,
// kernel call
ctx->kernels[kernel_type].pipeline->_mlir_fptr_2_input[kernel_sub_type](srcP0, srcP1, nodeP);
++device->stats.op_run_count[kernel_type].num_of_kernel_call;
++node->tsi_kernel_runs;
}
}
}
Expand Down Expand Up @@ -1372,6 +1374,7 @@ static enum ggml_status ggml_tsavorite_graph_compute(ggml_backend_t backend,
ctx->kernels[kernel_type].pipeline->_mlir_fptr_1_input[kernel_sub_type](srcP0, nodeP);
}
++device->stats.op_run_count[kernel_type].num_of_kernel_call;
++node->tsi_kernel_runs;

if (ggml_tsavorite_log_type_val == GGML_TSAVORITE_LOG_DEBUG) {
log_data.data_type = GGML_TSAVORITE_TENSOR_NODE;
Expand Down
2 changes: 2 additions & 0 deletions ggml/src/ggml.c
Original file line number Diff line number Diff line change
Expand Up @@ -7249,12 +7249,14 @@ void ggml_perf_accumulate(struct ggml_perf_totals totals[GGML_OP_COUNT], struct
if (be >= GGML_COMPUTE_BACKEND_CPU && be < GGML_COMPUTE_BACKEND_COUNT) {
totals[op].backend_subtotals[be].total_us += node->perf_time_us;
totals[op].backend_subtotals[be].runs += node->perf_runs;
totals[op].backend_subtotals[be].tsi_kernel_count += node->tsi_kernel_runs;
}

if (op == GGML_OP_UNARY) {
enum ggml_unary_op subop = ggml_get_unary_op(node);
totals[op].unary_subtotals[subop].total_us += node->perf_time_us;
totals[op].unary_subtotals[subop].runs += node->perf_runs;
totals[op].unary_subtotals[subop].tsi_kernel_count += node->tsi_kernel_runs;
}
}
}
Expand Down
8 changes: 5 additions & 3 deletions src/llama-context.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -2791,7 +2791,7 @@ void ggml_perf_print_totals(struct ggml_perf_totals totals[GGML_OP_COUNT]) {
#elif defined(GGML_PERF) || defined(GGML_PERF_DETAIL)
void ggml_perf_print_totals(struct ggml_perf_totals totals[GGML_OP_COUNT]) {
LLAMA_LOG_TSAVORITE("\n=== GGML Perf Summary ===\n");
LLAMA_LOG_TSAVORITE(" %-16s %-8s %7s %14s %16s\n", "Op", "Target", "Runs", "Total us", "Avg us");
LLAMA_LOG_TSAVORITE(" %-16s %-8s %7s %14s %16s %16s\n", "Op", "Target", "Runs", "TSI_KERNEL-RUN", "Total us", "Avg us");

for (int i = 0; i < GGML_OP_COUNT; ++i) {
if (totals[i].runs > 0) {
Expand All @@ -2801,10 +2801,11 @@ void ggml_perf_print_totals(struct ggml_perf_totals totals[GGML_OP_COUNT]) {
char padded_backend[7] = {0}; // 6 chars + null terminator
snprintf(padded_backend, sizeof(padded_backend), "%-6s", backend_name);

LLAMA_LOG_TSAVORITE(" %-16s %-8s %7ld %14ld %16.2f\n",
LLAMA_LOG_TSAVORITE(" %-16s %-8s %7ld %14ld %16ld %16.2f\n",
totals[i].op_name ? totals[i].op_name : "UNKNOWN",
padded_backend,
totals[i].backend_subtotals[b].runs,
totals[i].backend_subtotals[b].tsi_kernel_count,
totals[i].backend_subtotals[b].total_us,
(double)totals[i].backend_subtotals[b].total_us / totals[i].backend_subtotals[b].runs);
}
Expand All @@ -2826,10 +2827,11 @@ void ggml_perf_print_totals(struct ggml_perf_totals totals[GGML_OP_COUNT]) {
char padded_backend[7] = {0};
snprintf(padded_backend, sizeof(padded_backend), "%-6s", backend_name ? backend_name : "UNK");

LLAMA_LOG_TSAVORITE(" -> %-11s %-8s %7ld %14ld %16.2f\n",
LLAMA_LOG_TSAVORITE(" -> %-11s %-8s %7ld %14ld %16ld %16.2f\n",
ggml_unary_op_name((enum ggml_unary_op) j),
padded_backend,
totals[i].unary_subtotals[j].runs,
totals[i].unary_subtotals[j].tsi_kernel_count,
totals[i].unary_subtotals[j].total_us,
(double)totals[i].unary_subtotals[j].total_us / totals[i].unary_subtotals[j].runs);
}
Expand Down