9 changes: 9 additions & 0 deletions common/log.h
@@ -90,11 +90,20 @@ void common_log_set_timestamps(struct common_log * log, bool timestamps)
#define LOG(...) LOG_TMPL(GGML_LOG_LEVEL_NONE, 0, __VA_ARGS__)
#define LOGV(verbosity, ...) LOG_TMPL(GGML_LOG_LEVEL_NONE, verbosity, __VA_ARGS__)

#if ENABLE_LOG
#define LOG_INF(...) LOG_TMPL(GGML_LOG_LEVEL_INFO, 0, __VA_ARGS__)
#define LOG_WRN(...) LOG_TMPL(GGML_LOG_LEVEL_WARN, 0, __VA_ARGS__)
#define LOG_ERR(...) LOG_TMPL(GGML_LOG_LEVEL_ERROR, 0, __VA_ARGS__)
#define LOG_DBG(...) LOG_TMPL(GGML_LOG_LEVEL_DEBUG, LOG_DEFAULT_DEBUG, __VA_ARGS__)
#define LOG_CNT(...) LOG_TMPL(GGML_LOG_LEVEL_CONT, 0, __VA_ARGS__)
#else
#define LOG_INF(...)
#define LOG_WRN(...)
#define LOG_ERR(...)
#define LOG_DBG(...)
#define LOG_CNT(...)
#endif
#define LOG_TSAVORITE(...) LOG_TMPL(GGML_LOG_LEVEL_TSAVORITE, 0, __VA_ARGS__)

#define LOG_INFV(verbosity, ...) LOG_TMPL(GGML_LOG_LEVEL_INFO, verbosity, __VA_ARGS__)
#define LOG_WRNV(verbosity, ...) LOG_TMPL(GGML_LOG_LEVEL_WARN, verbosity, __VA_ARGS__)
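Note: with this hunk, LOG_INF / LOG_WRN / LOG_ERR / LOG_DBG / LOG_CNT expand to nothing unless ENABLE_LOG evaluates to non-zero at compile time, so their arguments are not evaluated either. A minimal sketch of the intended call-site behavior, assuming the build passes -DENABLE_LOG=1 (the flag name comes from the hunk above; the call site itself is hypothetical):

    // Hypothetical consumer of common/log.h: compiled with -DENABLE_LOG=1 these
    // statements log through the common logger; without the flag they expand to
    // nothing and the format arguments are never evaluated.
    #include "log.h"

    static void report_load(const char * path, double t_ms) {
        LOG_INF("%s: loaded %s in %.2f ms\n", __func__, path, t_ms);
        LOG_DBG("%s: debug details for %s\n", __func__, path);
    }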
1 change: 1 addition & 0 deletions ggml/include/ggml.h
@@ -554,6 +554,7 @@ extern "C" {
GGML_LOG_LEVEL_WARN = 3,
GGML_LOG_LEVEL_ERROR = 4,
GGML_LOG_LEVEL_CONT = 5, // continue previous log
GGML_LOG_LEVEL_TSAVORITE = 42,
};

// this tensor...
1 change: 1 addition & 0 deletions ggml/src/ggml-impl.h
@@ -85,6 +85,7 @@ GGML_API void ggml_log_callback_default(enum ggml_log_level level, const char *
#define GGML_LOG_ERROR(...) ggml_log_internal(GGML_LOG_LEVEL_ERROR, __VA_ARGS__)
#define GGML_LOG_DEBUG(...) ggml_log_internal(GGML_LOG_LEVEL_DEBUG, __VA_ARGS__)
#define GGML_LOG_CONT(...) ggml_log_internal(GGML_LOG_LEVEL_CONT , __VA_ARGS__)
#define GGML_LOG_TSAVORITE(...) ggml_log_internal(GGML_LOG_LEVEL_TSAVORITE , __VA_ARGS__)

#define GGML_DEBUG 0

5 changes: 2 additions & 3 deletions ggml/src/ggml-tsavorite/ggml-tsavorite.cpp
@@ -504,7 +504,6 @@ static void *ggml_tsavorite_host_malloc(size_t n) {
GGML_TSAVORITE_LOG_INFO("Start %s\n", __func__);

GGML_TSAVORITE_LOG_INFO("\n Allocating memory from tsi_alloc with size %ld \n", n);
printf("\n ANoop Allocating memory from tsi_alloc with size %ld \n", n);
data = tsi_alloc(n);
GGML_TSAVORITE_LOG_CONT("\n Allocating memory from tsi_alloc with size %ld starting memory %p\n",
n, data);
@@ -1800,7 +1799,6 @@ static bool ggml_backend_tsavorite_device_supports_buft(ggml_backend_dev_t dev,
// ggml_backend_sched_backend_id_from_cur -> ggml_backend_offload_op ->
static bool ggml_backend_tsavorite_device_offload_op(ggml_backend_dev_t dev,
const struct ggml_tensor *op) {
// printf("\n ANoop Calling %s \n ", __func__);
if (op->type != GGML_TYPE_F32)
return false;
switch (op->op) {
@@ -1894,8 +1892,9 @@ static struct ggml_backend_reg_i ggml_backend_tsavorite_reg_i = {
/* .get_proc_address = */ NULL,
};


ggml_backend_reg_t ggml_backend_tsavorite_reg(void) {
ggml_tsavorite_log_type_val = GGML_TSAVORITE_LOG_ERROR;
ggml_tsavorite_log_type_val = GGML_TSAVORITE_LOG_NONE;
ggml_tsavorite_kernel_mode_flag = GGML_TSAVORITE_KERNEL_MODE_MLIR;
GGML_TSAVORITE_LOG_INFO("Start %s\n", __func__);
g_ggml_backend_tsavorite_reg.iface = ggml_backend_tsavorite_reg_i;
1 change: 1 addition & 0 deletions ggml/src/ggml.c
@@ -249,6 +249,7 @@ static void ggml_log_internal_v(enum ggml_log_level level, const char * format,
void ggml_log_internal(enum ggml_log_level level, const char * format, ...) {
va_list args;
va_start(args, format);
if (level == GGML_LOG_LEVEL_TSAVORITE)
ggml_log_internal_v(level, format, args);
va_end(args);
}
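With this check in place, ggml_log_internal forwards only GGML_LOG_LEVEL_TSAVORITE messages to the installed log callback; every other level is silently dropped at this point. A rough illustration of the effect (the call sites below are hypothetical, not part of this diff):

    // After this change, only the second statement reaches the log callback.
    GGML_LOG_ERROR("allocation failed: %zu bytes\n", nbytes);   // dropped
    GGML_LOG_TSAVORITE("tsavorite op time: %.2f ms\n", t_ms);   // forwarded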
8 changes: 7 additions & 1 deletion src/llama-context.cpp
@@ -2615,13 +2615,19 @@ void llama_perf_context_print(const llama_context * ctx) {
const auto data = llama_perf_context(ctx);

const double t_end_ms = 1e-3 * ggml_time_us();

LLAMA_LOG_INFO("%s: load time = %10.2f ms\n", __func__, data.t_load_ms);
LLAMA_LOG_INFO("%s: prompt eval time = %10.2f ms / %5d tokens (%8.2f ms per token, %8.2f tokens per second)\n",
__func__, data.t_p_eval_ms, data.n_p_eval, data.t_p_eval_ms / data.n_p_eval, 1e3 / data.t_p_eval_ms * data.n_p_eval);
LLAMA_LOG_INFO("%s: eval time = %10.2f ms / %5d runs (%8.2f ms per token, %8.2f tokens per second)\n",
__func__, data.t_eval_ms, data.n_eval, data.t_eval_ms / data.n_eval, 1e3 / data.t_eval_ms * data.n_eval);
LLAMA_LOG_INFO("%s: total time = %10.2f ms / %5d tokens\n", __func__, (t_end_ms - data.t_start_ms), (data.n_p_eval + data.n_eval));

LLAMA_LOG_TSAVORITE("%s: load time = %10.2f ms\n", __func__, data.t_load_ms);
LLAMA_LOG_TSAVORITE("%s: prompt eval time = %10.2f ms / %5d tokens (%8.2f ms per token, %8.2f tokens per second)\n",
__func__, data.t_p_eval_ms, data.n_p_eval, data.t_p_eval_ms / data.n_p_eval, 1e3 / data.t_p_eval_ms * data.n_p_eval);
LLAMA_LOG_TSAVORITE("%s: eval time = %10.2f ms / %5d runs (%8.2f ms per token, %8.2f tokens per second)\n",
__func__, data.t_eval_ms, data.n_eval, data.t_eval_ms / data.n_eval, 1e3 / data.t_eval_ms * data.n_eval);
LLAMA_LOG_TSAVORITE("%s: total time = %10.2f ms / %5d tokens\n", __func__, (t_end_ms - data.t_start_ms), (data.n_p_eval + data.n_eval));
}

void llama_perf_context_reset(llama_context * ctx) {
1 change: 1 addition & 0 deletions src/llama-impl.h
@@ -29,6 +29,7 @@ void llama_log_callback_default(ggml_log_level level, const char * text, void *
#define LLAMA_LOG_ERROR(...) llama_log_internal(GGML_LOG_LEVEL_ERROR, __VA_ARGS__)
#define LLAMA_LOG_DEBUG(...) llama_log_internal(GGML_LOG_LEVEL_DEBUG, __VA_ARGS__)
#define LLAMA_LOG_CONT(...) llama_log_internal(GGML_LOG_LEVEL_CONT , __VA_ARGS__)
#define LLAMA_LOG_TSAVORITE(...) llama_log_internal(GGML_LOG_LEVEL_TSAVORITE, __VA_ARGS__)

//
// helpers
2 changes: 2 additions & 0 deletions src/llama-sampling.cpp
@@ -2562,6 +2562,8 @@ void llama_perf_sampler_print(const struct llama_sampler * chain) {

LLAMA_LOG_INFO("%s: sampling time = %10.2f ms / %5d runs (%8.2f ms per token, %8.2f tokens per second)\n",
__func__, data.t_sample_ms, data.n_sample, data.t_sample_ms / data.n_sample, 1e3 / data.t_sample_ms * data.n_sample);
LLAMA_LOG_TSAVORITE("\n\n%s: sampling time = %10.2f ms / %5d runs (%8.2f ms per token, %8.2f tokens per second)",
__func__, data.t_sample_ms, data.n_sample, data.t_sample_ms / data.n_sample, 1e3 / data.t_sample_ms * data.n_sample);
}

void llama_perf_sampler_reset(struct llama_sampler * chain) {
7 changes: 7 additions & 0 deletions tools/main/main.cpp
@@ -41,6 +41,12 @@ static std::vector<llama_token> * g_output_tokens;
static bool is_interacting = false;
static bool need_insert_eot = false;

static void my_logger(ggml_log_level level, const char *text, void *user_data) {
if (level == GGML_LOG_LEVEL_TSAVORITE) {
fprintf(stderr, "%s", text); // only show tsavorite-level messages
}
}

static void print_usage(int argc, char ** argv) {
(void) argc;

@@ -120,6 +126,7 @@ int main(int argc, char ** argv) {
LOG_WRN("%s: warning: scaling RoPE frequency by %g.\n", __func__, params.rope_freq_scale);
}

llama_log_set(my_logger, nullptr);
LOG_INF("%s: llama backend init\n", __func__);

llama_backend_init();
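Taken together, the pieces added in this PR wire up as follows: LLAMA_LOG_TSAVORITE(...) calls llama_log_internal with GGML_LOG_LEVEL_TSAVORITE, llama_log_set installs my_logger as the callback for both llama and ggml output, and my_logger prints only that level. A stand-alone sketch of the end-to-end flow (illustrative only; model loading and decoding are elided):

    // Stand-alone illustration of the logging path introduced by this PR.
    #include <cstdio>
    #include "llama.h"

    static void tsavorite_only_logger(ggml_log_level level, const char * text, void * user_data) {
        (void) user_data;
        if (level == GGML_LOG_LEVEL_TSAVORITE) {
            fprintf(stderr, "%s", text);   // all other levels are suppressed
        }
    }

    int main() {
        llama_log_set(tsavorite_only_logger, nullptr);   // covers llama and ggml logs
        llama_backend_init();
        // ... load a model and decode as usual; llama_perf_context_print()
        // now emits its report through LLAMA_LOG_TSAVORITE as well ...
        llama_backend_free();
        return 0;
    }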