From 647452bbabf16cbfa00a5a61dd5055d0120195fb Mon Sep 17 00:00:00 2001
From: Anoop Kapoor
Date: Mon, 20 Oct 2025 11:04:02 -0700
Subject: [PATCH 1/2] @FIR-1033 - llama.cpp crash with below prompt for model
 SakanaAI-TinySwallow-1.5B-Instruct-F32.gguf

---
 src/llama-context.cpp | 9 ++++++---
 1 file changed, 6 insertions(+), 3 deletions(-)

diff --git a/src/llama-context.cpp b/src/llama-context.cpp
index 24491e9309a5c..5c5a5c912bed1 100644
--- a/src/llama-context.cpp
+++ b/src/llama-context.cpp
@@ -1090,12 +1090,14 @@ int llama_context::decode(const llama_batch & batch_inp) {
 
         ggml_status status;
         const auto * res = process_ubatch(ubatch, LLM_GRAPH_TYPE_DECODER, mctx.get(), status);
 
-#if defined(GGML_PERF) || defined(GGML_PERF_RELEASE)
+if (res) {
+    #if defined(GGML_PERF) || defined(GGML_PERF_RELEASE)
     ggml_perf_accumulate(perf_totals, res->get_gf());
-#elif defined(GGML_PERF_DETAIL)
+    #elif defined(GGML_PERF_DETAIL)
     ggml_perf_accumulate(perf_totals, res->get_gf());
     ggml_perf_write_detailed_csv(res->get_gf(), perf_all_shape_fp);
-#endif /* GML_PERF-related flags */
+    #endif /* GML_PERF-related flags */
+}
 
         if (!res) {
@@ -1121,6 +1123,7 @@ int llama_context::decode(const llama_batch & batch_inp) {
                 memory->seq_rm(s, pos_min[s], -1);
             }
 
+            printf("\n ANOOP Status vakue %d ", status);
             switch (status) {
                 case GGML_STATUS_ABORTED:      return 2;
                 case GGML_STATUS_ALLOC_FAILED: return -2;

From 44f9ba987270491eee3018ccfed437b09c0ef723 Mon Sep 17 00:00:00 2001
From: Anoop Kapoor
Date: Mon, 20 Oct 2025 11:25:32 -0700
Subject: [PATCH 2/2] Address Ashish's comment

---
 src/llama-context.cpp | 13 +++++++------
 1 file changed, 7 insertions(+), 6 deletions(-)

diff --git a/src/llama-context.cpp b/src/llama-context.cpp
index 5c5a5c912bed1..c9b54c0fcf96a 100644
--- a/src/llama-context.cpp
+++ b/src/llama-context.cpp
@@ -1090,14 +1090,16 @@ int llama_context::decode(const llama_batch & batch_inp) {
 
         ggml_status status;
         const auto * res = process_ubatch(ubatch, LLM_GRAPH_TYPE_DECODER, mctx.get(), status);
 
-if (res) {
-    #if defined(GGML_PERF) || defined(GGML_PERF_RELEASE)
+#if defined(GGML_PERF) || defined(GGML_PERF_RELEASE)
+    if (res) {
     ggml_perf_accumulate(perf_totals, res->get_gf());
-    #elif defined(GGML_PERF_DETAIL)
+    }
+#elif defined(GGML_PERF_DETAIL)
+    if (res) {
     ggml_perf_accumulate(perf_totals, res->get_gf());
     ggml_perf_write_detailed_csv(res->get_gf(), perf_all_shape_fp);
-    #endif /* GML_PERF-related flags */
-}
+    }
+#endif /* GML_PERF-related flags */
 
         if (!res) {
@@ -1123,7 +1125,6 @@ if (res) {
                 memory->seq_rm(s, pos_min[s], -1);
             }
 
-            printf("\n ANOOP Status vakue %d ", status);
             switch (status) {
                 case GGML_STATUS_ABORTED:      return 2;
                 case GGML_STATUS_ALLOC_FAILED: return -2;
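
The underlying bug: process_ubatch() reports failure by returning nullptr and
setting `status`, but the GGML_PERF accumulation ran before the `if (!res)`
error path and dereferenced `res` unconditionally, so any failed ubatch (such
as the one triggered by the reported prompt) crashed the decode. Below is a
minimal standalone sketch of the guard pattern the series applies; the stub
types and functions are simplified assumptions for illustration, not the real
llama.cpp/ggml API.

    // Sketch of the null-guard pattern from PATCH 2/2 (stand-in types only).
    #include <cstdio>

    #define GGML_PERF 1   // assume perf instrumentation is enabled for this sketch

    struct ggml_cgraph { };                     // stand-in for the real graph type

    struct graph_result {                       // stand-in for the decode result
        ggml_cgraph * get_gf() const { return gf; }
        ggml_cgraph * gf = nullptr;
    };

    enum ggml_status { GGML_STATUS_SUCCESS, GGML_STATUS_FAILED };

    // Stand-in for process_ubatch(): returns nullptr on failure, which is the
    // path that crashed before the fix.
    static const graph_result * process_ubatch_stub(bool fail, ggml_status & status) {
        static graph_result ok;
        status = fail ? GGML_STATUS_FAILED : GGML_STATUS_SUCCESS;
        return fail ? nullptr : &ok;
    }

    static void ggml_perf_accumulate_stub(const ggml_cgraph * gf) {
        std::printf("accumulating perf for graph %p\n", (const void *) gf);
    }

    int main() {
        ggml_status status;
        const graph_result * res = process_ubatch_stub(/*fail=*/true, status);

    #if defined(GGML_PERF) || defined(GGML_PERF_RELEASE)
        if (res) {                              // the fix: only touch res->get_gf()
            ggml_perf_accumulate_stub(res->get_gf());   // when the ubatch succeeded
        }
    #endif

        if (!res) {                             // failure falls through to the
            std::printf("decode failed, status = %d\n", (int) status);
            return 1;                           // existing error handling
        }
        return 0;
    }

Note the final shape chosen in PATCH 2/2: the `if (res)` check lives inside
the GGML_PERF preprocessor blocks rather than wrapping them, so builds without
perf instrumentation compile exactly the same code as before the fix.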