@FIR935 - GGML: Disable writing of all math operation details to file #47
Add this suggestion to a batch that can be applied as a single commit.
This suggestion is invalid because no changes were made to the code.
Suggestions cannot be applied while the pull request is closed.
Suggestions cannot be applied while viewing a subset of changes.
Only one suggestion per line can be applied in a batch.
Add this suggestion to a batch that can be applied as a single commit.
Applying suggestions on deleted lines is not supported.
You must change the existing code in this line in order to create a valid suggestion.
Outdated suggestions cannot be applied.
This suggestion has been applied or marked resolved.
Suggestions cannot be applied from pending reviews.
Suggestions cannot be applied on multi-line comments.
Suggestions cannot be applied while the pull request is queued to merge.
Suggestion cannot be applied right now. Please check back later.
Tested on POSIX and FPGA
#######
FPGA
root@agilex7_dk_si_agf014ea:/usr/bin/tsi/v0.1.1.tsv34_08_14_2025/bin# rm tsi-ggml-0.0.6.tz
root@agilex7_dk_si_agf014ea:/usr/bin/tsi/v0.1.1.tsv34_08_14_2025/bin#
*** file: tsi-ggml-0.0.6.tz
$ sz -vv tsi-ggml-0.0.6.tz
Sending: tsi-ggml-0.0.6.tz
Bytes Sent:14067254 BPS:89451
Transfer complete
*** exit status: 0 ***
(failed reverse-i-search)`tar ': rm tsi-ggml-0.0.6.tz
root@agilex7_dk_si_agf014ea:/usr/bin/tsi/v0.1.1.tsv34_08_14_2025/bin# ls -lrt
-rwxr--r-- 1 root root 47540 Jan 1 1970 vecadd_blob_tvu_main.so
-rwxr--r-- 1 root root 47540 Jan 1 1970 vecadd_blob_tvu_main.blob
-rwxr-xr-x 1 root root 30980 Jan 1 1970 tsi_txe_xos.blob
-rwxr-xr-x 1 root root 51840 Jan 1 1970 tsi_txe_kernel.bin
-rwxr-xr-x 1 root root 821 Jan 1 1970 tsi_shutdown.sh
-rw-r--r-- 1 root root 615 Jan 1 1970 tsi_env.sh
-rw-r--r-- 1 root root 207 Jan 1 1970 tsiShutdown.service
-rwxr-xr-x 1 root root 1231 Jan 1 1970 tnApcMgr_run.sh
-rw-r--r-- 1 root root 239 Jan 1 1970 tnApcMgr.service
-rwxr-xr-x 1 root root 258304 Jan 1 1970 tnApcMgr
-rwxr-xr-x 1 root root 2046 Jan 1 1970 run_platform_test.sh
-rwxr-xr-x 1 root root 82016 Jan 1 1970 recvFromHost
-rw-r--r-- 1 root root 546 Jan 1 1970 platform_layout.json
-rwxr-xr-x 1 root root 153568 Jan 1 1970 UAP
drwxr-xr-x 3 root root 880 Mar 9 12:36 tsi-ggml-orig
-rwxr-xr-x 1 root root 1406 Mar 9 12:36 run_llama_cli.sh
-rw-r--r-- 1 root root 0 Mar 9 12:37 log_file_date_09_03_18_time_12_37_43.txt
-rw-r--r-- 1 root root 445 Mar 9 12:37 results_file_date_09_03_18_time_12_37_43.csv
-rw-r--r-- 1 root root 290 Mar 9 12:38 txe_mem_tests_execute.sh
-rw-r--r-- 1 root root 3257 Mar 9 12:46 results_file_date_09_03_18_time_12_38_13.csv
-rw-r--r-- 1 root root 6445 Mar 9 12:46 log_file_date_09_03_18_time_12_38_13.txt
drwxr-xr-x 3 root root 384 Mar 9 12:52 txe-mem-tests
drwx------ 8 101006 100003 568 Mar 9 12:52 aot-tests
-rw-r--r-- 1 root root 30541 Mar 9 13:08 sys-diagtool.tar.gz
-rwxr-xr-x 1 root root 80784 Mar 9 13:09 sys-diagtool
-rw-r--r-- 1 root root 3257 Mar 9 13:09 results_file_date_09_03_18_time_13_09_18.csv
-rw-r--r-- 1 root root 6445 Mar 9 13:09 log_file_date_09_03_18_time_13_09_18.txt
-rw-r--r-- 1 root root 14066294 Mar 9 13:28 tsi-ggml-aws-latest.tz
-rwxr-xr-x 1 root root 1446 Mar 9 2018 run_llama_cli.sh_old
-rwxr-xr-x 1 root root 1318 Mar 9 2018 run_llama_cli.sh.new
drwxr-xr-x 3 root root 960 Mar 9 2018 tsi-ggml-backup
drwxr-xr-x 3 root root 960 Mar 10 2018 tsi-ggml-orig_aug27
-rw-r--r-- 1 root root 0 Mar 10 2018 results_file_date_10_03_18_time_01_40_36.csv
-rw-r--r-- 1 root root 0 Mar 10 2018 log_file_date_10_03_18_time_01_40_36.txt
drwxr-xr-x 3 root root 960 Mar 10 2018 tsi-ggml_backup_aug26
-rw-r--r-- 1 root root 14067254 Sep 3 2025 tsi-ggml-0.0.6.tz
root@agilex7_dk_si_agf014ea:/usr/bin/tsi/v0.1.1.tsv34_08_14_2025/bin# tar -zxvf tsi-ggml-0.0.6.tz
tsi-ggml/blobs/
tsi-ggml/blobs/txe_add.blob
tsi-ggml/blobs/txe_sub.blob
tsi-ggml/blobs/txe_mult.blob
tsi-ggml/blobs/txe_div.blob
tsi-ggml/blobs/txe_abs.blob
tsi-ggml/blobs/txe_neg.blob
tsi-ggml/blobs/txe_sqrt.blob
tsi-ggml/blobs/txe_sqr.blob
tsi-ggml/blobs/txe_inv.blob
tsi-ggml/blobs/txe_sin.blob
tsi-ggml/blobs/txe_sigmoid.blob
tsi-ggml/blobs/txe_silu.blob
tsi-ggml/ggml.sh
tsi-ggml/libggml-base.so
tsi-ggml/libggml-cpu.so
tsi-ggml/libggml.so
tsi-ggml/libggml-tsavorite.so
tsi-ggml/libllama.so
tsi-ggml/llama-cli
tsi-ggml/simple-backend-tsi
root@agilex7_dk_si_agf014ea:/usr/bin/tsi/v0.1.1.tsv34_08_14_2025/bin# cd tsi-ggml
root@agilex7_dk_si_agf014ea:/usr/bin/tsi/v0.1.1.tsv34_08_14_2025/bin/tsi-ggml# ls -lrt
drwxr-xr-x 2 100041 100003 1032 Mar 9 13:18 blobs
-rwxr-xr-x 1 100041 100003 226072 Sep 3 2025 simple-backend-tsi
-rwxr-xr-x 1 100041 100003 19895488 Sep 3 2025 llama-cli
-rwxr-xr-x 1 100041 100003 24499112 Sep 3 2025 libllama.so
-rwxr-xr-x 1 100041 100003 736768 Sep 3 2025 libggml.so
-rwxr-xr-x 1 100041 100003 4123728 Sep 3 2025 libggml-tsavorite.so
-rwxr-xr-x 1 100041 100003 1896376 Sep 3 2025 libggml-cpu.so
-rwxr-xr-x 1 100041 100003 2737688 Sep 3 2025 libggml-base.so
-rwxr-xr-x 1 100041 100003 426 Sep 3 2025 ggml.sh
root@agilex7_dk_si_agf014ea:/usr/bin/tsi/v0.1.1.tsv34_08_14_2025/bin/tsi-ggml# cd ..
root@agilex7_dk_si_agf014ea:/usr/bin/tsi/v0.1.1.tsv34_08_14_2025/bin# ./run_llama_cli.sh
is Luna.
I'm a cat
llama_perf_sampler_print: sampling time = 228.21 ms / 16 runs ( 14.26 ms per token, 70.11 tokens per second)
llama_perf_context_print: load time = 58344.33 ms
llama_perf_context_print: prompt eval time = 0.00 ms / 1 tokens ( 0.00 ms per token, inf tokens per second)
llama_perf_context_print: eval time = 290413.57 ms / 9 runs (32268.17 ms per token, 0.03 tokens per second)
llama_perf_context_print: total time = 303789.87 ms / 10 tokens
=== GGML Perf Summary ===
Op Runs Total us Avg us
ADD 396 1356894 3426.50
MUL 603 1735010 2877.30
RMS_NORM 1326 58884 44.41
MUL_MAT 6882 774739330 112574.74
CPY 1145 54508 47.61
CONT 392 3624 9.24
RESHAPE 1572 15225 9.69
VIEW 1372 2188 1.59
PERMUTE 1282 1981 1.55
TRANSPOSE 334 806 2.41
GET_ROWS 88 23240 264.09
SOFT_MAX 512 51697 100.97
ROPE 1288 62548 48.56
UNARY 198 747465 3775.08
-> SILU 198 747465 3775.08
OPU Profiling Results:
Calls Total(ms) T/call Self(ms) Function
[Thread] tsi::runtime::TsavRT::awaitCommandListCompletion (cumulative over all threads)
1197 1566.9610 1.3091 0.0000 [5.33e-01%] [Thread] tsi::runtime::TsavRT::awaitCommandListCompletion
1197 5.80e+05 484.8669 5.80e+05 └─ [197.43%] TXE 0 Idle
603 521.3323 0.8646 521.3323 └─ [1.77e-01%] [ txe_mult ]
198 305.2755 1.5418 305.2755 └─ [1.04e-01%] [ txe_silu ]
396 238.2535 0.6017 238.2535 └─ [8.10e-02%] [ txe_add ]
[Thread] tsi::runtime::TsavRT::finalizeCommandList (cumulative over all threads)
1197 986.2640 0.8239 965.1790 [3.35e-01%] [Thread] tsi::runtime::TsavRT::finalizeCommandList
1197 21.0850 0.0176 21.0850 └─ [7.17e-03%] tsi::runtime::executeWithTimeout
[Thread] tsi::runtime::TsavRT::processResponses (cumulative over all threads)
1197 1965.6200 1.6421 305.8990 [6.69e-01%] [Thread] tsi::runtime::TsavRT::processResponses
1197 1659.7210 1.3866 1659.7210 └─ [5.65e-01%] tsi::runtime::executeWithTimeout
[Thread] tsi::runtime::TsavRTFPGA::finalize (cumulative over all threads)
[Thread] tsi::runtime::TsavRT::allocate (cumulative over all threads)
1198 126.7810 0.1058 126.7810 [4.31e-02%] [Thread] tsi::runtime::TsavRT::allocate
[Thread] tsi::runtime::TsavRTFPGA::loadBlob (cumulative over all threads)
1197 602.6640 0.5035 602.6640 [2.05e-01%] [Thread] tsi::runtime::TsavRTFPGA::loadBlob
[Thread] tsi::runtime::TsavRT::addCommandToList (cumulative over all threads)
1197 90.7660 0.0758 90.7660 [3.09e-02%] [Thread] tsi::runtime::TsavRT::addCommandToList
[Thread] tsi::runtime::TsavRTFPGA::unloadBlob (cumulative over all threads)
1197 95.5640 0.0798 95.5640 [3.25e-02%] [Thread] tsi::runtime::TsavRTFPGA::unloadBlob
[Thread] tsi::runtime::TsavRT::deallocate (cumulative over all threads)
1197 21.9830 0.0184 21.9830 [7.48e-03%] [Thread] tsi::runtime::TsavRT::deallocate
========================================================================================================================
Counter Metrics:
Metric Min Max Avg
Queue_0_Occupancy 0.0000 1.0000 0.6005
root@agilex7_dk_si_agf014ea:/usr/bin/tsi/v0.1.1.tsv34_08_14_2025/bin
root@agilex7_dk_si_agf014ea:/usr/bin/tsi/v0.1.1.tsv34_08_14_2025/bin/tsi-ggml# ls -lrt
drwxr-xr-x 2 100041 100003 1032 Mar 9 13:18 blobs
-rw-r--r-- 1 root root 0 Mar 9 13:18 tsi-op.txt
-rwxr-xr-x 1 100041 100003 226072 Sep 3 2025 simple-backend-tsi
-rwxr-xr-x 1 100041 100003 19895488 Sep 3 2025 llama-cli
-rwxr-xr-x 1 100041 100003 24499112 Sep 3 2025 libllama.so
-rwxr-xr-x 1 100041 100003 736768 Sep 3 2025 libggml.so
-rwxr-xr-x 1 100041 100003 4123728 Sep 3 2025 libggml-tsavorite.so
-rwxr-xr-x 1 100041 100003 1896376 Sep 3 2025 libggml-cpu.so
-rwxr-xr-x 1 100041 100003 2737688 Sep 3 2025 libggml-base.so
-rwxr-xr-x 1 100041 100003 426 Sep 3 2025 ggml.sh
##############
POSIX
[akapoor@wssw01 llama.cpp]$ build-posix/bin/llama-cli -p "my cat's name" -m /proj/rel/sw/ggml/models/Tiny-Llama-v0.3-FP32-1.1B-F32.gguf --device tSavorite -c 12288 --temp 0.0 --n-predict 100 --repeat-penalty 1.5 -b 1024 --top-k 50 --top-p 0.9 --repeat-last-n 5 --no-warmup --no-display-prompt
is Luna.
I'm a cat person and I love my cat Luna. She is a very cute cat and I love her fur. She is a very smart cat and she can do many things. She can climb trees and she can jump over furniture. She is a very loyal cat and she always protects her owner. She is a very loving cat and she always cares for her owner. She is a very kind and loving cat. She is
llama_perf_sampler_print: sampling time = 284.64 ms / 106 runs ( 2.69 ms per token, 372.40 tokens per second)
llama_perf_context_print: load time = 4659.61 ms
llama_perf_context_print: prompt eval time = 2695.08 ms / 6 tokens ( 449.18 ms per token, 2.23 tokens per second)
llama_perf_context_print: eval time = 55228.16 ms / 99 runs ( 557.86 ms per token, 1.79 tokens per second)
llama_perf_context_print: total time = 60246.82 ms / 105 tokens
=== GGML Perf Summary ===
Op Runs Total us Avg us
ADD 4400 8610076 1956.84
MUL 6700 10502058 1567.47
RMS_NORM 17190 61468 3.58
MUL_MAT 79024 114192553 1445.04
CPY 16039 49566 3.09
CONT 8165 7171 0.88
RESHAPE 32355 13104 0.41
VIEW 30250 4320 0.14
PERMUTE 30408 5131 0.17
TRANSPOSE 6992 1371 0.20
GET_ROWS 1088 2500 2.30
SOFT_MAX 8688 199860 23.00
ROPE 16806 83053 4.94
UNARY 2200 5166288 2348.31
-> SILU 2200 5166288 2348.31
OPU Profiling Results:
Calls Total(ms) T/call Self(ms) Function
[Thread] tsi::runtime::TsavRT::finalize (cumulative over all threads)
[Thread] tsi::runtime::TsavRTPosix::loadBlob (cumulative over all threads)
13830 13445.8950 0.9722 481.0810 [22.23%] [Thread] tsi::runtime::TsavRTPosix::loadBlob
27660 12959.3770 0.4685 12959.3770 └─ [21.43%] tsi::runtime::executeWithTimeout
13830 5.4370 3.93e-04 5.4370 └─ [8.99e-03%] LOAD_BLOB Command Execution
13830 0.0000 0.0000 0.0000 └─ [0.00e+00%] Command{command=2 (LOAD_BLOB), blob_args=[2181038720[0x820...
13830 0.0000 0.0000 0.0000 └─ [0.00e+00%] TXE 0 Idle
[Thread] tsi::runtime::TsavRTPosix::unloadBlob (cumulative over all threads)
13830 3702.1940 0.2677 467.6380 [ 6.12%] [Thread] tsi::runtime::TsavRTPosix::unloadBlob
27660 3226.9250 0.1167 3226.9250 └─ [ 5.34%] tsi::runtime::executeWithTimeout
13830 7.6310 5.52e-04 7.6310 └─ [1.26e-02%] UNLOAD_BLOB Command Execution
13830 0.0000 0.0000 0.0000 └─ [0.00e+00%] Command{command=3 (UNLOAD_BLOB), blob_args=[2181038720[0x8...
13830 0.0000 0.0000 0.0000 └─ [0.00e+00%] TXE 0 Idle
[Thread] tsi::runtime::TsavRT::processResponses (cumulative over all threads)
13832 5023.6950 0.3632 90.5990 [ 8.31%] [Thread] tsi::runtime::TsavRT::processResponses
13832 4933.0960 0.3566 4933.0960 └─ [ 8.16%] tsi::runtime::executeWithTimeout
[Thread] tsi::runtime::TsavRT::finalizeCommandList (cumulative over all threads)
13830 202.7760 0.0147 184.7870 [3.35e-01%] [Thread] tsi::runtime::TsavRT::finalizeCommandList
13830 17.9890 0.0013 17.9890 └─ [2.97e-02%] tsi::runtime::executeWithTimeout
[Thread] tsi::runtime::TsavRT::allocate (cumulative over all threads)
13831 51.7080 0.0037 51.7080 [8.55e-02%] [Thread] tsi::runtime::TsavRT::allocate
[Thread] tsi::runtime::TsavRT::addCommandToList (cumulative over all threads)
13830 60.3990 0.0044 60.3990 [9.99e-02%] [Thread] tsi::runtime::TsavRT::addCommandToList
[Thread] tsi::runtime::TsavRT::awaitCommandListCompletion (cumulative over all threads)
13830 6396.6260 0.4625 6396.6260 [10.58%] [Thread] tsi::runtime::TsavRT::awaitCommandListCompletion
[Thread] tsi::runtime::TsavRT::deallocate (cumulative over all threads)
13830 22.2120 0.0016 22.2120 [3.67e-02%] [Thread] tsi::runtime::TsavRT::deallocate
========================================================================================================================
Counter Metrics:
Metric Min Max Avg
Queue_0_Occupancy 0.0000 1.0000 0.9995
[akapoor@wssw01 llama.cpp]$ ls -lrt
total 14620
-rw-r--r-- 1 akapoor tsiusers 47860 Sep 2 12:40 AUTHORS
-rw-r--r-- 1 akapoor tsiusers 10182 Sep 2 12:40 CMakeLists.txt
-rw-r--r-- 1 akapoor tsiusers 4008 Sep 2 12:40 CMakePresets.json
-rw-r--r-- 1 akapoor tsiusers 434 Sep 2 12:40 CODEOWNERS
-rw-r--r-- 1 akapoor tsiusers 6510 Sep 2 12:40 CONTRIBUTING.md
-rw-r--r-- 1 akapoor tsiusers 1078 Sep 2 12:40 LICENSE
-rw-r--r-- 1 akapoor tsiusers 50453 Sep 2 12:40 Makefile
-rw-r--r-- 1 akapoor tsiusers 31599 Sep 2 12:40 README.md
-rw-r--r-- 1 akapoor tsiusers 5347 Sep 2 12:40 SECURITY.md
-rwxr-xr-x 1 akapoor tsiusers 21752 Sep 2 12:40 build-xcframework.sh
drwxr-xr-x 2 akapoor tsiusers 4096 Sep 2 12:40 ci
drwxr-xr-x 2 akapoor tsiusers 4096 Sep 2 12:40 cmake
-rwxr-xr-x 1 akapoor tsiusers 290408 Sep 2 12:40 convert_hf_to_gguf.py
-rwxr-xr-x 1 akapoor tsiusers 18505 Sep 2 12:40 convert_hf_to_gguf_update.py
-rwxr-xr-x 1 akapoor tsiusers 19106 Sep 2 12:40 convert_llama_ggml_to_gguf.py
-rwxr-xr-x 1 akapoor tsiusers 18624 Sep 2 12:40 convert_lora_to_gguf.py
drwxr-xr-x 5 akapoor tsiusers 4096 Sep 2 12:40 docs
drwxr-xr-x 28 akapoor tsiusers 4096 Sep 2 12:40 examples
-rw-r--r-- 1 akapoor tsiusers 1556 Sep 2 12:40 flake.lock
-rw-r--r-- 1 akapoor tsiusers 7465 Sep 2 12:40 flake.nix
drwxr-xr-x 5 akapoor tsiusers 4096 Sep 2 12:40 ggml
drwxr-xr-x 5 akapoor tsiusers 4096 Sep 2 12:40 gguf-py
drwxr-xr-x 2 akapoor tsiusers 4096 Sep 2 12:40 grammars
drwxr-xr-x 2 akapoor tsiusers 4096 Sep 2 12:40 include
drwxr-xr-x 2 akapoor tsiusers 4096 Sep 2 12:40 licenses
drwxr-xr-x 2 akapoor tsiusers 4096 Sep 2 12:40 media
-rw-r--r-- 1 akapoor tsiusers 3319 Sep 2 12:40 model-rerun.py
drwxr-xr-x 3 akapoor tsiusers 8192 Sep 2 12:40 models
-rw-r--r-- 1 akapoor tsiusers 163 Sep 2 12:40 mypy.ini
drwxr-xr-x 3 akapoor tsiusers 4096 Sep 2 12:40 pocs
-rw-r--r-- 1 akapoor tsiusers 124786 Sep 2 12:40 poetry.lock
drwxr-xr-x 2 akapoor tsiusers 4096 Sep 2 12:40 prompts
-rw-r--r-- 1 akapoor tsiusers 1336 Sep 2 12:40 pyproject.toml
-rw-r--r-- 1 akapoor tsiusers 616 Sep 2 12:40 pyrightconfig.json
-rw-r--r-- 1 akapoor tsiusers 551 Sep 2 12:40 requirements.txt
drwxr-xr-x 2 akapoor tsiusers 4096 Sep 2 12:40 requirements
drwxr-xr-x 3 akapoor tsiusers 4096 Sep 2 12:40 scripts
drwxr-xr-x 2 akapoor tsiusers 4096 Sep 2 12:40 tests
drwxr-xr-x 17 akapoor tsiusers 4096 Sep 2 12:40 tools
-rwxr-xr-x 1 akapoor tsiusers 3834 Sep 2 12:40 tsi-pkg-build.sh
drwxr-xr-x 7 akapoor tsiusers 4096 Sep 2 13:18 ggml-tsi-kernel
drwxr-xr-x 12 akapoor tsiusers 4096 Sep 2 22:53 build-posix
drwxr-xr-x 4 akapoor tsiusers 4096 Sep 2 22:53 common
drwxr-xr-x 12 akapoor tsiusers 4096 Sep 2 22:54 build-fpga
drwxr-xr-x 3 akapoor tsiusers 4096 Sep 2 22:56 tsi-ggml
-rw-r--r-- 1 akapoor tsiusers 14067478 Sep 2 22:56 tsi-ggml-0.0.6.tz
drwxr-xr-x 2 akapoor tsiusers 4096 Sep 3 10:12 src
-rw-r--r-- 1 akapoor tsiusers 0 Sep 3 10:14 tsi-op.txt
[akapoor@wssw01 llama.cpp]$ git status
On branch FIR-929
Your branch is up to date with 'origin/FIR-929'.
Changes not staged for commit:
(use "git add ..." to u