From 9d65b92953fe3f674dd7a5d51a12e900cdc8682c Mon Sep 17 00:00:00 2001 From: Anoop Kapoor Date: Thu, 29 May 2025 13:23:31 -0700 Subject: [PATCH 1/2] @FIR-709 - GGML: Adding SILU Kernel --- README.md | 49 +++++++++++++++++ docs/build.md | 63 ++++++++++++++++++++++ ggml-tsi-kernel | 2 +- ggml/include/ggml-tsavorite.h | 2 + ggml/src/ggml-backend.cpp | 7 ++- ggml/src/ggml-tsavorite/ggml-tsavorite.cpp | 30 +++++++---- tsi-pkg-build.sh | 10 ++-- 7 files changed, 147 insertions(+), 16 deletions(-) diff --git a/README.md b/README.md index d1cb8d8336229..d8371a472a675 100644 --- a/README.md +++ b/README.md @@ -580,3 +580,52 @@ $ echo "source ~/.llama-completion.bash" >> ~/.bashrc - [minja](https://github.com/google/minja) - Minimal Jinja parser in C++, used by various tools/examples - MIT License - [linenoise.cpp](./tools/run/linenoise.cpp/linenoise.cpp) - C++ library that provides readline-like line editing capabilities, used by `llama-run` - BSD 2-Clause License - [curl](https://curl.se/) - Client-side URL transfer library, used by various tools/examples - [CURL License](https://curl.se/docs/copyright.html) + +#### TSI compilation steps +```bash +#Pull the repo from tsisw as follows +git clone git@github.com:tsisw/llama.cpp.git -b FIR-699 + +#Ensure prerequisites are met as follows +cd llama.cpp/ +git submodule update --recursive --init +cd ggml-tsi-kernel/ +module load tsi4 gcc/13.3.0 +python3 -m venv blob-creation +source blob-creation/bin/activate +pip install -r /proj/rel/sw/mlir-compiler/python/requirements-common.txt +pip install /proj/rel/sw/mlir-compiler/python/mlir_external_packages-1.2.1-py3-none-any.whl +pip install onnxruntime-training + +#build TSI kernels for the Tsavorite backend +#First for FPGA +cd fpga-kernel +cmake -B build-fpga +./create-all-kernels.sh +#Then for Posix use cases +cd ../posix-kernel/ +./create-all-kernels.sh + +#Change directory to top level llama.cpp +cd ../../ + +#Compile for posix with build-posix as a target folder + +cmake -B 
build-posix -DGGML_TSAVORITE=ON -DGGML_TSAVORITE_TARGET=posix +cmake --build build-posix --config Release + +#Compile for fpga with build-fpga as a target folder +export CC="/proj/rel/sw/arm-gnu-toolchain-14.2.rel1-x86_64-aarch64-none-linux-gnu/bin/aarch64-none-linux-gnu-gcc" +export CXX="/proj/rel/sw/arm-gnu-toolchain-14.2.rel1-x86_64-aarch64-none-linux-gnu/bin/aarch64-none-linux-gnu-g++" +cmake -B build-fpga -DGGML_TSAVORITE=ON -DGGML_TSAVORITE_TARGET=fpga +cmake --build build-fpga --config Release + +#For an easy build one can also use ./tsi-pkg-build.sh which creates a FPGA specific tar bundle tsi-ggml.tz +#If you want to release the build update the TSI-VERSION in the file tsi-pkg-build.sh and add Release as parameter +#when running ./tsi-pkg-build.sh (Note it will overwrite what exists in /proj/rel/sw/ggml so be sure you want to do +#it.) Example ./tsi-pkg-build.sh release +./tsi-pkg-build.sh + +``` + +## References diff --git a/docs/build.md b/docs/build.md index c9027c0b580a5..1685adbc916bc 100644 --- a/docs/build.md +++ b/docs/build.md @@ -559,3 +559,66 @@ The GPU may still be used to accelerate some parts of the computation even when In most cases, it is possible to build and use multiple backends at the same time. For example, you can build llama.cpp with both CUDA and Vulkan support by using the `-DGGML_CUDA=ON -DGGML_VULKAN=ON` options with CMake. At runtime, you can specify which backend devices to use with the `--device` option. To see a list of available devices, use the `--list-devices` option. Backends can be built as dynamic libraries that can be loaded dynamically at runtime. This allows you to use the same llama.cpp binary on different machines with different GPUs. To enable this feature, use the `GGML_BACKEND_DL` option when building. 
+ + +## TSI compilation steps + +Following are the instructions to compile for TSI FPGA and Posix backend + +```bash +#Pull the repo from tsisw as follows +git clone git@github.com:tsisw/llama.cpp.git -b FIR-699 +``` + +Ensure prerequisites are met as follows +```bash +cd llama.cpp/ +git submodule update --recursive --init +cd ggml-tsi-kernel/ +module load tsi4 gcc/13.3.0 +python3 -m venv blob-creation +source blob-creation/bin/activate +pip install -r /proj/rel/sw/mlir-compiler/python/requirements-common.txt +pip install /proj/rel/sw/mlir-compiler/python/mlir_external_packages-1.2.1-py3-none-any.whl +pip install onnxruntime-training +``` + +build TSI kernels for the Tsavorite backend +First for FPGA +```bash +cd fpga-kernel +cmake -B build-fpga +./create-all-kernels.sh +``` Then for Posix use cases +```bash +cd ../posix-kernel/ +./create-all-kernels.sh +``` + +Change directory to top level llama.cpp +```bash +cd ../../ +``` + +Compile for posix with build-posix as a target folder +```bash +cmake -B build-posix -DGGML_TSAVORITE=ON -DGGML_TSAVORITE_TARGET=posix +cmake --build build-posix --config Release +``` + +Compile for fpga with build-fpga as a target folder +```bash +export CC="/proj/rel/sw/arm-gnu-toolchain-14.2.rel1-x86_64-aarch64-none-linux-gnu/bin/aarch64-none-linux-gnu-gcc" +export CXX="/proj/rel/sw/arm-gnu-toolchain-14.2.rel1-x86_64-aarch64-none-linux-gnu/bin/aarch64-none-linux-gnu-g++" +cmake -B build-fpga -DGGML_TSAVORITE=ON -DGGML_TSAVORITE_TARGET=fpga +cmake --build build-fpga --config Release +``` +For an easy build one can also use ./tsi-pkg-build.sh which creates a FPGA specific tar bundle tsi-ggml.tz +If you want to release the build update the TSI-VERSION in the file tsi-pkg-build.sh and add Release as parameter +when running ./tsi-pkg-build.sh (Note it will overwrite what exists in /proj/rel/sw/ggml so be sure you want to do it. 
Example ./tsi-pkg-build.sh release + +```bash +./tsi-pkg-build.sh +``` diff --git a/ggml-tsi-kernel b/ggml-tsi-kernel index f7a3ac1ee334c..d1383a04f29d0 160000 --- a/ggml-tsi-kernel +++ b/ggml-tsi-kernel @@ -1 +1 @@ -Subproject commit f7a3ac1ee334c242958ccb2053ecc4854822d87e +Subproject commit d1383a04f29d0160750c0e51ab524d461c6a127b diff --git a/ggml/include/ggml-tsavorite.h b/ggml/include/ggml-tsavorite.h index cd380ddf61ed3..54a8e34662799 100644 --- a/ggml/include/ggml-tsavorite.h +++ b/ggml/include/ggml-tsavorite.h @@ -127,6 +127,7 @@ enum ggml_tsavorite_kernel_type { GGML_TSAVORITE_KERNEL_TYPE_ABS, GGML_TSAVORITE_KERNEL_TYPE_SIN, GGML_TSAVORITE_KERNEL_TYPE_SIGMOID, + GGML_TSAVORITE_KERNEL_TYPE_SILU, GGML_TSAVORITE_KERNEL_TYPE_COUNT }; @@ -159,6 +160,7 @@ extern void _mlir_ciface_txe_neg(void *a, void *res); extern void _mlir_ciface_txe_abs(void *a, void *res); extern void _mlir_ciface_txe_sin(void *a, void *res); extern void _mlir_ciface_txe_sigmoid(void *a, void *res); +extern void _mlir_ciface_txe_silu(void *a, void *res); extern void ggml_tsi_log_tensor_data(tensor_log log_data); #define NUM_OF_TXES 1 diff --git a/ggml/src/ggml-backend.cpp b/ggml/src/ggml-backend.cpp index b30b4cb386f9f..1238093e41c81 100644 --- a/ggml/src/ggml-backend.cpp +++ b/ggml/src/ggml-backend.cpp @@ -939,8 +939,11 @@ static void ggml_backend_sched_split_graph(ggml_backend_sched_t sched, struct gg } else { cur_backend_id = *node_backend_id; } - } else if (cur_backend_id != -1) { - ggml_backend_sched_set_if_supported(sched, node, cur_backend_id, node_backend_id); + // Below Code is Optimization which i am disabling for now since we have not implemented other + // Operation at tsavorite + } else if (cur_backend_id != -1 || (node->op == GGML_OP_UNARY)) { + //ggml_backend_sched_set_if_supported(sched, node, cur_backend_id, node_backend_id); + ggml_backend_sched_set_if_supported(sched, node, 0, node_backend_id); } } } diff --git a/ggml/src/ggml-tsavorite/ggml-tsavorite.cpp 
b/ggml/src/ggml-tsavorite/ggml-tsavorite.cpp index e359906b61ce6..573220c8a7027 100644 --- a/ggml/src/ggml-tsavorite/ggml-tsavorite.cpp +++ b/ggml/src/ggml-tsavorite/ggml-tsavorite.cpp @@ -436,6 +436,11 @@ static txe_compute_pipeline_state_s tsi_kernel_setup(enum ggml_tsavorite_kernel_ kernel_pipeline->kernel_name = "TXE_SIGMOID"; flag = true; break; + case GGML_TSAVORITE_KERNEL_TYPE_SILU: + kernel_pipeline->_mlir_fptr_1_input = &_mlir_ciface_txe_silu; + kernel_pipeline->kernel_name = "TXE_SILU"; + flag = true; + break; default: break; } @@ -580,15 +585,16 @@ static struct ggml_backend_tsavorite_context *ggml_tsavorite_init(ggml_backend_d GGML_TSAVORITE_LOG_WARN("%s: skipping %-40s (not supported)\n", __func__, "kernel_" #e); \ } - GGML_TSAVORITE_KERNEL(GGML_TSAVORITE_KERNEL_TYPE_ADD, true); - GGML_TSAVORITE_KERNEL(GGML_TSAVORITE_KERNEL_TYPE_SUB, true); - GGML_TSAVORITE_KERNEL(GGML_TSAVORITE_KERNEL_TYPE_MULT, true); - GGML_TSAVORITE_KERNEL(GGML_TSAVORITE_KERNEL_TYPE_DIV, true); - GGML_TSAVORITE_KERNEL(GGML_TSAVORITE_KERNEL_TYPE_SQRT, true); - GGML_TSAVORITE_KERNEL(GGML_TSAVORITE_KERNEL_TYPE_NEG, true); - GGML_TSAVORITE_KERNEL(GGML_TSAVORITE_KERNEL_TYPE_ABS, true); - GGML_TSAVORITE_KERNEL(GGML_TSAVORITE_KERNEL_TYPE_SIN, true); - GGML_TSAVORITE_KERNEL(GGML_TSAVORITE_KERNEL_TYPE_SIGMOID, true); + GGML_TSAVORITE_KERNEL(GGML_TSAVORITE_KERNEL_TYPE_ADD, true); + GGML_TSAVORITE_KERNEL(GGML_TSAVORITE_KERNEL_TYPE_SUB, true); + GGML_TSAVORITE_KERNEL(GGML_TSAVORITE_KERNEL_TYPE_MULT, true); + GGML_TSAVORITE_KERNEL(GGML_TSAVORITE_KERNEL_TYPE_DIV, true); + GGML_TSAVORITE_KERNEL(GGML_TSAVORITE_KERNEL_TYPE_SQRT, true); + GGML_TSAVORITE_KERNEL(GGML_TSAVORITE_KERNEL_TYPE_NEG, true); + GGML_TSAVORITE_KERNEL(GGML_TSAVORITE_KERNEL_TYPE_ABS, true); + GGML_TSAVORITE_KERNEL(GGML_TSAVORITE_KERNEL_TYPE_SIN, true); + GGML_TSAVORITE_KERNEL(GGML_TSAVORITE_KERNEL_TYPE_SIGMOID, true); + GGML_TSAVORITE_KERNEL(GGML_TSAVORITE_KERNEL_TYPE_SILU, true); } GGML_TSAVORITE_LOG_INFO("End %s\n", __func__); 
@@ -695,6 +701,7 @@ static bool ggml_tsavorite_supports_op(const struct ggml_backend_tsavorite_devic case GGML_UNARY_OP_NEG: case GGML_UNARY_OP_ABS: case GGML_UNARY_OP_SIGMOID: + case GGML_UNARY_OP_SILU: break; default: return false; @@ -852,6 +859,10 @@ static enum ggml_status ggml_tsavorite_graph_compute(ggml_backend_t backend, kernel_type = GGML_TSAVORITE_KERNEL_TYPE_SIGMOID; num_of_input_tensors = TSAVORITE_UNARY_INPUT_TENSORS; break; + case GGML_UNARY_OP_SILU: + kernel_type = GGML_TSAVORITE_KERNEL_TYPE_SILU; + num_of_input_tensors = TSAVORITE_UNARY_INPUT_TENSORS; + break; default: ggml_backend_tsavorite_device_rel( (struct ggml_backend_tsavorite_device_context *)backend->device->context); @@ -1806,6 +1817,7 @@ static bool ggml_backend_tsavorite_device_offload_op(ggml_backend_dev_t dev, case GGML_UNARY_OP_NEG: case GGML_UNARY_OP_ABS: case GGML_UNARY_OP_SIGMOID: + case GGML_UNARY_OP_SILU: break; default: return false; diff --git a/tsi-pkg-build.sh b/tsi-pkg-build.sh index b6b998671544c..4d6a8c736a5a8 100755 --- a/tsi-pkg-build.sh +++ b/tsi-pkg-build.sh @@ -67,10 +67,12 @@ fi cat > ${TSI_GGML_BUNDLE_INSTALL_DIR}/ggml.sh << EOL #!/bin/bash export LD_LIBRARY_PATH=\${LD_LIBRARY_PATH}:\$(pwd) -mkdir -p ${TSI_BLOB_INSTALL_DIR}/txe_mult -mkdir -p ${TSI_BLOB_INSTALL_DIR}/txe_add -cp blobs ${TSI_BLOB_INSTALL_DIR}/txe_mult/ -r -cp blobs ${TSI_BLOB_INSTALL_DIR}/txe_add/ -r +tsi_kernels=("add" "sub" "mult" "div" "abs" "inv" "neg" "sin" "sqrt" "sigmoid" "silu") + +for kernel in "${tsi_kernels[@]}"; do + mkdir -p ${TSI_BLOB_INSTALL_DIR}/txe_$kernel + cp blobs ${TSI_BLOB_INSTALL_DIR}/txe_$kernel/ -r +done EOL chmod +x ${TSI_GGML_BUNDLE_INSTALL_DIR}/ggml.sh cp ${GGML_TSI_INSTALL_DIR}/fpga/blobs ${TSI_GGML_BUNDLE_INSTALL_DIR}/ -r From f919789e23efb9270616cff3970e6784f3fe5119 Mon Sep 17 00:00:00 2001 From: Anoop Kapoor Date: Thu, 29 May 2025 14:17:33 -0700 Subject: [PATCH 2/2] @FIR-709: Fixed the script --- tsi-pkg-build.sh | 8 ++++---- 1 file changed, 4 insertions(+), 4 
deletions(-) diff --git a/tsi-pkg-build.sh b/tsi-pkg-build.sh index 4d6a8c736a5a8..2dd5f048871b7 100755 --- a/tsi-pkg-build.sh +++ b/tsi-pkg-build.sh @@ -64,14 +64,14 @@ if [ -e ${TSI_GGML_BUNDLE_INSTALL_DIR}/ggml.sh ]; then rm -fr ${TSI_GGML_BUNDLE_INSTALL_DIR}/ggml.sh fi -cat > ${TSI_GGML_BUNDLE_INSTALL_DIR}/ggml.sh << EOL +cat > ./${TSI_GGML_BUNDLE_INSTALL_DIR}/ggml.sh << EOL #!/bin/bash export LD_LIBRARY_PATH=\${LD_LIBRARY_PATH}:\$(pwd) tsi_kernels=("add" "sub" "mult" "div" "abs" "inv" "neg" "sin" "sqrt" "sigmoid" "silu") -for kernel in "${tsi_kernels[@]}"; do - mkdir -p ${TSI_BLOB_INSTALL_DIR}/txe_$kernel - cp blobs ${TSI_BLOB_INSTALL_DIR}/txe_$kernel/ -r +for kernel in "\${tsi_kernels[@]}"; do + mkdir -p ${TSI_BLOB_INSTALL_DIR}/txe_\$kernel + cp blobs ${TSI_BLOB_INSTALL_DIR}/txe_\$kernel/ -r done EOL chmod +x ${TSI_GGML_BUNDLE_INSTALL_DIR}/ggml.sh