2 changes: 1 addition & 1 deletion ggml-tsi-kernel
12 changes: 12 additions & 0 deletions ggml/include/ggml-tsavorite.h
@@ -129,6 +129,16 @@ enum ggml_tsavorite_kernel_type {
GGML_TSAVORITE_KERNEL_TYPE_RMS_NORM,
GGML_TSAVORITE_KERNEL_TYPE_SIGMOID,
GGML_TSAVORITE_KERNEL_TYPE_SILU,
// Below are the GLU kernels
GGML_TSAVORITE_KERNEL_TYPE_REGLU,
GGML_TSAVORITE_KERNEL_TYPE_GEGLU,

// Currently only the SWIGLU kernel below is implemented
GGML_TSAVORITE_KERNEL_TYPE_SWIGLU,

GGML_TSAVORITE_KERNEL_TYPE_SWIGLU_OAI,
GGML_TSAVORITE_KERNEL_TYPE_GEGLU_ERF,
GGML_TSAVORITE_KERNEL_TYPE_GEGLU_QUICK,

GGML_TSAVORITE_KERNEL_TYPE_COUNT
};
@@ -174,6 +184,7 @@ extern void _mlir_ciface_txe_abs_host(void *a, void *res);
extern void _mlir_ciface_txe_sin_host(void *a, void *res);
extern void _mlir_ciface_txe_sigmoid_host(void *a, void *res);
extern void _mlir_ciface_txe_silu_host(void *a, void *res);
extern void _mlir_ciface_txe_swiglu_host(void *a, void *b, void *res);
extern void _mlir_ciface_txe_rms_norm_host(void *a, void *res, void *buf);

/*
@@ -190,6 +201,7 @@ extern void _mlir_ciface_txe_abs_16_host(void *a, void *res);
extern void _mlir_ciface_txe_sin_16_host(void *a, void *res);
extern void _mlir_ciface_txe_sigmoid_16_host(void *a, void *res);
extern void _mlir_ciface_txe_silu_16_host(void *a, void *res);
extern void _mlir_ciface_txe_swiglu_16_host(void *a, void *b, void *res);
extern void _mlir_ciface_txe_rms_norm_16_host(void *a, void *res, void *buf);

extern void ggml_tsi_log_tensor_data(tensor_log log_data);
79 changes: 70 additions & 9 deletions ggml/src/ggml-tsavorite/ggml-tsavorite.cpp
@@ -477,6 +477,14 @@ static txe_compute_pipeline_state_s tsi_kernel_setup(enum ggml_tsavorite_kernel_
kernel_pipeline->kernel_name = "TXE_RMS_NORM";
flag = true;
break;
case GGML_TSAVORITE_KERNEL_TYPE_SWIGLU:
{
kernel_pipeline->_mlir_fptr_2_input[DATA_TYPE_F32_INDEX] = &_mlir_ciface_txe_swiglu_host;
kernel_pipeline->_mlir_fptr_2_input[DATA_TYPE_F16_INDEX] = &_mlir_ciface_txe_swiglu_16_host;
kernel_pipeline->kernel_name = "TXE_SWI_GLU";
flag = true;
break;
}
default:
break;
}
@@ -625,6 +633,7 @@ static struct ggml_backend_tsavorite_context *ggml_tsavorite_init(ggml_backend_d
GGML_TSAVORITE_KERNEL(GGML_TSAVORITE_KERNEL_TYPE_SIGMOID, true);
GGML_TSAVORITE_KERNEL(GGML_TSAVORITE_KERNEL_TYPE_SILU, true);
GGML_TSAVORITE_KERNEL(GGML_TSAVORITE_KERNEL_TYPE_RMS_NORM, true);
GGML_TSAVORITE_KERNEL(GGML_TSAVORITE_KERNEL_TYPE_SWIGLU, true);
}

GGML_TSAVORITE_LOG_INFO("End %s\n", __func__);
@@ -704,7 +713,7 @@ static ggml_backend_tsavorite_buffer_s ggml_tsavorite_get_buffer(struct ggml_ten
return tsi_nil;
}
#endif
bool is_op_dtype_consistent_with_src(const struct ggml_tensor *op) {
static bool is_op_dtype_consistent_with_src(const struct ggml_tensor *op) {
uint32_t tensor_data_type = op->type;
for (size_t i = 0; i < GGML_MAX_DIMS; ++i) {
if (op->src[i] != NULL) {
@@ -720,16 +729,13 @@ static bool ggml_tsavorite_supports_op(const struct ggml_backend_tsavorite_devic
GGML_TSAVORITE_LOG_INFO("Start %s\n", __func__);
if (!ctx_dev)
return false;
for (size_t i = 0, n = 3; i < n; ++i) {
if (op->src[i] != NULL && op->src[i]->type != GGML_TYPE_F32) {
return false;
}
}

if (op->type != GGML_TYPE_F32 || op->type != GGML_TYPE_F16)
if (op->type != GGML_TYPE_F32 && op->type != GGML_TYPE_F16)
return false;

if (!is_op_dtype_consistent_with_src(op))
return false;

switch (op->op) {
case GGML_OP_NONE:
case GGML_OP_ADD:
@@ -739,9 +745,15 @@
case GGML_OP_SQRT:
case GGML_OP_SQR:
case GGML_OP_SIN:
break;
case GGML_OP_RMS_NORM:
break;
case GGML_OP_GLU:
{
const ggml_glu_op op_ext = ggml_get_glu_op(op);
if (op_ext != GGML_GLU_OP_SWIGLU)
return false;
break;
}
case GGML_OP_UNARY:
switch (ggml_get_unary_op(op)) {
case GGML_UNARY_OP_NEG:
@@ -815,6 +827,36 @@ static MemRefDescriptor<Rank>* create_mlir_buf(int K) {
return header;
}

static enum ggml_tsavorite_kernel_type tsi_glu_kernel_type(struct ggml_tensor *node) {
const ggml_glu_op op = ggml_get_glu_op(node);
enum ggml_tsavorite_kernel_type kernel_type;

switch (op) {
case GGML_GLU_OP_REGLU:
kernel_type = GGML_TSAVORITE_KERNEL_TYPE_REGLU;
break;
case GGML_GLU_OP_GEGLU:
kernel_type = GGML_TSAVORITE_KERNEL_TYPE_GEGLU;
break;
case GGML_GLU_OP_SWIGLU:
kernel_type = GGML_TSAVORITE_KERNEL_TYPE_SWIGLU;
break;
case GGML_GLU_OP_SWIGLU_OAI:
kernel_type = GGML_TSAVORITE_KERNEL_TYPE_SWIGLU_OAI;
break;
case GGML_GLU_OP_GEGLU_ERF:
kernel_type = GGML_TSAVORITE_KERNEL_TYPE_GEGLU_ERF;
break;
case GGML_GLU_OP_GEGLU_QUICK:
kernel_type = GGML_TSAVORITE_KERNEL_TYPE_GEGLU_QUICK;
break;
default:
kernel_type = GGML_TSAVORITE_KERNEL_TYPE_COUNT;
break;
}
return kernel_type;
}

// Nodes are intermediates that have multiple src tensors and an operation.
// Here we create multiple threads.
// Each thread runs the command buffer, picks a tensor, executes it, and gets the result back based on
@@ -940,6 +982,16 @@ static enum ggml_status ggml_tsavorite_graph_compute(ggml_backend_t backend,
kernel_type = GGML_TSAVORITE_KERNEL_TYPE_RMS_NORM;
num_of_input_tensors = TSAVORITE_UNARY_INPUT_TENSORS;
break;
case GGML_OP_GLU:
kernel_type = tsi_glu_kernel_type(node);
if (!src1)
src1 = src0;
if (kernel_type == GGML_TSAVORITE_KERNEL_TYPE_COUNT) {
GGML_TSAVORITE_LOG_ERROR("\n Unsupported GGML_OP_GLU sub-type \n");
return GGML_STATUS_ABORTED;
}
num_of_input_tensors = TSAVORITE_TWO_INPUT_TENSORS;
break;
case GGML_OP_UNARY:
switch (ggml_get_unary_op(node)) {
case GGML_UNARY_OP_NEG:
@@ -1916,10 +1968,12 @@ static bool ggml_backend_tsavorite_device_supports_buft(ggml_backend_dev_t dev,
// ggml_backend_sched_backend_id_from_cur -> ggml_backend_offload_op ->
static bool ggml_backend_tsavorite_device_offload_op(ggml_backend_dev_t dev,
const struct ggml_tensor *op) {
if (op->type != GGML_TYPE_F32 || op->type != GGML_TYPE_F16)
if (op->type != GGML_TYPE_F32 && op->type != GGML_TYPE_F16)
return false;

if (!is_op_dtype_consistent_with_src(op))
return false;

switch (op->op) {
case GGML_OP_NONE:
case GGML_OP_ADD:
@@ -1931,6 +1985,13 @@
case GGML_OP_SIN:
case GGML_OP_RMS_NORM:
break;
case GGML_OP_GLU:
{
const ggml_glu_op op_ext = ggml_get_glu_op(op);
if (op_ext != GGML_GLU_OP_SWIGLU)
return false;
break;
}
case GGML_OP_UNARY:
switch (ggml_get_unary_op(op)) {
case GGML_UNARY_OP_NEG:
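Reviewer note, not part of this diff: a minimal sketch of how a caller could build a SWIGLU node with the public ggml API so that the supports_op/offload_op checks above accept it for the tsavorite backend. The ggml_swiglu helper is assumed from upstream ggml and may differ by version.

#include "ggml.h"

// Minimal sketch (assumed upstream API): creates a GGML_OP_GLU node whose
// glu op is GGML_GLU_OP_SWIGLU. With this change the tsavorite backend
// reports support for such a node when its type is F32 or F16 and its
// sources share the op's data type.
static struct ggml_tensor * build_swiglu_node(struct ggml_context * ctx,
                                              struct ggml_tensor  * x) {
    // Single-input form: x is split in half along its first dimension and
    // SiLU gating is applied (assumed upstream ggml_swiglu semantics).
    return ggml_swiglu(ctx, x);
}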
2 changes: 1 addition & 1 deletion tsi-pkg-build.sh
@@ -90,7 +90,7 @@ cat > ./${TSI_GGML_BUNDLE_INSTALL_DIR}/ggml.sh << EOL
# Set up library paths for GCC 13.3.0 compatibility
export LD_LIBRARY_PATH=\${LD_LIBRARY_PATH}:\$(pwd)

tsi_kernels=("add" "sub" "mult" "div" "abs" "inv" "neg" "sin" "sqrt" "sqr" "sigmoid" "silu" "rms_norm" "add_16" "sub_16" "mult_16" "div_16" "abs_16" "inv_16" "neg_16" "sin_16" "sqrt_16" "sqr_16" "sigmoid_16" "silu_16" "rms_norm_16")
tsi_kernels=("add" "sub" "mult" "div" "abs" "inv" "neg" "sin" "sqrt" "sqr" "sigmoid" "silu" "rms_norm" "swiglu" "add_16" "sub_16" "mult_16" "div_16" "abs_16" "inv_16" "neg_16" "sin_16" "sqrt_16" "sqr_16" "sigmoid_16" "silu_16" "rms_norm_16" "swiglu_16")

for kernel in "\${tsi_kernels[@]}"; do
mkdir -p ${TSI_BLOB_INSTALL_DIR}/txe_\$kernel
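Note, illustration rather than part of this diff: each kernel name must be its own quoted element of tsi_kernels so that the loop above creates one blob directory per kernel. A rough shell sketch with a placeholder install prefix:

# Illustration only; TSI_BLOB_INSTALL_DIR is a placeholder, not the repo's value.
TSI_BLOB_INSTALL_DIR=/tmp/tsi-blobs
for kernel in "swiglu" "swiglu_16"; do
    mkdir -p "${TSI_BLOB_INSTALL_DIR}/txe_${kernel}"   # creates txe_swiglu and txe_swiglu_16
done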