Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
7 changes: 6 additions & 1 deletion ggml/include/ggml-tsavorite.h
Original file line number Diff line number Diff line change
Expand Up @@ -57,7 +57,8 @@ extern "C" {

enum ggml_tsavorite_input_tensors_count {
TSAVORITE_UNARY_INPUT_TENSORS = 1,
- TSAVORITE_TWO_INPUT_TENSORS = 2
+ TSAVORITE_TWO_INPUT_TENSORS = 2,
+ TSAVORITE_IGNORE_TENSORS
};

enum ggml_tsavorite_log_type {
Expand Down Expand Up @@ -141,6 +142,10 @@ enum ggml_tsavorite_kernel_type {
GGML_TSAVORITE_KERNEL_TYPE_GEGLU_QUICK,

GGML_TSAVORITE_KERNEL_TYPE_SOFT_MAX,
+ GGML_TSAVORITE_KERNEL_TYPE_RESHAPE,
+ GGML_TSAVORITE_KERNEL_TYPE_VIEW,
+ GGML_TSAVORITE_KERNEL_TYPE_PERMUTE,
+ GGML_TSAVORITE_KERNEL_TYPE_TRANSPOSE,

GGML_TSAVORITE_KERNEL_TYPE_COUNT
};
Expand Down
7 changes: 6 additions & 1 deletion ggml/src/ggml-backend.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -979,10 +979,15 @@ void ggml_backend_sched_split_graph(ggml_backend_sched_t sched, struct ggml_cgra
int cur_backend_id = -1;
for (int i = 0; i < graph->n_nodes; i++) {
struct ggml_tensor * node = graph->nodes[i];
+ int * node_backend_id = &tensor_backend_id(node);
if (ggml_is_view_op(node->op)) {
+ if(node->src[0] && (sched->n_backends >= 1)) {
+ *node_backend_id = sched->n_backends -1;
+ node_backend_id = &tensor_backend_id(node->src[0]);
+ *node_backend_id = sched->n_backends -1;
+ }
continue;
}
- int * node_backend_id = &tensor_backend_id(node);
if (*node_backend_id != -1) {
if (*node_backend_id == sched->n_backends - 1) {
// skip cpu (lowest prio backend)
Expand Down
42 changes: 34 additions & 8 deletions ggml/src/ggml-tsavorite/ggml-tsavorite.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -819,10 +819,16 @@ static bool ggml_tsavorite_supports_op(const struct ggml_backend_tsavorite_devic
case GGML_OP_SQRT:
case GGML_OP_SQR:
case GGML_OP_SIN:
+ case GGML_OP_RESHAPE:
+ case GGML_OP_VIEW:
+ case GGML_OP_PERMUTE:
+ case GGML_OP_TRANSPOSE:
+
case GGML_OP_RMS_NORM:
- #ifdef GGML_TARGET_POSIX
- case GGML_OP_SOFT_MAX:
- #endif /* GGML_TARGET_POSIX */
+
+ #ifdef GGML_TARGET_POSIX_DEBUG
+ case GGML_OP_SOFT_MAX:
+ #endif /* GGML_TARGET_POSIX_DEBUG */
break;
case GGML_OP_GLU:
{
Expand Down Expand Up @@ -1063,6 +1069,20 @@ static enum ggml_status ggml_tsavorite_graph_compute(ggml_backend_t backend,
}
num_of_input_tensors = TSAVORITE_TWO_INPUT_TENSORS;
break;
+ case GGML_OP_RESHAPE:
+ kernel_type = GGML_TSAVORITE_KERNEL_TYPE_RESHAPE;
+ num_of_input_tensors = TSAVORITE_IGNORE_TENSORS;
+ break;
+ case GGML_OP_VIEW:
+ kernel_type = GGML_TSAVORITE_KERNEL_TYPE_VIEW;
+ num_of_input_tensors = TSAVORITE_IGNORE_TENSORS;
+ break;
+ case GGML_OP_PERMUTE:
+ num_of_input_tensors = TSAVORITE_IGNORE_TENSORS;
+ break;
+ case GGML_OP_TRANSPOSE:
+ num_of_input_tensors = TSAVORITE_IGNORE_TENSORS;
+ break;
case GGML_OP_UNARY:
switch (ggml_get_unary_op(node)) {
case GGML_UNARY_OP_NEG:
Expand Down Expand Up @@ -1093,10 +1113,10 @@ static enum ggml_status ggml_tsavorite_graph_compute(ggml_backend_t backend,
return GGML_STATUS_ABORTED;
}

- if (!ctx->kernels[kernel_type].pipeline ||
+ if ((num_of_input_tensors != TSAVORITE_IGNORE_TENSORS) && (!ctx->kernels[kernel_type].pipeline ||
(!ctx->kernels[kernel_type].pipeline->_mlir_fptr_3_input[kernel_sub_type] &&
!ctx->kernels[kernel_type].pipeline->_mlir_fptr_2_input[kernel_sub_type] &&
- !ctx->kernels[kernel_type].pipeline->_mlir_fptr_1_input[kernel_sub_type])) {
+ !ctx->kernels[kernel_type].pipeline->_mlir_fptr_1_input[kernel_sub_type]))) {
GGML_TSAVORITE_LOG_ERROR("Kernel Type %d, not supported \n", kernel_type);
return GGML_STATUS_ABORTED;
}
Expand Down Expand Up @@ -2128,10 +2148,16 @@ static bool ggml_backend_tsavorite_device_offload_op(ggml_backend_dev_t dev,
case GGML_OP_SQRT:
case GGML_OP_SQR:
case GGML_OP_SIN:
+ case GGML_OP_RESHAPE:
+ case GGML_OP_VIEW:
+ case GGML_OP_PERMUTE:
+ case GGML_OP_TRANSPOSE:
case GGML_OP_RMS_NORM:
- #ifdef GGML_TARGET_POSIX
- case GGML_OP_SOFT_MAX:
- #endif /* GGML_TARGET_POSIX */
+
+ #ifdef GGML_TARGET_POSIX_DEBUG
+ case GGML_OP_SOFT_MAX:
+ #endif /* GGML_TARGET_POSIX_DEBUG */
+
break;
case GGML_OP_GLU:
{
Expand Down