diff --git a/ggml/include/ggml-tsavorite.h b/ggml/include/ggml-tsavorite.h index d48373d4b08cd..1b4fecb7aafc3 100644 --- a/ggml/include/ggml-tsavorite.h +++ b/ggml/include/ggml-tsavorite.h @@ -57,7 +57,8 @@ extern "C" { enum ggml_tsavorite_input_tensors_count { TSAVORITE_UNARY_INPUT_TENSORS = 1, - TSAVORITE_TWO_INPUT_TENSORS = 2 + TSAVORITE_TWO_INPUT_TENSORS = 2, + TSAVORITE_IGNORE_TENSORS }; enum ggml_tsavorite_log_type { @@ -141,6 +142,10 @@ enum ggml_tsavorite_kernel_type { GGML_TSAVORITE_KERNEL_TYPE_GEGLU_QUICK, GGML_TSAVORITE_KERNEL_TYPE_SOFT_MAX, + GGML_TSAVORITE_KERNEL_TYPE_RESHAPE, + GGML_TSAVORITE_KERNEL_TYPE_VIEW, + GGML_TSAVORITE_KERNEL_TYPE_PERMUTE, + GGML_TSAVORITE_KERNEL_TYPE_TRANSPOSE, GGML_TSAVORITE_KERNEL_TYPE_COUNT }; diff --git a/ggml/src/ggml-backend.cpp b/ggml/src/ggml-backend.cpp index 10e57ee33e162..6fa6e30fc9c81 100644 --- a/ggml/src/ggml-backend.cpp +++ b/ggml/src/ggml-backend.cpp @@ -979,10 +979,15 @@ void ggml_backend_sched_split_graph(ggml_backend_sched_t sched, struct ggml_cgra int cur_backend_id = -1; for (int i = 0; i < graph->n_nodes; i++) { struct ggml_tensor * node = graph->nodes[i]; + int * node_backend_id = &tensor_backend_id(node); if (ggml_is_view_op(node->op)) { + if(node->src[0] && (sched->n_backends >= 1)) { + *node_backend_id = sched->n_backends -1; + node_backend_id = &tensor_backend_id(node->src[0]); + *node_backend_id = sched->n_backends -1; + } continue; } - int * node_backend_id = &tensor_backend_id(node); if (*node_backend_id != -1) { if (*node_backend_id == sched->n_backends - 1) { // skip cpu (lowest prio backend) diff --git a/ggml/src/ggml-tsavorite/ggml-tsavorite.cpp b/ggml/src/ggml-tsavorite/ggml-tsavorite.cpp index bf24888eb0cc8..7b52a97e9ea19 100644 --- a/ggml/src/ggml-tsavorite/ggml-tsavorite.cpp +++ b/ggml/src/ggml-tsavorite/ggml-tsavorite.cpp @@ -819,10 +819,16 @@ static bool ggml_tsavorite_supports_op(const struct ggml_backend_tsavorite_devic case GGML_OP_SQRT: case GGML_OP_SQR: case GGML_OP_SIN: + case 
GGML_OP_RESHAPE: + case GGML_OP_VIEW: + case GGML_OP_PERMUTE: + case GGML_OP_TRANSPOSE: + case GGML_OP_RMS_NORM: - #ifdef GGML_TARGET_POSIX - case GGML_OP_SOFT_MAX: - #endif /* GGML_TARGET_POSIX */ + +#ifdef GGML_TARGET_POSIX_DEBUG + case GGML_OP_SOFT_MAX: +#endif /* GGML_TARGET_POSIX_DEBUG */ break; case GGML_OP_GLU: { @@ -1063,6 +1069,23 @@ static enum ggml_status ggml_tsavorite_graph_compute(ggml_backend_t backend, } num_of_input_tensors = TSAVORITE_TWO_INPUT_TENSORS; break; + /* RESHAPE/VIEW/PERMUTE/TRANSPOSE: flagged TSAVORITE_IGNORE_TENSORS so the pipeline check below is skipped; kernel_type is still set for each so it never keeps a value from before this switch. */ + case GGML_OP_RESHAPE: + kernel_type = GGML_TSAVORITE_KERNEL_TYPE_RESHAPE; + num_of_input_tensors = TSAVORITE_IGNORE_TENSORS; + break; + case GGML_OP_VIEW: + kernel_type = GGML_TSAVORITE_KERNEL_TYPE_VIEW; + num_of_input_tensors = TSAVORITE_IGNORE_TENSORS; + break; + case GGML_OP_PERMUTE: + kernel_type = GGML_TSAVORITE_KERNEL_TYPE_PERMUTE; + num_of_input_tensors = TSAVORITE_IGNORE_TENSORS; + break; + case GGML_OP_TRANSPOSE: + kernel_type = GGML_TSAVORITE_KERNEL_TYPE_TRANSPOSE; + num_of_input_tensors = TSAVORITE_IGNORE_TENSORS; + break; case GGML_OP_UNARY: switch (ggml_get_unary_op(node)) { case GGML_UNARY_OP_NEG: @@ -1093,10 +1116,10 @@ static enum ggml_status ggml_tsavorite_graph_compute(ggml_backend_t backend, return GGML_STATUS_ABORTED; } - if (!ctx->kernels[kernel_type].pipeline || + if ((num_of_input_tensors != TSAVORITE_IGNORE_TENSORS) && (!ctx->kernels[kernel_type].pipeline || (!ctx->kernels[kernel_type].pipeline->_mlir_fptr_3_input[kernel_sub_type] && !ctx->kernels[kernel_type].pipeline->_mlir_fptr_2_input[kernel_sub_type] && - !ctx->kernels[kernel_type].pipeline->_mlir_fptr_1_input[kernel_sub_type])) { + !ctx->kernels[kernel_type].pipeline->_mlir_fptr_1_input[kernel_sub_type]))) { GGML_TSAVORITE_LOG_ERROR("Kernel Type %d, not supported \n", kernel_type); return GGML_STATUS_ABORTED; } @@ -2128,10 +2151,16 @@ static bool ggml_backend_tsavorite_device_offload_op(ggml_backend_dev_t dev, case GGML_OP_SQRT: case GGML_OP_SQR: case GGML_OP_SIN: + case GGML_OP_RESHAPE: + case GGML_OP_VIEW: + case GGML_OP_PERMUTE: + case GGML_OP_TRANSPOSE: case GGML_OP_RMS_NORM: - #ifdef GGML_TARGET_POSIX - case 
GGML_OP_SOFT_MAX: - #endif /* GGML_TARGET_POSIX */ + +#ifdef GGML_TARGET_POSIX_DEBUG + case GGML_OP_SOFT_MAX: +#endif /* GGML_TARGET_POSIX_DEBUG */ + break; case GGML_OP_GLU: {