Mirror of https://github.com/ggerganov/llama.cpp.git (synced 2025-04-26 09:06:07 +00:00)
CANN: Support Opt CONV_TRANSPOSE_1D and ELU (#12786)
* [CANN] Support ELU and CONV_TRANSPOSE_1D
* [CANN] Modification review comments
* [CANN] Modification review comments
* [CANN] name adjustment
* [CANN] remove lambda used in template
* [CANN] Use std::func instead of template
* [CANN] Modify the code according to the review comments

---------

Signed-off-by: noemotiovon <noemotiovon@gmail.com>
This commit is contained in:
parent 0090950f67
commit 6e1c4cebdb

Changed paths:
.devops
.github/workflows
ggml/src/ggml-cann
.devops (CANN Dockerfile):

@@ -1,4 +1,4 @@
-ARG ASCEND_VERSION=8.0.rc2.alpha003-910b-openeuler22.03-py3.8
+ARG ASCEND_VERSION=8.1.RC1.alpha001-910b-openeuler22.03-py3.10
 
 FROM ascendai/cann:$ASCEND_VERSION AS build
 
@@ -6,7 +6,7 @@ WORKDIR /app
 
 COPY . .
 
-RUN yum install -y gcc g++ cmake make
+RUN yum install -y gcc g++ cmake make libcurl-devel
 
 ENV ASCEND_TOOLKIT_HOME=/usr/local/Ascend/ascend-toolkit/latest
 ENV LIBRARY_PATH=${ASCEND_TOOLKIT_HOME}/lib64:$LIBRARY_PATH
 ENV LD_LIBRARY_PATH=${ASCEND_TOOLKIT_HOME}/lib64:${ASCEND_TOOLKIT_HOME}/lib64/plugin/opskernel:${ASCEND_TOOLKIT_HOME}/lib64/plugin/nnengine:${ASCEND_TOOLKIT_HOME}/opp/built-in/op_impl/ai_core/tbe/op_tiling:${LD_LIBRARY_PATH}
.github/workflows/build.yml (vendored), 4 changes:
@@ -1771,7 +1771,7 @@ jobs:
       strategy:
         matrix:
           cann:
-            - '8.0.rc3.beta1-910b-openeuler22.03-py3.10'
+            - '8.1.RC1.alpha001-910b-openeuler22.03-py3.10'
           device:
             - 'ascend910b3'
           build:
@@ -1784,7 +1784,7 @@ jobs:
       - name: Dependencies
         run: |
           yum update -y
-          yum install -y git gcc gcc-c++ make cmake
+          yum install -y git gcc gcc-c++ make cmake libcurl-devel
 
       - name: Build
         run: |
ggml/src/ggml-cann (op kernels):

@@ -57,6 +57,8 @@
 #include <aclnnop/aclnn_sub.h>
 #include <aclnnop/aclnn_mul.h>
 #include <aclnnop/aclnn_div.h>
+#include <aclnnop/aclnn_convolution.h>
+#include <aclnnop/aclnn_elu.h>
 #include <float.h>
 
 #include <cmath>
@@ -86,6 +88,20 @@ void bcast_shape(ggml_tensor * src0, ggml_tensor * src1, ggml_tensor * dst, aclT
     }
 }
 
+void ggml_cann_unary_op(
+    std::function<void(ggml_backend_cann_context&, aclTensor*, aclTensor*)> unary_op,
+    ggml_backend_cann_context& ctx, ggml_tensor* dst) {
+    ggml_tensor* src = dst->src[0];
+
+    aclTensor* acl_src = ggml_cann_create_tensor(src);
+    aclTensor* acl_dst = ggml_cann_create_tensor(dst);
+
+    unary_op(ctx, acl_src, acl_dst);
+
+    ACL_CHECK(aclDestroyTensor(acl_src));
+    ACL_CHECK(aclDestroyTensor(acl_dst));
+}
+
 /**
  * @brief Repeats elements of a tensor along each dimension according to the
  * specified repeat array.
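The hunk above adds a ggml_cann_unary_op overload that receives the operation as a std::function value rather than as a template parameter (the "Use std::func instead of template" item in the commit message), so a plain lambda can be passed at the call site. Below is a minimal standalone sketch of the same dispatch pattern, using toy stand-in types; Ctx, Tensor, and apply_unary are illustrative names, not ggml or CANN symbols.

```cpp
// Toy illustration only: Ctx and Tensor stand in for ggml_backend_cann_context
// and aclTensor; apply_unary mirrors the shape of the new std::function-based
// helper but is not the actual CANN implementation.
#include <cstdio>
#include <functional>

struct Ctx {};                // stand-in for the backend context
struct Tensor { float v; };   // stand-in for a device tensor handle

// The operation is passed as a std::function value, so any callable
// (named function, lambda, functor) works without being a template argument.
static void apply_unary(std::function<void(Ctx&, Tensor*, Tensor*)> op,
                        Ctx& ctx, Tensor* src, Tensor* dst) {
    op(ctx, src, dst);  // tensor creation/destruction omitted in this toy
}

int main() {
    Ctx ctx;
    Tensor src{-2.0f}, dst{0.0f};
    // A lambda works directly as an argument, which is the point of the refactor:
    apply_unary([](Ctx&, Tensor* s, Tensor* d) { d->v = -s->v; }, ctx, &src, &dst);
    std::printf("%.1f\n", dst.v);  // prints 2.0
    return 0;
}
```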
@@ -2585,3 +2601,49 @@ void ggml_cann_rope(ggml_backend_cann_context& ctx, ggml_tensor* dst) {
     ACL_CHECK(aclDestroyTensor(acl_src));
     ACL_CHECK(aclDestroyTensor(acl_dst));
 }
+
+void ggml_cann_conv_transpose_1d(ggml_backend_cann_context& ctx, ggml_tensor* dst){
+    ggml_tensor * src0 = dst->src[0];
+    ggml_tensor * src1 = dst->src[1];
+
+    // stride
+    int64_t s0 = ((const int32_t*)(dst->op_params))[0];
+
+    aclTensor* acl_input = ggml_cann_create_tensor(src1, src1->ne, src1->nb, 3, ACL_FORMAT_NCL);
+    aclTensor* acl_weight = ggml_cann_create_tensor(src0, src0->ne, src0->nb, 3, ACL_FORMAT_NCL);
+    aclTensor* acl_dst = ggml_cann_create_tensor(dst, dst->ne, dst->nb, 3, ACL_FORMAT_NCL);
+
+    int64_t strideVal[1];
+    strideVal[0] = s0;
+    aclIntArray *stride = aclCreateIntArray(strideVal, 1);
+    int64_t paddingVal[] = {0};
+    aclIntArray *padding = aclCreateIntArray(paddingVal, 1);
+    int64_t dilationVal[] = {1};
+    aclIntArray *dilation = aclCreateIntArray(dilationVal, 1);
+    bool transposed = true;
+    int64_t groups = 1;
+    int8_t cubeMathType = 0;
+
+    GGML_CANN_CALL_ACLNN_OP(Convolution, acl_input, acl_weight, nullptr, stride,
+        padding, dilation, transposed, padding, groups, acl_dst, cubeMathType);
+
+    ACL_CHECK(aclDestroyTensor(acl_weight));
+    ACL_CHECK(aclDestroyTensor(acl_dst));
+}
+
+void ggml_cann_elu(ggml_backend_cann_context& ctx, ggml_tensor* dst){
+    ggml_tensor * src0 = dst->src[0];
+
+    aclTensor* acl_input = ggml_cann_create_tensor(src0);
+    aclTensor* acl_dst = ggml_cann_create_tensor(dst);
+
+    float alphaValue = 1.0f;
+    aclScalar* alpha = nullptr;
+    alpha = aclCreateScalar(&alphaValue, aclDataType::ACL_FLOAT);
+
+    GGML_CANN_CALL_ACLNN_OP(Elu, acl_input, alpha, alpha, alpha,
+        acl_dst);
+
+    ACL_CHECK(aclDestroyTensor(acl_input));
+    ACL_CHECK(aclDestroyTensor(acl_dst));
+}
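For reference, with padding 0 and dilation 1 as set above, a 1D transposed convolution with stride s0 maps an input of length L and a kernel of length K to an output of length (L - 1) * s0 + K. The sketch below is a hedged plain-C++ single-channel reference of that math; conv_transpose_1d_ref is an illustrative helper, not part of ggml or CANN, and it ignores the multi-channel NCL layout used by the kernel above.

```cpp
// Single-channel conv_transpose_1d reference (assumption: this mirrors the
// math of the CANN op for padding 0, dilation 1; it is not the backend path).
#include <cstdio>
#include <vector>

static std::vector<float> conv_transpose_1d_ref(const std::vector<float>& x,
                                                const std::vector<float>& k,
                                                int s0) {
    const int L = (int) x.size();
    const int K = (int) k.size();
    std::vector<float> y((L - 1) * s0 + K, 0.0f);
    // Each input sample scatters a scaled copy of the kernel into the output.
    for (int i = 0; i < L; ++i) {
        for (int j = 0; j < K; ++j) {
            y[i * s0 + j] += x[i] * k[j];
        }
    }
    return y;
}

int main() {
    // Input length 3, kernel length 2, stride 2 -> output length (3-1)*2 + 2 = 6.
    auto y = conv_transpose_1d_ref({1, 2, 3}, {1, 1}, 2);
    for (float v : y) std::printf("%g ", v);  // prints: 1 1 2 2 3 3
    std::printf("\n");
    return 0;
}
```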
ggml/src/ggml-cann (op declarations header):

@@ -1,15 +1,4 @@
-#ifndef CANN_ACLNN_OPS
-#define CANN_ACLNN_OPS
-
 /**
- * @file acl_tensor
- * @brief This file contains related functions of ggml_tensor and acl_tensor.
- * Contains conversion from ggml_tensor to acl_tensor, broadcast and other
- * functions.
- * @author hipudding <huafengchun@gmail.com>
- * @author wangshuai09 <391746016@qq.com>
- * @date July 15, 2024
- *
  * Copyright (c) 2023-2024 The ggml authors
  *
  * Permission is hereby granted, free of charge, to any person obtaining a copy
@@ -31,6 +20,9 @@
  * IN THE SOFTWARE.
  */
 
+#ifndef CANN_ACLNN_OPS
+#define CANN_ACLNN_OPS
+
 #include <aclnnop/aclnn_abs.h>
 #include <aclnnop/aclnn_neg.h>
 #include <aclnnop/aclnn_exp.h>
@@ -483,8 +475,8 @@ void ggml_cann_rope(ggml_backend_cann_context& ctx, ggml_tensor* dst);
  * operation is executed using the CANN backend for optimized performance.
  *
  * @param ctx The CANN context used for operations.
- * @param dst The destination tensor where the indices of the maximum values will be stored.
- * dst->op is `GGML_OP_ARGMAX`.
+ * @param dst The destination tensor where the indices of the maximum values will
+ * be stored. dst->op is `GGML_OP_ARGMAX`.
  */
 void ggml_cann_argmax(ggml_backend_cann_context& ctx, ggml_tensor* dst);
 
@@ -599,6 +591,99 @@ void aclnn_cos(ggml_backend_cann_context& ctx, aclTensor* acl_src,
 void aclnn_sin(ggml_backend_cann_context& ctx, aclTensor* acl_src,
                aclTensor* acl_dst);
 
+/**
+ * @brief Prepares broadcast-compatible ACL tensors for two input tensors and one
+ * output tensor.
+ *
+ * This function checks whether broadcasting is needed between `src0` and `src1`.
+ * If broadcasting is required, it calculates the proper shapes and creates
+ * ACL tensors with broadcast parameters. Otherwise, it directly creates ACL tensors
+ * based on the original tensor shapes.
+ *
+ * @param src0 The first input tensor (reference shape).
+ * @param src1 The second input tensor (possibly broadcasted).
+ * @param dst The destination/output tensor.
+ * @param acl_src0 Output pointer to the created ACL tensor corresponding to src0.
+ * @param acl_src1 Output pointer to the created ACL tensor corresponding to src1.
+ * @param acl_dst Output pointer to the created ACL tensor corresponding to dst.
+ */
+void bcast_shape(ggml_tensor * src0, ggml_tensor * src1, ggml_tensor * dst,
+                 aclTensor ** acl_src0, aclTensor ** acl_src1, aclTensor ** acl_dst);
+
+/**
+ * @brief Computes the 1D transposed convolution (deconvolution) of a ggml
+ * tensor using the CANN backend.
+ *
+ * @details This function performs a 1D transposed convolution (also known as
+ * deconvolution) operation on the input tensor. The computed result is stored
+ * in the destination tensor `dst`. The operation is optimized using the CANN
+ * backend for improved performance.
+ *
+ * @param ctx The CANN context used for operations.
+ * @param dst The destination tensor where the transposed convolution result
+ * will be stored. dst->op is `GGML_OP_CONV_TRANSPOSE_1D`.
+ */
+void ggml_cann_conv_transpose_1d(ggml_backend_cann_context& ctx, ggml_tensor* dst);
+
+/**
+ * @brief Applies the ELU (Exponential Linear Unit) activation to a ggml tensor
+ * using the CANN backend.
+ *
+ * @details This function performs an element-wise ELU activation on the input
+ * tensor.
+ * The result is written to the destination tensor `dst` in-place.
+ * The ELU function is defined as:
+ *
+ *   \text{ELU}(x) =
+ *   \begin{cases}
+ *   x, & \text{if } x > 0 \\
+ *   \alpha \left( \exp(x) - 1 \right), & \text{if } x \leq 0
+ *   \end{cases}
+ *
+ * where α (alpha) is a hyperparameter, typically set to 1.0.
+ * This operation is optimized using the CANN backend for high-performance
+ * inference or training.
+ *
+ * @param ctx The CANN context used for operations.
+ * @param dst The destination tensor where the ELU-activated result will be stored.
+ * dst->op is expected to be `GGML_OP_ELU`.
+ */
+void ggml_cann_elu(ggml_backend_cann_context& ctx, ggml_tensor* dst);
+
+/**
+ * @brief Applies a element-wise operation to two input tensors using the CANN
+ * backend.
+ *
+ * This templated function takes a binary operator and applies it to two source
+ * tensors
+ * associated with the destination tensor. The function handles broadcasting as
+ * needed.
+ *
+ * @tparam binary_op A callable object (e.g., lambda or function pointer) representing
+ * the binary operation to be performed. It must take three arguments:
+ * (ggml_backend_cann_context&, aclTensor*, aclTensor*, aclTensor*).
+ *
+ * @param ctx The CANN backend context used to manage execution and resources.
+ * @param dst The destination tensor.
+ */
+template <auto binary_op>
+void ggml_cann_binary_op(ggml_backend_cann_context& ctx, ggml_tensor* dst) {
+    ggml_tensor* src0 = dst->src[0];
+    ggml_tensor* src1 = dst->src[1];
+
+    aclTensor* acl_src0;
+    aclTensor* acl_src1;
+    aclTensor* acl_dst;
+
+    // Need bcast
+    bcast_shape(src0, src1, dst, &acl_src0, &acl_src1, &acl_dst);
+    binary_op(ctx, acl_src0, acl_src1, acl_dst);
+
+    ACL_CHECK(aclDestroyTensor(acl_src0));
+    ACL_CHECK(aclDestroyTensor(acl_src1));
+    ACL_CHECK(aclDestroyTensor(acl_dst));
+}
+
 /**
  * @brief Launches an asynchronous task using the memory allocator.
  *
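The ELU documentation added above gives the piecewise definition. The following is a small hedged reference snippet (plain C++, alpha fixed to 1.0 as in the new ggml_cann_elu; elu_ref is an illustrative helper, not a ggml or CANN symbol) that can be used to spot-check values produced by the backend:

```cpp
// Scalar ELU reference: x for x > 0, alpha * (exp(x) - 1) otherwise.
#include <cmath>
#include <cstdio>

static float elu_ref(float x, float alpha = 1.0f) {
    return x > 0.0f ? x : alpha * (std::exp(x) - 1.0f);
}

int main() {
    for (float x : {-2.0f, -0.5f, 0.0f, 1.5f}) {
        std::printf("elu(%.1f) = %.4f\n", x, elu_ref(x));
    }
    // elu(-2.0) = -0.8647, elu(-0.5) = -0.3935, elu(0.0) = 0.0000, elu(1.5) = 1.5000
    return 0;
}
```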
@@ -631,56 +716,6 @@ void aclnn_sin(ggml_backend_cann_context& ctx, aclTensor* acl_src,
     ACL_CHECK(aclnn##OP_NAME(workspaceAddr, workspaceSize, executor, ctx.stream())); \
     } while (0)
 
-
-/**
- * @brief Prepares broadcast-compatible ACL tensors for two input tensors and one output tensor.
- *
- * This function checks whether broadcasting is needed between `src0` and `src1`.
- * If broadcasting is required, it calculates the proper shapes and creates
- * ACL tensors with broadcast parameters. Otherwise, it directly creates ACL tensors
- * based on the original tensor shapes.
- *
- * @param src0 The first input tensor (reference shape).
- * @param src1 The second input tensor (possibly broadcasted).
- * @param dst The destination/output tensor.
- * @param acl_src0 Output pointer to the created ACL tensor corresponding to src0.
- * @param acl_src1 Output pointer to the created ACL tensor corresponding to src1.
- * @param acl_dst Output pointer to the created ACL tensor corresponding to dst.
- */
-void bcast_shape(ggml_tensor * src0, ggml_tensor * src1, ggml_tensor * dst, aclTensor ** acl_src0,
-                 aclTensor ** acl_src1, aclTensor ** acl_dst);
-
-/**
- * @brief Applies a element-wise operation to two input tensors using the CANN backend.
- *
- * This templated function takes a binary operator and applies it to two source tensors
- * associated with the destination tensor. The function handles broadcasting as needed.
- *
- * @tparam binary_op A callable object (e.g., lambda or function pointer) representing
- * the binary operation to be performed. It must take three arguments:
- * (ggml_backend_cann_context&, aclTensor*, aclTensor*, aclTensor*).
- *
- * @param ctx The CANN backend context used to manage execution and resources.
- * @param dst The destination tensor.
- */
-template <auto binary_op>
-void ggml_cann_binary_op(ggml_backend_cann_context& ctx, ggml_tensor* dst) {
-    ggml_tensor* src0 = dst->src[0];
-    ggml_tensor* src1 = dst->src[1];
-
-    aclTensor* acl_src0;
-    aclTensor* acl_src1;
-    aclTensor* acl_dst;
-
-    // Need bcast
-    bcast_shape(src0, src1, dst, &acl_src0, &acl_src1, &acl_dst);
-    binary_op(ctx, acl_src0, acl_src1, acl_dst);
-
-    ACL_CHECK(aclDestroyTensor(acl_src0));
-    ACL_CHECK(aclDestroyTensor(acl_src1));
-    ACL_CHECK(aclDestroyTensor(acl_dst));
-}
-
 /**
  * @brief Applies a unary operation to an input tensor using the CANN backend.
  *
@@ -690,7 +725,6 @@ void ggml_cann_binary_op(ggml_backend_cann_context& ctx, ggml_tensor* dst) {
  * @tparam unary_op A callable with the signature:
  *         void(ggml_backend_cann_context&, aclTensor*, aclTensor*)
  *         where the first aclTensor is the source and the second is the destination.
- *
  * @param ctx The CANN backend context for managing resources and execution.
  * @param dst The destination tensor. Its src[0] is treated as the input tensor.
  */
@@ -702,10 +736,30 @@ template <void unary_op(ggml_backend_cann_context&, aclTensor*, aclTensor*)>
     aclTensor* acl_dst = ggml_cann_create_tensor(dst);
 
     unary_op(ctx, acl_src, acl_dst);
 
     ACL_CHECK(aclDestroyTensor(acl_src));
     ACL_CHECK(aclDestroyTensor(acl_dst));
 }
 
+/**
+ * @brief Applies a unary operation to a ggml tensor using the CANN backend.
+ *
+ * @details This function performs a unary operation on the input tensor using
+ * a user-provided lambda or callable object `unary_op`, which accepts the CANN
+ * context and two ACL tensors (source and destination). Internally, this function
+ * creates ACL representations of the ggml tensors and invokes the unary operation.
+ * The result is stored in the destination tensor `dst`. This utility abstracts the
+ * common boilerplate of tensor conversion and cleanup when implementing unary ops.
+ *
+ * @param unary_op A callable that performs the unary operation using CANN APIs.
+ * @param ctx The CANN context used for operations.
+ * @param dst The destination tensor where the result will be stored.
+ *            The source tensor is retrieved from `dst->src[0]`.
+ */
+void ggml_cann_unary_op(
+    std::function<void(ggml_backend_cann_context&, aclTensor*, aclTensor*)> unary_op,
+    ggml_backend_cann_context& ctx, ggml_tensor* dst);
+
 /**
  * @brief Helper macro to invoke a unary ACL operation using ggml_cann_unary_op.
  *
@@ -725,11 +779,12 @@ template <void unary_op(ggml_backend_cann_context&, aclTensor*, aclTensor*)>
  */
 #define GGML_CANN_CALL_UNARY_OP(OP_NAME)                              \
     do {                                                              \
-        auto lambda = [](auto ctx, auto acl_src, auto acl_dst) {      \
+        auto lambda = [](ggml_backend_cann_context& ctx,              \
+            aclTensor* acl_src,                                       \
+            aclTensor* acl_dst) {                                     \
             GGML_CANN_CALL_ACLNN_OP(OP_NAME, acl_src, acl_dst);       \
         };                                                            \
-        ggml_cann_unary_op<lambda>(ctx, dst);                         \
+        ggml_cann_unary_op(lambda, ctx, dst);                         \
     }                                                                 \
     while (0)
 
 #endif  // CANN_ACLNN_OPS
ggml/src/ggml-cann (backend dispatch):

@@ -1330,12 +1330,13 @@ static bool ggml_cann_compute_forward(ggml_backend_cann_context& ctx,
                 GGML_CANN_CALL_UNARY_OP(Silu);
                 break;
             case GGML_UNARY_OP_GELU_QUICK: {
-                auto lambda = [](auto ctx, auto acl_src, auto acl_dst) {
-                    GGML_CANN_CALL_ACLNN_OP(GeluV2, acl_src, 0, acl_dst);
-                };
-                ggml_cann_unary_op<lambda>(ctx, dst);
-            }
-                break;
+                auto lambda = [](ggml_backend_cann_context& ctx,
+                    aclTensor* acl_src,
+                    aclTensor* acl_dst) {
+                    GGML_CANN_CALL_ACLNN_OP(GeluV2, acl_src, 0, acl_dst);
+                };
+                ggml_cann_unary_op(lambda, ctx, dst);
+            } break;
             case GGML_UNARY_OP_TANH:
                 GGML_CANN_CALL_UNARY_OP(Tanh);
                 break;
@@ -1354,6 +1355,9 @@ static bool ggml_cann_compute_forward(ggml_backend_cann_context& ctx,
             case GGML_UNARY_OP_EXP:
                 GGML_CANN_CALL_UNARY_OP(Exp);
                 break;
+            case GGML_UNARY_OP_ELU:
+                ggml_cann_elu(ctx, dst);
+                break;
             default:
                 return false;
         }
@@ -1448,7 +1452,10 @@ static bool ggml_cann_compute_forward(ggml_backend_cann_context& ctx,
             break;
         case GGML_OP_SIN:
             ggml_cann_unary_op<aclnn_sin>(ctx, dst);
             break;
+        case GGML_OP_CONV_TRANSPOSE_1D:
+            ggml_cann_conv_transpose_1d(ctx, dst);
+            break;
         default:
             return false;
     }
@@ -1710,6 +1717,7 @@ static bool ggml_backend_cann_supports_op(ggml_backend_dev_t dev,
             case GGML_UNARY_OP_GELU_QUICK:
             case GGML_UNARY_OP_TANH:
             case GGML_UNARY_OP_EXP:
+            case GGML_UNARY_OP_ELU:
                 return true;
             default:
                 return false;
@@ -1842,6 +1850,7 @@ static bool ggml_backend_cann_supports_op(ggml_backend_dev_t dev,
         case GGML_OP_ARGMAX:
         case GGML_OP_COS:
         case GGML_OP_SIN:
+        case GGML_OP_CONV_TRANSPOSE_1D:
             return true;
         default:
             return false;