mirror of
https://github.com/ggerganov/llama.cpp.git
synced 2025-04-14 10:36:07 +00:00
CANN: Support Opt CONV_TRANSPOSE_1D and ELU (#12786)
* [CANN] Support ELU and CONV_TRANSPOSE_1D * [CANN]Modification review comments * [CANN]Modification review comments * [CANN]name adjustment * [CANN]remove lambda used in template * [CANN]Use std::func instead of template * [CANN]Modify the code according to the review comments --------- Signed-off-by: noemotiovon <noemotiovon@gmail.com>
This commit is contained in:
parent
0090950f67
commit
6e1c4cebdb
@ -1,4 +1,4 @@
|
||||
ARG ASCEND_VERSION=8.0.rc2.alpha003-910b-openeuler22.03-py3.8
|
||||
ARG ASCEND_VERSION=8.1.RC1.alpha001-910b-openeuler22.03-py3.10
|
||||
|
||||
FROM ascendai/cann:$ASCEND_VERSION AS build
|
||||
|
||||
@ -6,7 +6,7 @@ WORKDIR /app
|
||||
|
||||
COPY . .
|
||||
|
||||
RUN yum install -y gcc g++ cmake make
|
||||
RUN yum install -y gcc g++ cmake make libcurl-devel
|
||||
ENV ASCEND_TOOLKIT_HOME=/usr/local/Ascend/ascend-toolkit/latest
|
||||
ENV LIBRARY_PATH=${ASCEND_TOOLKIT_HOME}/lib64:$LIBRARY_PATH
|
||||
ENV LD_LIBRARY_PATH=${ASCEND_TOOLKIT_HOME}/lib64:${ASCEND_TOOLKIT_HOME}/lib64/plugin/opskernel:${ASCEND_TOOLKIT_HOME}/lib64/plugin/nnengine:${ASCEND_TOOLKIT_HOME}/opp/built-in/op_impl/ai_core/tbe/op_tiling:${LD_LIBRARY_PATH}
|
||||
|
4
.github/workflows/build.yml
vendored
4
.github/workflows/build.yml
vendored
@ -1771,7 +1771,7 @@ jobs:
|
||||
strategy:
|
||||
matrix:
|
||||
cann:
|
||||
- '8.0.rc3.beta1-910b-openeuler22.03-py3.10'
|
||||
- '8.1.RC1.alpha001-910b-openeuler22.03-py3.10'
|
||||
device:
|
||||
- 'ascend910b3'
|
||||
build:
|
||||
@ -1784,7 +1784,7 @@ jobs:
|
||||
- name: Dependencies
|
||||
run: |
|
||||
yum update -y
|
||||
yum install -y git gcc gcc-c++ make cmake
|
||||
yum install -y git gcc gcc-c++ make cmake libcurl-devel
|
||||
|
||||
- name: Build
|
||||
run: |
|
||||
|
@ -57,6 +57,8 @@
|
||||
#include <aclnnop/aclnn_sub.h>
|
||||
#include <aclnnop/aclnn_mul.h>
|
||||
#include <aclnnop/aclnn_div.h>
|
||||
#include <aclnnop/aclnn_convolution.h>
|
||||
#include <aclnnop/aclnn_elu.h>
|
||||
#include <float.h>
|
||||
|
||||
#include <cmath>
|
||||
@ -86,6 +88,20 @@ void bcast_shape(ggml_tensor * src0, ggml_tensor * src1, ggml_tensor * dst, aclT
|
||||
}
|
||||
}
|
||||
|
||||
void ggml_cann_unary_op(
|
||||
std::function<void(ggml_backend_cann_context&, aclTensor*, aclTensor*)> unary_op,
|
||||
ggml_backend_cann_context& ctx, ggml_tensor* dst) {
|
||||
ggml_tensor* src = dst->src[0];
|
||||
|
||||
aclTensor* acl_src = ggml_cann_create_tensor(src);
|
||||
aclTensor* acl_dst = ggml_cann_create_tensor(dst);
|
||||
|
||||
unary_op(ctx, acl_src, acl_dst);
|
||||
|
||||
ACL_CHECK(aclDestroyTensor(acl_src));
|
||||
ACL_CHECK(aclDestroyTensor(acl_dst));
|
||||
}
|
||||
|
||||
/**
|
||||
* @brief Repeats elements of a tensor along each dimension according to the
|
||||
* specified repeat array.
|
||||
@ -2585,3 +2601,49 @@ void ggml_cann_rope(ggml_backend_cann_context& ctx, ggml_tensor* dst) {
|
||||
ACL_CHECK(aclDestroyTensor(acl_src));
|
||||
ACL_CHECK(aclDestroyTensor(acl_dst));
|
||||
}
|
||||
|
||||
void ggml_cann_conv_transpose_1d(ggml_backend_cann_context& ctx, ggml_tensor* dst){
|
||||
ggml_tensor * src0 = dst->src[0];
|
||||
ggml_tensor * src1 = dst->src[1];
|
||||
|
||||
// stride
|
||||
int64_t s0 = ((const int32_t*)(dst->op_params))[0];
|
||||
|
||||
aclTensor* acl_input = ggml_cann_create_tensor(src1, src1->ne, src1->nb, 3, ACL_FORMAT_NCL);
|
||||
aclTensor* acl_weight = ggml_cann_create_tensor(src0, src0->ne, src0->nb, 3, ACL_FORMAT_NCL);
|
||||
aclTensor* acl_dst = ggml_cann_create_tensor(dst, dst->ne, dst->nb, 3, ACL_FORMAT_NCL);
|
||||
|
||||
int64_t strideVal[1];
|
||||
strideVal[0] = s0;
|
||||
aclIntArray *stride = aclCreateIntArray(strideVal, 1);
|
||||
int64_t paddingVal[] = {0};
|
||||
aclIntArray *padding = aclCreateIntArray(paddingVal, 1);
|
||||
int64_t dilationVal[] = {1};
|
||||
aclIntArray *dilation = aclCreateIntArray(dilationVal, 1);
|
||||
bool transposed = true;
|
||||
int64_t groups = 1;
|
||||
int8_t cubeMathType = 0;
|
||||
|
||||
GGML_CANN_CALL_ACLNN_OP(Convolution, acl_input, acl_weight, nullptr, stride,
|
||||
padding, dilation, transposed, padding, groups, acl_dst, cubeMathType);
|
||||
|
||||
ACL_CHECK(aclDestroyTensor(acl_weight));
|
||||
ACL_CHECK(aclDestroyTensor(acl_dst));
|
||||
}
|
||||
|
||||
void ggml_cann_elu(ggml_backend_cann_context& ctx, ggml_tensor* dst){
|
||||
ggml_tensor * src0 = dst->src[0];
|
||||
|
||||
aclTensor* acl_input = ggml_cann_create_tensor(src0);
|
||||
aclTensor* acl_dst = ggml_cann_create_tensor(dst);
|
||||
|
||||
float alphaValue = 1.0f;
|
||||
aclScalar* alpha = nullptr;
|
||||
alpha = aclCreateScalar(&alphaValue, aclDataType::ACL_FLOAT);
|
||||
|
||||
GGML_CANN_CALL_ACLNN_OP(Elu, acl_input, alpha, alpha, alpha,
|
||||
acl_dst);
|
||||
|
||||
ACL_CHECK(aclDestroyTensor(acl_input));
|
||||
ACL_CHECK(aclDestroyTensor(acl_dst));
|
||||
}
|
||||
|
@ -1,15 +1,4 @@
|
||||
#ifndef CANN_ACLNN_OPS
|
||||
#define CANN_ACLNN_OPS
|
||||
|
||||
/**
|
||||
* @file acl_tensor
|
||||
* @brief This file contains related functions of ggml_tensor and acl_tensor.
|
||||
* Contains conversion from ggml_tensor to acl_tensor, broadcast and other
|
||||
* functions.
|
||||
* @author hipudding <huafengchun@gmail.com>
|
||||
* @author wangshuai09 <391746016@qq.com>
|
||||
* @date July 15, 2024
|
||||
*
|
||||
* Copyright (c) 2023-2024 The ggml authors
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
@ -31,6 +20,9 @@
|
||||
* IN THE SOFTWARE.
|
||||
*/
|
||||
|
||||
#ifndef CANN_ACLNN_OPS
|
||||
#define CANN_ACLNN_OPS
|
||||
|
||||
#include <aclnnop/aclnn_abs.h>
|
||||
#include <aclnnop/aclnn_neg.h>
|
||||
#include <aclnnop/aclnn_exp.h>
|
||||
@ -483,8 +475,8 @@ void ggml_cann_rope(ggml_backend_cann_context& ctx, ggml_tensor* dst);
|
||||
* operation is executed using the CANN backend for optimized performance.
|
||||
*
|
||||
* @param ctx The CANN context used for operations.
|
||||
* @param dst The destination tensor where the indices of the maximum values will be stored.
|
||||
* dst->op is `GGML_OP_ARGMAX`.
|
||||
* @param dst The destination tensor where the indices of the maximum values will
|
||||
* be stored. dst->op is `GGML_OP_ARGMAX`.
|
||||
*/
|
||||
void ggml_cann_argmax(ggml_backend_cann_context& ctx, ggml_tensor* dst);
|
||||
|
||||
@ -599,6 +591,99 @@ void aclnn_cos(ggml_backend_cann_context& ctx, aclTensor* acl_src,
|
||||
void aclnn_sin(ggml_backend_cann_context& ctx, aclTensor* acl_src,
|
||||
aclTensor* acl_dst);
|
||||
|
||||
/**
|
||||
* @brief Prepares broadcast-compatible ACL tensors for two input tensors and one
|
||||
* output tensor.
|
||||
*
|
||||
* This function checks whether broadcasting is needed between `src0` and `src1`.
|
||||
* If broadcasting is required, it calculates the proper shapes and creates
|
||||
* ACL tensors with broadcast parameters. Otherwise, it directly creates ACL tensors
|
||||
* based on the original tensor shapes.
|
||||
*
|
||||
* @param src0 The first input tensor (reference shape).
|
||||
* @param src1 The second input tensor (possibly broadcasted).
|
||||
* @param dst The destination/output tensor.
|
||||
* @param acl_src0 Output pointer to the created ACL tensor corresponding to src0.
|
||||
* @param acl_src1 Output pointer to the created ACL tensor corresponding to src1.
|
||||
* @param acl_dst Output pointer to the created ACL tensor corresponding to dst.
|
||||
*/
|
||||
void bcast_shape(ggml_tensor * src0, ggml_tensor * src1, ggml_tensor * dst,
|
||||
aclTensor ** acl_src0, aclTensor ** acl_src1, aclTensor ** acl_dst);
|
||||
|
||||
/**
|
||||
* @brief Computes the 1D transposed convolution (deconvolution) of a ggml
|
||||
* tensor using the CANN backend.
|
||||
*
|
||||
* @details This function performs a 1D transposed convolution (also known as
|
||||
* deconvolution) operation on the input tensor. The computed result is stored
|
||||
* in the destination tensor `dst`. The operation is optimized using the CANN
|
||||
* backend for improved performance.
|
||||
*
|
||||
* @param ctx The CANN context used for operations.
|
||||
* @param dst The destination tensor where the transposed convolution result
|
||||
* will be stored. dst->op is `GGML_OP_CONV_TRANSPOSE_1D`.
|
||||
*/
|
||||
void ggml_cann_conv_transpose_1d(ggml_backend_cann_context& ctx, ggml_tensor* dst);
|
||||
|
||||
/**
|
||||
* @brief Applies the ELU (Exponential Linear Unit) activation to a ggml tensor
|
||||
* using the CANN backend.
|
||||
*
|
||||
* @details This function performs an element-wise ELU activation on the input
|
||||
* tensor.
|
||||
* The result is written to the destination tensor `dst` in-place.
|
||||
* The ELU function is defined as:
|
||||
*
|
||||
* \text{ELU}(x) =
|
||||
* \begin{cases}
|
||||
* x, & \text{if } x > 0 \\
|
||||
* \alpha \left( \exp(x) - 1 \right), & \text{if } x \leq 0
|
||||
* \end{cases}
|
||||
*
|
||||
* where α (alpha) is a hyperparameter, typically set to 1.0.
|
||||
* This operation is optimized using the CANN backend for high-performance
|
||||
* inference or training.
|
||||
*
|
||||
* @param ctx The CANN context used for operations.
|
||||
* @param dst The destination tensor where the ELU-activated result will be stored.
|
||||
* dst->op is expected to be `GGML_OP_ELU`.
|
||||
*/
|
||||
void ggml_cann_elu(ggml_backend_cann_context& ctx, ggml_tensor* dst);
|
||||
|
||||
/**
|
||||
* @brief Applies a element-wise operation to two input tensors using the CANN
|
||||
* backend.
|
||||
*
|
||||
* This templated function takes a binary operator and applies it to two source
|
||||
* tensors
|
||||
* associated with the destination tensor. The function handles broadcasting as
|
||||
* needed.
|
||||
*
|
||||
* @tparam binary_op A callable object (e.g., lambda or function pointer) representing
|
||||
* the binary operation to be performed. It must take three arguments:
|
||||
* (ggml_backend_cann_context&, aclTensor*, aclTensor*, aclTensor*).
|
||||
*
|
||||
* @param ctx The CANN backend context used to manage execution and resources.
|
||||
* @param dst The destination tensor.
|
||||
*/
|
||||
template <auto binary_op>
|
||||
void ggml_cann_binary_op(ggml_backend_cann_context& ctx, ggml_tensor* dst) {
|
||||
ggml_tensor* src0 = dst->src[0];
|
||||
ggml_tensor* src1 = dst->src[1];
|
||||
|
||||
aclTensor* acl_src0;
|
||||
aclTensor* acl_src1;
|
||||
aclTensor* acl_dst;
|
||||
|
||||
// Need bcast
|
||||
bcast_shape(src0, src1, dst, &acl_src0, &acl_src1, &acl_dst);
|
||||
binary_op(ctx, acl_src0, acl_src1, acl_dst);
|
||||
|
||||
ACL_CHECK(aclDestroyTensor(acl_src0));
|
||||
ACL_CHECK(aclDestroyTensor(acl_src1));
|
||||
ACL_CHECK(aclDestroyTensor(acl_dst));
|
||||
}
|
||||
|
||||
/**
|
||||
* @brief Launches an asynchronous task using the memory allocator.
|
||||
*
|
||||
@ -631,56 +716,6 @@ void aclnn_sin(ggml_backend_cann_context& ctx, aclTensor* acl_src,
|
||||
ACL_CHECK(aclnn##OP_NAME(workspaceAddr, workspaceSize, executor, ctx.stream())); \
|
||||
} while (0)
|
||||
|
||||
|
||||
/**
|
||||
* @brief Prepares broadcast-compatible ACL tensors for two input tensors and one output tensor.
|
||||
*
|
||||
* This function checks whether broadcasting is needed between `src0` and `src1`.
|
||||
* If broadcasting is required, it calculates the proper shapes and creates
|
||||
* ACL tensors with broadcast parameters. Otherwise, it directly creates ACL tensors
|
||||
* based on the original tensor shapes.
|
||||
*
|
||||
* @param src0 The first input tensor (reference shape).
|
||||
* @param src1 The second input tensor (possibly broadcasted).
|
||||
* @param dst The destination/output tensor.
|
||||
* @param acl_src0 Output pointer to the created ACL tensor corresponding to src0.
|
||||
* @param acl_src1 Output pointer to the created ACL tensor corresponding to src1.
|
||||
* @param acl_dst Output pointer to the created ACL tensor corresponding to dst.
|
||||
*/
|
||||
void bcast_shape(ggml_tensor * src0, ggml_tensor * src1, ggml_tensor * dst, aclTensor ** acl_src0,
|
||||
aclTensor ** acl_src1, aclTensor ** acl_dst);
|
||||
|
||||
/**
|
||||
* @brief Applies a element-wise operation to two input tensors using the CANN backend.
|
||||
*
|
||||
* This templated function takes a binary operator and applies it to two source tensors
|
||||
* associated with the destination tensor. The function handles broadcasting as needed.
|
||||
*
|
||||
* @tparam binary_op A callable object (e.g., lambda or function pointer) representing
|
||||
* the binary operation to be performed. It must take three arguments:
|
||||
* (ggml_backend_cann_context&, aclTensor*, aclTensor*, aclTensor*).
|
||||
*
|
||||
* @param ctx The CANN backend context used to manage execution and resources.
|
||||
* @param dst The destination tensor.
|
||||
*/
|
||||
template <auto binary_op>
|
||||
void ggml_cann_binary_op(ggml_backend_cann_context& ctx, ggml_tensor* dst) {
|
||||
ggml_tensor* src0 = dst->src[0];
|
||||
ggml_tensor* src1 = dst->src[1];
|
||||
|
||||
aclTensor* acl_src0;
|
||||
aclTensor* acl_src1;
|
||||
aclTensor* acl_dst;
|
||||
|
||||
// Need bcast
|
||||
bcast_shape(src0, src1, dst, &acl_src0, &acl_src1, &acl_dst);
|
||||
binary_op(ctx, acl_src0, acl_src1, acl_dst);
|
||||
|
||||
ACL_CHECK(aclDestroyTensor(acl_src0));
|
||||
ACL_CHECK(aclDestroyTensor(acl_src1));
|
||||
ACL_CHECK(aclDestroyTensor(acl_dst));
|
||||
}
|
||||
|
||||
/**
|
||||
* @brief Applies a unary operation to an input tensor using the CANN backend.
|
||||
*
|
||||
@ -690,7 +725,6 @@ void ggml_cann_binary_op(ggml_backend_cann_context& ctx, ggml_tensor* dst) {
|
||||
* @tparam unary_op A callable with the signature:
|
||||
* void(ggml_backend_cann_context&, aclTensor*, aclTensor*)
|
||||
* where the first aclTensor is the source and the second is the destination.
|
||||
*
|
||||
* @param ctx The CANN backend context for managing resources and execution.
|
||||
* @param dst The destination tensor. Its src[0] is treated as the input tensor.
|
||||
*/
|
||||
@ -702,10 +736,30 @@ template <void unary_op(ggml_backend_cann_context&, aclTensor*, aclTensor*)>
|
||||
aclTensor* acl_dst = ggml_cann_create_tensor(dst);
|
||||
|
||||
unary_op(ctx, acl_src, acl_dst);
|
||||
|
||||
ACL_CHECK(aclDestroyTensor(acl_src));
|
||||
ACL_CHECK(aclDestroyTensor(acl_dst));
|
||||
}
|
||||
|
||||
/**
|
||||
* @brief Applies a unary operation to a ggml tensor using the CANN backend.
|
||||
*
|
||||
* @details This function performs a unary operation on the input tensor using
|
||||
* a user-provided lambda or callable object `unary_op`, which accepts the CANN
|
||||
* context and two ACL tensors (source and destination). Internally, this function
|
||||
* creates ACL representations of the ggml tensors and invokes the unary operation.
|
||||
* The result is stored in the destination tensor `dst`. This utility abstracts the
|
||||
* common boilerplate of tensor conversion and cleanup when implementing unary ops.
|
||||
*
|
||||
* @param unary_op A callable that performs the unary operation using CANN APIs.
|
||||
* @param ctx The CANN context used for operations.
|
||||
* @param dst The destination tensor where the result will be stored.
|
||||
* The source tensor is retrieved from `dst->src[0]`.
|
||||
*/
|
||||
void ggml_cann_unary_op(
|
||||
std::function<void(ggml_backend_cann_context&, aclTensor*, aclTensor*)> unary_op,
|
||||
ggml_backend_cann_context& ctx, ggml_tensor* dst);
|
||||
|
||||
/**
|
||||
* @brief Helper macro to invoke a unary ACL operation using ggml_cann_unary_op.
|
||||
*
|
||||
@ -725,11 +779,12 @@ template <void unary_op(ggml_backend_cann_context&, aclTensor*, aclTensor*)>
|
||||
*/
|
||||
#define GGML_CANN_CALL_UNARY_OP(OP_NAME) \
|
||||
do { \
|
||||
auto lambda = [](auto ctx, auto acl_src, auto acl_dst) { \
|
||||
auto lambda = [](ggml_backend_cann_context& ctx, \
|
||||
aclTensor* acl_src, \
|
||||
aclTensor* acl_dst) { \
|
||||
GGML_CANN_CALL_ACLNN_OP(OP_NAME, acl_src, acl_dst); \
|
||||
}; \
|
||||
ggml_cann_unary_op<lambda>(ctx, dst); \
|
||||
ggml_cann_unary_op(lambda, ctx, dst); \
|
||||
} \
|
||||
while (0)
|
||||
|
||||
#endif // CANN_ACLNN_OPS
|
||||
|
@ -1330,12 +1330,13 @@ static bool ggml_cann_compute_forward(ggml_backend_cann_context& ctx,
|
||||
GGML_CANN_CALL_UNARY_OP(Silu);
|
||||
break;
|
||||
case GGML_UNARY_OP_GELU_QUICK: {
|
||||
auto lambda = [](auto ctx, auto acl_src, auto acl_dst) {
|
||||
GGML_CANN_CALL_ACLNN_OP(GeluV2, acl_src, 0, acl_dst);
|
||||
};
|
||||
ggml_cann_unary_op<lambda>(ctx, dst);
|
||||
}
|
||||
break;
|
||||
auto lambda = [](ggml_backend_cann_context& ctx,
|
||||
aclTensor* acl_src,
|
||||
aclTensor* acl_dst) {
|
||||
GGML_CANN_CALL_ACLNN_OP(GeluV2, acl_src, 0, acl_dst);
|
||||
};
|
||||
ggml_cann_unary_op(lambda, ctx, dst);
|
||||
} break;
|
||||
case GGML_UNARY_OP_TANH:
|
||||
GGML_CANN_CALL_UNARY_OP(Tanh);
|
||||
break;
|
||||
@ -1354,6 +1355,9 @@ static bool ggml_cann_compute_forward(ggml_backend_cann_context& ctx,
|
||||
case GGML_UNARY_OP_EXP:
|
||||
GGML_CANN_CALL_UNARY_OP(Exp);
|
||||
break;
|
||||
case GGML_UNARY_OP_ELU:
|
||||
ggml_cann_elu(ctx, dst);
|
||||
break;
|
||||
default:
|
||||
return false;
|
||||
}
|
||||
@ -1448,7 +1452,10 @@ static bool ggml_cann_compute_forward(ggml_backend_cann_context& ctx,
|
||||
break;
|
||||
case GGML_OP_SIN:
|
||||
ggml_cann_unary_op<aclnn_sin>(ctx, dst);
|
||||
break;
|
||||
break;
|
||||
case GGML_OP_CONV_TRANSPOSE_1D:
|
||||
ggml_cann_conv_transpose_1d(ctx, dst);
|
||||
break;
|
||||
default:
|
||||
return false;
|
||||
}
|
||||
@ -1710,6 +1717,7 @@ static bool ggml_backend_cann_supports_op(ggml_backend_dev_t dev,
|
||||
case GGML_UNARY_OP_GELU_QUICK:
|
||||
case GGML_UNARY_OP_TANH:
|
||||
case GGML_UNARY_OP_EXP:
|
||||
case GGML_UNARY_OP_ELU:
|
||||
return true;
|
||||
default:
|
||||
return false;
|
||||
@ -1842,6 +1850,7 @@ static bool ggml_backend_cann_supports_op(ggml_backend_dev_t dev,
|
||||
case GGML_OP_ARGMAX:
|
||||
case GGML_OP_COS:
|
||||
case GGML_OP_SIN:
|
||||
case GGML_OP_CONV_TRANSPOSE_1D:
|
||||
return true;
|
||||
default:
|
||||
return false;
|
||||
|
Loading…
x
Reference in New Issue
Block a user