diff --git a/ggml/src/ggml-cann/acl_tensor.cpp b/ggml/src/ggml-cann/acl_tensor.cpp index d120ce6ac..9b6553c50 100644 --- a/ggml/src/ggml-cann/acl_tensor.cpp +++ b/ggml/src/ggml-cann/acl_tensor.cpp @@ -54,9 +54,7 @@ aclTensor* ggml_cann_create_tensor(const ggml_tensor* tensor, int64_t* ne, // added. int64_t acl_ne[GGML_MAX_DIMS * 2], acl_stride[GGML_MAX_DIMS * 2]; - int64_t acl_storage_len = 0; if (ne == nullptr) { - acl_storage_len = ggml_nbytes(tensor); for (int i = 0; i < GGML_MAX_DIMS; i++) { acl_ne[i] = tensor->ne[i]; // The step size of acl is in elements. @@ -65,14 +63,18 @@ aclTensor* ggml_cann_create_tensor(const ggml_tensor* tensor, int64_t* ne, } else { // With bcast for (int i = 0; i < dims; i++) { - acl_storage_len += (ne[i] - 1) * nb[i]; acl_ne[i] = ne[i]; acl_stride[i] = nb[i] / ggml_element_size(tensor); } } - // Reverse ne and stride. int64_t final_dims = (dims == 0 ? GGML_MAX_DIMS : dims); + int64_t acl_storage_len = 1; + for (int i = 0; i < final_dims; i++) { + acl_storage_len += (acl_ne[i] - 1) * acl_stride[i]; + } + + // Reverse ne and stride. std::reverse(acl_ne, acl_ne + final_dims); std::reverse(acl_stride, acl_stride + final_dims); diff --git a/ggml/src/ggml-cann/acl_tensor.h b/ggml/src/ggml-cann/acl_tensor.h index 4734a9cb8..93f09937e 100644 --- a/ggml/src/ggml-cann/acl_tensor.h +++ b/ggml/src/ggml-cann/acl_tensor.h @@ -101,14 +101,14 @@ aclTensor* ggml_cann_create_tensor(void* data_ptr, aclDataType dtype, tmp_stride[i] = nb[i] / type_size; } + int64_t acl_storage_len = 1; + for (int i = 0; i < dims; i++) { + acl_storage_len += (tmp_ne[i] - 1) * tmp_stride[i]; + } + std::reverse(tmp_ne, tmp_ne + dims); std::reverse(tmp_stride, tmp_stride + dims); - int64_t acl_storage_len = 0; - for (int i = 0; i < dims; i++) { - acl_storage_len += (ne[i] - 1) * nb[i]; - } - aclTensor* acl_tensor = aclCreateTensor(tmp_ne, dims, dtype, tmp_stride, offset / type_size, format, &acl_storage_len, 1, data_ptr); diff --git a/ggml/src/ggml-cann/aclnn_ops.cpp b/ggml/src/ggml-cann/aclnn_ops.cpp index 8482bb537..ae13730c0 100644 --- a/ggml/src/ggml-cann/aclnn_ops.cpp +++ b/ggml/src/ggml-cann/aclnn_ops.cpp @@ -358,8 +358,6 @@ void ggml_cann_sqr(ggml_backend_cann_context& ctx, ggml_tensor* dst) { void ggml_cann_clamp(ggml_backend_cann_context& ctx, ggml_tensor* dst) { ggml_tensor* src = dst->src[0]; - GGML_ASSERT(src->type == GGML_TYPE_F32); - GGML_ASSERT(dst->type == GGML_TYPE_F32); float min; float max; @@ -1090,8 +1088,6 @@ void ggml_cann_rms_norm(ggml_backend_cann_context& ctx, ggml_tensor* dst) { float eps; memcpy(&eps, dst->op_params, sizeof(float)); - GGML_ASSERT(eps > 0.0f); - uint64_t workspaceSize = 0; aclOpExecutor* executor; void* workspaceAddr = nullptr; @@ -3152,7 +3148,7 @@ void ggml_cann_rope(ggml_backend_cann_context& ctx, ggml_tensor* dst) { // TODO: use ascendc // Only test with LLAMA model. ggml_tensor* src0 = dst->src[0]; // input - ggml_tensor* src2 = dst->src[2]; // freq_factors + // ggml_tensor* src2 = dst->src[2]; // freq_factors, not used now. // param float freq_base, freq_scale, ext_factor, attn_factor, beta_fast, beta_slow; diff --git a/ggml/src/ggml-cann/aclnn_ops.h b/ggml/src/ggml-cann/aclnn_ops.h index 680129c76..51a5cf92f 100644 --- a/ggml/src/ggml-cann/aclnn_ops.h +++ b/ggml/src/ggml-cann/aclnn_ops.h @@ -535,9 +535,6 @@ template src[0]; - GGML_ASSERT(src->type == GGML_TYPE_F32); - GGML_ASSERT(dst->type == GGML_TYPE_F32); - aclTensor* acl_src = ggml_cann_create_tensor(src); aclTensor* acl_dst = ggml_cann_create_tensor(dst); @@ -566,9 +563,6 @@ template src[0]; - GGML_ASSERT(src->type == GGML_TYPE_F32); - GGML_ASSERT(dst->type == GGML_TYPE_F32); - aclTensor* acl_src = ggml_cann_create_tensor(src); aclTensor* acl_dst = ggml_cann_create_tensor(dst); diff --git a/ggml/src/ggml-cann/ggml-cann.cpp b/ggml/src/ggml-cann/ggml-cann.cpp index da75f77f5..3527bd298 100644 --- a/ggml/src/ggml-cann/ggml-cann.cpp +++ b/ggml/src/ggml-cann/ggml-cann.cpp @@ -1458,11 +1458,6 @@ static void ggml_backend_cann_free(ggml_backend_t backend) { ACL_CHECK(aclrtSynchronizeDevice()); ACL_CHECK(aclrtResetDevice(cann_ctx->device)); - // finalize when last backend freed. - if (cann_ctx->device == ggml_backend_cann_get_device_count() - 1) { - ACL_CHECK(aclFinalize()); - } - delete cann_ctx; delete backend; } @@ -1688,11 +1683,14 @@ static bool ggml_backend_cann_supports_op(ggml_backend_dev_t dev, } case GGML_OP_MUL_MAT: { switch (op->src[0]->type) { - case GGML_TYPE_Q8_0: case GGML_TYPE_F16: case GGML_TYPE_F32: - case GGML_TYPE_Q4_0: return true; + case GGML_TYPE_Q8_0: + case GGML_TYPE_Q4_0: + // only support contiguous for quantized types. + return ggml_is_contiguous(op->src[0]) && + ggml_is_contiguous(op->src[1]); default: return false; } @@ -1738,13 +1736,14 @@ static bool ggml_backend_cann_supports_op(ggml_backend_dev_t dev, } case GGML_OP_ROPE: { // TODO: with ops-test v == 1 - float * ext_factor = (float*)((int32_t*)op->op_params + 7); + float ext_factor = 0.0f; + memcpy(&ext_factor, (const float *) op->op_params + 7, sizeof(float)); // TODO: n_dims <= ne0 if (op->src[0]->ne[0] != op->op_params[1]) { return false; } // TODO: ext_factor != 0 - if (*ext_factor != 0) { + if (ext_factor != 0) { return false; } @@ -1766,6 +1765,16 @@ static bool ggml_backend_cann_supports_op(ggml_backend_dev_t dev, } return true; } + case GGML_OP_POOL_2D: { + const int32_t * opts = (const int32_t *) op->op_params; + const int k0 = opts[1]; + const int k1 = opts[2]; + const int p0 = opts[5]; + const int p1 = opts[6]; + // value of paddingH should be at most half of kernelH + // value of paddingW should be at most half of kernelW + return (p0 <= (k0 / 2)) && (p1 <= (k1 / 2)); + } case GGML_OP_DUP: case GGML_OP_IM2COL: case GGML_OP_CONCAT: @@ -1785,7 +1794,6 @@ static bool ggml_backend_cann_supports_op(ggml_backend_dev_t dev, case GGML_OP_CLAMP: case GGML_OP_DIAG_MASK_INF: case GGML_OP_SOFT_MAX: - case GGML_OP_POOL_2D: case GGML_OP_SUM_ROWS: case GGML_OP_ARGSORT: case GGML_OP_ACC: