From 70680c48e5f77d2d3138712a6582bd8c1e548922 Mon Sep 17 00:00:00 2001 From: William Tambellini Date: Fri, 28 Feb 2025 05:41:47 -0800 Subject: [PATCH] ggml : upgrade init_tensor API to return a ggml_status (#11854) * Upgrade init_tensor API to return a ggml_status To prepare for an 'abort-free' ggml (ggml not aborting on OOMs but returning an OOM status), as agreed with Diego in the ggml repo, upgrade the init_tensor() and view_init() APIs to return a ggml_status. * misc fixes --------- Co-authored-by: slaren --- .gitignore | 2 + CONTRIBUTING.md | 2 +- ggml/include/ggml-alloc.h | 2 +- ggml/include/ggml-backend.h | 6 +- ggml/src/ggml-alloc.c | 61 ++++++++------ ggml/src/ggml-backend-impl.h | 2 +- ggml/src/ggml-backend.cpp | 17 ++-- ggml/src/ggml-cann/ggml-cann.cpp | 5 +- ggml/src/ggml-cpu/amx/amx.cpp | 3 +- ggml/src/ggml-cpu/ggml-cpu-aarch64.cpp | 3 +- ggml/src/ggml-cuda/ggml-cuda.cu | 8 +- ggml/src/ggml-opencl/ggml-opencl.cpp | 3 +- ggml/src/ggml-rpc/ggml-rpc.cpp | 3 +- ggml/src/ggml-sycl/ggml-sycl.cpp | 8 +- ggml/src/ggml-vulkan/ggml-vulkan.cpp | 3 +- tests/test-backend-ops.cpp | 105 ++++++++++++++----------- 16 files changed, 136 insertions(+), 97 deletions(-) diff --git a/.gitignore b/.gitignore index 56b5ac2c1..2c67ad7f7 100644 --- a/.gitignore +++ b/.gitignore @@ -45,6 +45,8 @@ lcov-report/ tags .build/ build* +release +debug !build-info.cmake !build-info.cpp.in !build-info.sh diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md index 8fab0de6f..e68ff9244 100644 --- a/CONTRIBUTING.md +++ b/CONTRIBUTING.md @@ -39,7 +39,7 @@ _(NOTE: this guideline is yet to be applied to the `llama.cpp` codebase. New code should follow this guideline.)_ -- Try to follow the existing patterns in the code (indentation, spaces, etc.). In case of doubt use `clang-format` to format the added code +- Try to follow the existing patterns in the code (indentation, spaces, etc.). In case of doubt use `clang-format` (from clang-tools v15+) to format the added code - For anything not covered in the current guidelines, refer to the [C++ Core Guidelines](https://isocpp.github.io/CppCoreGuidelines/CppCoreGuidelines) - Tensors store data in row-major order.
We refer to dimension 0 as columns, 1 as rows, 2 as matrices - Matrix multiplication is unconventional: [`C = ggml_mul_mat(ctx, A, B)`](https://github.com/ggml-org/llama.cpp/blob/880e352277fc017df4d5794f0c21c44e1eae2b84/ggml.h#L1058-L1064) means $C^T = A B^T \Leftrightarrow C = B A^T.$ diff --git a/ggml/include/ggml-alloc.h b/ggml/include/ggml-alloc.h index 23600eea9..2cb150fd2 100644 --- a/ggml/include/ggml-alloc.h +++ b/ggml/include/ggml-alloc.h @@ -19,7 +19,7 @@ struct ggml_tallocr { }; GGML_API struct ggml_tallocr ggml_tallocr_new(ggml_backend_buffer_t buffer); -GGML_API void ggml_tallocr_alloc(struct ggml_tallocr * talloc, struct ggml_tensor * tensor); +GGML_API enum ggml_status ggml_tallocr_alloc(struct ggml_tallocr * talloc, struct ggml_tensor * tensor); // Graph allocator /* diff --git a/ggml/include/ggml-backend.h b/ggml/include/ggml-backend.h index fc9571c82..64671495b 100644 --- a/ggml/include/ggml-backend.h +++ b/ggml/include/ggml-backend.h @@ -56,7 +56,7 @@ extern "C" { GGML_API void ggml_backend_buffer_free (ggml_backend_buffer_t buffer); GGML_API void * ggml_backend_buffer_get_base (ggml_backend_buffer_t buffer); GGML_API size_t ggml_backend_buffer_get_size (ggml_backend_buffer_t buffer); - GGML_API void ggml_backend_buffer_init_tensor (ggml_backend_buffer_t buffer, struct ggml_tensor * tensor); + GGML_API enum ggml_status ggml_backend_buffer_init_tensor (ggml_backend_buffer_t buffer, struct ggml_tensor * tensor); GGML_API size_t ggml_backend_buffer_get_alignment (ggml_backend_buffer_t buffer); GGML_API size_t ggml_backend_buffer_get_max_size (ggml_backend_buffer_t buffer); GGML_API size_t ggml_backend_buffer_get_alloc_size(ggml_backend_buffer_t buffer, struct ggml_tensor * tensor); @@ -342,8 +342,8 @@ extern "C" { GGML_API bool ggml_backend_compare_graph_backend(ggml_backend_t backend1, ggml_backend_t backend2, struct ggml_cgraph * graph, ggml_backend_eval_callback callback, void * user_data); // Tensor initialization - GGML_API void ggml_backend_tensor_alloc(ggml_backend_buffer_t buffer, struct ggml_tensor * tensor, void * addr); - GGML_API void ggml_backend_view_init(struct ggml_tensor * tensor); + GGML_API enum ggml_status ggml_backend_tensor_alloc(ggml_backend_buffer_t buffer, struct ggml_tensor * tensor, void * addr); + GGML_API enum ggml_status ggml_backend_view_init(struct ggml_tensor * tensor); // CPU buffer types are always available GGML_API ggml_backend_buffer_t ggml_backend_cpu_buffer_from_ptr(void * ptr, size_t size); diff --git a/ggml/src/ggml-alloc.c b/ggml/src/ggml-alloc.c index 7244a9cbb..a3d3f6901 100644 --- a/ggml/src/ggml-alloc.c +++ b/ggml/src/ggml-alloc.c @@ -89,7 +89,7 @@ struct ggml_tallocr ggml_tallocr_new(ggml_backend_buffer_t buffer) { return talloc; } -void ggml_tallocr_alloc(struct ggml_tallocr * talloc, struct ggml_tensor * tensor) { +enum ggml_status ggml_tallocr_alloc(struct ggml_tallocr * talloc, struct ggml_tensor * tensor) { size_t size = ggml_backend_buffer_get_alloc_size(talloc->buffer, tensor); size = GGML_PAD(size, talloc->alignment); @@ -104,7 +104,7 @@ void ggml_tallocr_alloc(struct ggml_tallocr * talloc, struct ggml_tensor * tenso assert(((uintptr_t)addr % talloc->alignment) == 0); - ggml_backend_tensor_alloc(talloc->buffer, tensor, addr); + return ggml_backend_tensor_alloc(talloc->buffer, tensor, addr); } // dynamic tensor allocator @@ -933,42 +933,51 @@ size_t ggml_gallocr_get_buffer_size(ggml_gallocr_t galloc, int buffer_id) { // utils +static void free_buffers(ggml_backend_buffer_t ** buffers, const size_t * n_buffers) { + for 
(size_t i = 0; i < *n_buffers; i++) { + ggml_backend_buffer_free((*buffers)[i]); + } + free(*buffers); +} + static bool alloc_tensor_range(struct ggml_context * ctx, struct ggml_tensor * first, struct ggml_tensor * last, ggml_backend_buffer_type_t buft, size_t size, ggml_backend_buffer_t ** buffers, size_t * n_buffers) { + ggml_backend_buffer_t buffer = ggml_backend_buft_alloc_buffer(buft, size); if (buffer == NULL) { -#ifndef NDEBUG - GGML_LOG_DEBUG("%s: failed to allocate %s buffer of size %zu\n", __func__, ggml_backend_buft_name(buft), size); -#endif - for (size_t i = 0; i < *n_buffers; i++) { - ggml_backend_buffer_free((*buffers)[i]); - } - free(*buffers); + GGML_LOG_ERROR("%s: failed to allocate %s buffer of size %zu\n", __func__, ggml_backend_buft_name(buft), size); + free_buffers(buffers, n_buffers); return false; } - struct ggml_tallocr tallocr = ggml_tallocr_new(buffer); - - for (struct ggml_tensor * t = first; t != last; t = ggml_get_next_tensor(ctx, t)) { - if (t->data == NULL) { - if (t->view_src == NULL) { - ggml_tallocr_alloc(&tallocr, t); - } else if (t->buffer == NULL) { - ggml_backend_view_init(t); - } - } else { - if (t->view_src != NULL && t->buffer == NULL) { - // view of a pre-allocated tensor - ggml_backend_view_init(t); - } - } - } - *buffers = realloc(*buffers, sizeof(ggml_backend_buffer_t) * (*n_buffers + 1)); (*buffers)[(*n_buffers)++] = buffer; + struct ggml_tallocr tallocr = ggml_tallocr_new(buffer); + + for (struct ggml_tensor * t = first; t != last; t = ggml_get_next_tensor(ctx, t)) { + enum ggml_status status = GGML_STATUS_SUCCESS; + if (t->data == NULL) { + if (t->view_src == NULL) { + status = ggml_tallocr_alloc(&tallocr, t); + } else if (t->buffer == NULL) { + status = ggml_backend_view_init(t); + } + } else { + if (t->view_src != NULL && t->buffer == NULL) { + // view of a pre-allocated tensor + status = ggml_backend_view_init(t); + } + } + if (status != GGML_STATUS_SUCCESS) { + GGML_LOG_ERROR("%s: failed to initialize tensor %s\n", __func__, t->name); + free_buffers(buffers, n_buffers); + return false; + } + } + return true; } diff --git a/ggml/src/ggml-backend-impl.h b/ggml/src/ggml-backend-impl.h index d1c2d76d8..c36c12d65 100644 --- a/ggml/src/ggml-backend-impl.h +++ b/ggml/src/ggml-backend-impl.h @@ -44,7 +44,7 @@ extern "C" { // base address of the buffer void * (*get_base) (ggml_backend_buffer_t buffer); // (optional) initialize a tensor in the buffer (eg. 
add tensor extras) - void (*init_tensor) (ggml_backend_buffer_t buffer, struct ggml_tensor * tensor); + enum ggml_status (*init_tensor)(ggml_backend_buffer_t buffer, struct ggml_tensor * tensor); // tensor data access void (*memset_tensor)(ggml_backend_buffer_t buffer, struct ggml_tensor * tensor, uint8_t value, size_t offset, size_t size); void (*set_tensor) (ggml_backend_buffer_t buffer, struct ggml_tensor * tensor, const void * data, size_t offset, size_t size); diff --git a/ggml/src/ggml-backend.cpp b/ggml/src/ggml-backend.cpp index dba7be33b..184f99af5 100644 --- a/ggml/src/ggml-backend.cpp +++ b/ggml/src/ggml-backend.cpp @@ -126,11 +126,12 @@ void * ggml_backend_buffer_get_base(ggml_backend_buffer_t buffer) { return base; } -void ggml_backend_buffer_init_tensor(ggml_backend_buffer_t buffer, struct ggml_tensor * tensor) { +enum ggml_status ggml_backend_buffer_init_tensor(ggml_backend_buffer_t buffer, struct ggml_tensor * tensor) { // init_tensor is optional if (buffer->iface.init_tensor) { - buffer->iface.init_tensor(buffer, tensor); + return buffer->iface.init_tensor(buffer, tensor); } + return GGML_STATUS_SUCCESS; } void ggml_backend_buffer_clear(ggml_backend_buffer_t buffer, uint8_t value) { @@ -1641,7 +1642,7 @@ ggml_backend_t ggml_backend_sched_get_tensor_backend(ggml_backend_sched_t sched, // utils -void ggml_backend_view_init(struct ggml_tensor * tensor) { +enum ggml_status ggml_backend_view_init(struct ggml_tensor * tensor) { GGML_ASSERT(tensor->buffer == NULL); GGML_ASSERT(tensor->view_src != NULL); GGML_ASSERT(tensor->view_src->buffer != NULL); @@ -1649,10 +1650,10 @@ void ggml_backend_view_init(struct ggml_tensor * tensor) { tensor->buffer = tensor->view_src->buffer; tensor->data = (char *)tensor->view_src->data + tensor->view_offs; - ggml_backend_buffer_init_tensor(tensor->buffer, tensor); + return ggml_backend_buffer_init_tensor(tensor->buffer, tensor); } -void ggml_backend_tensor_alloc(ggml_backend_buffer_t buffer, struct ggml_tensor * tensor, void * addr) { +enum ggml_status ggml_backend_tensor_alloc(ggml_backend_buffer_t buffer, struct ggml_tensor * tensor, void * addr) { GGML_ASSERT(tensor->buffer == NULL); GGML_ASSERT(tensor->data == NULL); GGML_ASSERT(tensor->view_src == NULL); @@ -1662,7 +1663,7 @@ void ggml_backend_tensor_alloc(ggml_backend_buffer_t buffer, struct ggml_tensor tensor->buffer = buffer; tensor->data = addr; - ggml_backend_buffer_init_tensor(buffer, tensor); + return ggml_backend_buffer_init_tensor(buffer, tensor); } static struct ggml_tensor * graph_copy_dup_tensor(struct ggml_hash_set hash_set, struct ggml_tensor ** node_copies, @@ -1708,7 +1709,8 @@ static void graph_copy_init_tensor(struct ggml_hash_set * hash_set, struct ggml_ struct ggml_tensor * dst = node_copies[id]; if (dst->view_src != NULL) { graph_copy_init_tensor(hash_set, node_copies, node_init, src->view_src); - ggml_backend_view_init(dst); + enum ggml_status status = ggml_backend_view_init(dst); + GGML_ASSERT(status == GGML_STATUS_SUCCESS); } else { ggml_backend_tensor_copy(src, dst); @@ -1823,7 +1825,6 @@ bool ggml_backend_compare_graph_backend(ggml_backend_t backend1, ggml_backend_t assert(g1->n_nodes == g2->n_nodes); for (int i = 0; i < g1->n_nodes; i++) { - //printf("eval %d/%d\n", i, g1->n_nodes); struct ggml_tensor * t1 = g1->nodes[i]; struct ggml_tensor * t2 = g2->nodes[i]; diff --git a/ggml/src/ggml-cann/ggml-cann.cpp b/ggml/src/ggml-cann/ggml-cann.cpp index d410c0244..b8d272cda 100644 --- a/ggml/src/ggml-cann/ggml-cann.cpp +++ b/ggml/src/ggml-cann/ggml-cann.cpp @@ -796,11 
+796,11 @@ static bool need_transform(ggml_type type) { * @param buffer The CANN buffer from which to initialize the tensor. * @param tensor Pointer to the tensor to be initialized. */ -static void ggml_backend_cann_buffer_init_tensor( +static enum ggml_status ggml_backend_cann_buffer_init_tensor( ggml_backend_buffer_t buffer, ggml_tensor* tensor) { if (tensor->view_src != NULL && tensor->view_offs == 0) { GGML_ASSERT(tensor->view_src->buffer->buft == buffer->buft); - return; + return GGML_STATUS_SUCCESS; } // TODO: can backend doesn't support quantized yet. Just leave the code @@ -817,6 +817,7 @@ static void ggml_backend_cann_buffer_init_tensor( memset_size, 0, memset_size)); } } + return GGML_STATUS_SUCCESS; } // TODO: need handle tensor which has paddings. diff --git a/ggml/src/ggml-cpu/amx/amx.cpp b/ggml/src/ggml-cpu/amx/amx.cpp index 5ec5263ce..0f067137d 100644 --- a/ggml/src/ggml-cpu/amx/amx.cpp +++ b/ggml/src/ggml-cpu/amx/amx.cpp @@ -50,10 +50,11 @@ static void * ggml_backend_amx_buffer_get_base(ggml_backend_buffer_t buffer) { return (void *) (buffer->context); } -static void ggml_backend_amx_buffer_init_tensor(ggml_backend_buffer_t buffer, struct ggml_tensor * tensor) { +static enum ggml_status ggml_backend_amx_buffer_init_tensor(ggml_backend_buffer_t buffer, struct ggml_tensor * tensor) { tensor->extra = (void *) ggml::cpu::amx::get_tensor_traits(buffer, tensor); GGML_UNUSED(buffer); + return GGML_STATUS_SUCCESS; } static void ggml_backend_amx_buffer_memset_tensor(ggml_backend_buffer_t buffer, struct ggml_tensor * tensor, diff --git a/ggml/src/ggml-cpu/ggml-cpu-aarch64.cpp b/ggml/src/ggml-cpu/ggml-cpu-aarch64.cpp index b311a5b1c..c24fd56e2 100644 --- a/ggml/src/ggml-cpu/ggml-cpu-aarch64.cpp +++ b/ggml/src/ggml-cpu/ggml-cpu-aarch64.cpp @@ -4135,10 +4135,11 @@ static const ggml::cpu::tensor_traits * ggml_aarch64_get_optimal_repack_type(con return nullptr; } -static void ggml_backend_cpu_aarch64_buffer_init_tensor(ggml_backend_buffer_t buffer, struct ggml_tensor * tensor) { +static enum ggml_status ggml_backend_cpu_aarch64_buffer_init_tensor(ggml_backend_buffer_t buffer, struct ggml_tensor * tensor) { tensor->extra = (void *) const_cast(ggml_aarch64_get_optimal_repack_type(tensor)); GGML_UNUSED(buffer); + return GGML_STATUS_SUCCESS; } static void ggml_backend_cpu_aarch64_buffer_set_tensor(ggml_backend_buffer_t buffer, struct ggml_tensor * tensor, diff --git a/ggml/src/ggml-cuda/ggml-cuda.cu b/ggml/src/ggml-cuda/ggml-cuda.cu index ebb2ccae0..d23686d16 100644 --- a/ggml/src/ggml-cuda/ggml-cuda.cu +++ b/ggml/src/ggml-cuda/ggml-cuda.cu @@ -540,12 +540,12 @@ static void * ggml_backend_cuda_buffer_get_base(ggml_backend_buffer_t buffer) { return ctx->dev_ptr; } -static void ggml_backend_cuda_buffer_init_tensor(ggml_backend_buffer_t buffer, ggml_tensor * tensor) { +static enum ggml_status ggml_backend_cuda_buffer_init_tensor(ggml_backend_buffer_t buffer, ggml_tensor * tensor) { ggml_backend_cuda_buffer_context * ctx = (ggml_backend_cuda_buffer_context *)buffer->context; if (tensor->view_src != NULL) { assert(tensor->view_src->buffer->buft == buffer->buft); - return; + return GGML_STATUS_SUCCESS; } if (ggml_is_quantized(tensor->type) && tensor->view_src == nullptr && ggml_backend_buffer_get_usage(buffer) != GGML_BACKEND_BUFFER_USAGE_COMPUTE) { @@ -558,6 +558,7 @@ static void ggml_backend_cuda_buffer_init_tensor(ggml_backend_buffer_t buffer, g CUDA_CHECK(cudaMemset((char *)tensor->data + original_size, 0, padded_size - original_size)); } } + return GGML_STATUS_SUCCESS; } static void 
ggml_backend_cuda_buffer_memset_tensor(ggml_backend_buffer_t buffer, ggml_tensor * tensor, uint8_t value, size_t offset, size_t size) { @@ -792,7 +793,7 @@ static void * ggml_backend_cuda_split_buffer_get_base(ggml_backend_buffer_t buff GGML_UNUSED(buffer); } -static void ggml_backend_cuda_split_buffer_init_tensor(ggml_backend_buffer_t buffer, ggml_tensor * tensor) { +static enum ggml_status ggml_backend_cuda_split_buffer_init_tensor(ggml_backend_buffer_t buffer, ggml_tensor * tensor) { GGML_ASSERT(tensor->view_src == nullptr); // views of split tensors are not supported ggml_backend_cuda_split_buffer_context * ctx = (ggml_backend_cuda_split_buffer_context *)buffer->context; @@ -838,6 +839,7 @@ static void ggml_backend_cuda_split_buffer_init_tensor(ggml_backend_buffer_t buf } } tensor->extra = extra; + return GGML_STATUS_SUCCESS; } static void ggml_backend_cuda_split_buffer_set_tensor(ggml_backend_buffer_t buffer, ggml_tensor * tensor, const void * data, size_t offset, size_t size) { diff --git a/ggml/src/ggml-opencl/ggml-opencl.cpp b/ggml/src/ggml-opencl/ggml-opencl.cpp index f59062460..dc9a718f7 100644 --- a/ggml/src/ggml-opencl/ggml-opencl.cpp +++ b/ggml/src/ggml-opencl/ggml-opencl.cpp @@ -1211,7 +1211,7 @@ static void * ggml_backend_opencl_buffer_get_base(ggml_backend_buffer_t buffer) GGML_UNUSED(buffer); } -static void ggml_backend_opencl_buffer_init_tensor(ggml_backend_buffer_t buffer, ggml_tensor * tensor) { +static enum ggml_status ggml_backend_opencl_buffer_init_tensor(ggml_backend_buffer_t buffer, ggml_tensor * tensor) { ggml_backend_opencl_buffer_context * ctx = (ggml_backend_opencl_buffer_context *) buffer->context; ggml_cl2_init(buffer->buft->device); @@ -1251,6 +1251,7 @@ static void ggml_backend_opencl_buffer_init_tensor(ggml_backend_buffer_t buffer, tensor->extra = extra; } } + return GGML_STATUS_SUCCESS; } // The optimized gemm and gemv kernels are used for large matrices without batch. diff --git a/ggml/src/ggml-rpc/ggml-rpc.cpp b/ggml/src/ggml-rpc/ggml-rpc.cpp index 97873acc7..6c3b80b08 100644 --- a/ggml/src/ggml-rpc/ggml-rpc.cpp +++ b/ggml/src/ggml-rpc/ggml-rpc.cpp @@ -464,7 +464,7 @@ static rpc_tensor serialize_tensor(const ggml_tensor * tensor) { return result; } -static void ggml_backend_rpc_buffer_init_tensor(ggml_backend_buffer_t buffer, ggml_tensor * tensor) { +static enum ggml_status ggml_backend_rpc_buffer_init_tensor(ggml_backend_buffer_t buffer, ggml_tensor * tensor) { ggml_backend_rpc_buffer_context * ctx = (ggml_backend_rpc_buffer_context *)buffer->context; // CUDA backend on the server pads everything to 512 due to CUDA limitations. 
@@ -478,6 +478,7 @@ static void ggml_backend_rpc_buffer_init_tensor(ggml_backend_buffer_t buffer, gg bool status = send_rpc_cmd(ctx->sock, RPC_CMD_INIT_TENSOR, &request, sizeof(request), nullptr, 0); GGML_ASSERT(status); } + return GGML_STATUS_SUCCESS; } static void ggml_backend_rpc_buffer_set_tensor(ggml_backend_buffer_t buffer, ggml_tensor * tensor, const void * data, size_t offset, size_t size) { diff --git a/ggml/src/ggml-sycl/ggml-sycl.cpp b/ggml/src/ggml-sycl/ggml-sycl.cpp index 792e0569c..d804e6606 100644 --- a/ggml/src/ggml-sycl/ggml-sycl.cpp +++ b/ggml/src/ggml-sycl/ggml-sycl.cpp @@ -323,14 +323,14 @@ static void * ggml_backend_sycl_buffer_get_base(ggml_backend_buffer_t buffer) { return ctx->dev_ptr; } -static void +static enum ggml_status ggml_backend_sycl_buffer_init_tensor(ggml_backend_buffer_t buffer, ggml_tensor *tensor) try { ggml_backend_sycl_buffer_context * ctx = (ggml_backend_sycl_buffer_context *)buffer->context; if (tensor->view_src != NULL) { assert(tensor->view_src->buffer->buft == buffer->buft); - return; + return GGML_STATUS_SUCCESS; } ggml_tensor_extra_gpu * extra = new ggml_tensor_extra_gpu{}; @@ -348,6 +348,7 @@ ggml_backend_sycl_buffer_init_tensor(ggml_backend_buffer_t buffer, padded_size - original_size).wait())); } } + return GGML_STATUS_SUCCESS; } catch (sycl::exception const &exc) { std::cerr << exc.what() << "Exception caught at file:" << __FILE__ @@ -729,7 +730,7 @@ static void * ggml_backend_sycl_split_buffer_get_base(ggml_backend_buffer_t buff GGML_UNUSED(buffer); } -static void +static enum ggml_status ggml_backend_sycl_split_buffer_init_tensor(ggml_backend_buffer_t buffer, ggml_tensor *tensor) try { GGML_ASSERT(tensor->view_src == nullptr); // views of split tensors are not supported @@ -804,6 +805,7 @@ ggml_backend_sycl_split_buffer_init_tensor(ggml_backend_buffer_t buffer, } } tensor->extra = extra; + return GGML_STATUS_SUCCESS; } catch (sycl::exception const &exc) { std::cerr << exc.what() << "Exception caught at file:" << __FILE__ diff --git a/ggml/src/ggml-vulkan/ggml-vulkan.cpp b/ggml/src/ggml-vulkan/ggml-vulkan.cpp index ce15f620f..a413441eb 100644 --- a/ggml/src/ggml-vulkan/ggml-vulkan.cpp +++ b/ggml/src/ggml-vulkan/ggml-vulkan.cpp @@ -7923,11 +7923,12 @@ static void * ggml_backend_vk_buffer_get_base(ggml_backend_buffer_t buffer) { UNUSED(buffer); } -static void ggml_backend_vk_buffer_init_tensor(ggml_backend_buffer_t buffer, ggml_tensor * tensor) { +static enum ggml_status ggml_backend_vk_buffer_init_tensor(ggml_backend_buffer_t buffer, ggml_tensor * tensor) { VK_LOG_DEBUG("ggml_backend_vk_buffer_init_tensor(" << buffer << " (" << buffer->context << "), " << tensor << ")"); if (tensor->view_src != nullptr) { GGML_ASSERT(tensor->view_src->buffer->buft == buffer->buft); } + return GGML_STATUS_SUCCESS; } static void ggml_backend_vk_buffer_memset_tensor(ggml_backend_buffer_t buffer, ggml_tensor * tensor, uint8_t value, size_t offset, size_t size) { diff --git a/tests/test-backend-ops.cpp b/tests/test-backend-ops.cpp index e1f7e6758..1dc2cdda3 100644 --- a/tests/test-backend-ops.cpp +++ b/tests/test-backend-ops.cpp @@ -18,6 +18,7 @@ #include #include #include +#include #include #include @@ -467,6 +468,7 @@ struct test_case { // allocate ggml_backend_buffer_t buf = ggml_backend_alloc_ctx_tensors(ctx, backend1); + if (buf == NULL) { printf("failed to allocate tensors [%s] ", ggml_backend_name(backend1)); ggml_free(ctx); @@ -588,14 +590,13 @@ struct test_case { /* .mem_base = */ NULL, /* .no_alloc = */ true, }; - ggml_context * ctx = 
ggml_init(params); + ggml_context_ptr ctx(ggml_init(params)); // smart ptr GGML_ASSERT(ctx); - ggml_tensor * out = build_graph(ctx); + ggml_tensor * out = build_graph(ctx.get()); if (op_name != nullptr && op_desc(out) != op_name) { //printf(" %s: skipping\n", op_desc(out).c_str()); - ggml_free(ctx); return true; } @@ -605,7 +606,6 @@ struct test_case { // check if backends support op if (!ggml_backend_supports_op(backend, out)) { printf("not supported\n"); - ggml_free(ctx); return true; } @@ -618,22 +618,26 @@ struct test_case { printf("%*s", last - len, ""); // allocate - ggml_backend_buffer_t buf = ggml_backend_alloc_ctx_tensors(ctx, backend); + ggml_backend_buffer_ptr buf(ggml_backend_alloc_ctx_tensors(ctx.get(), backend)); // smart ptr + if (buf == NULL) { printf("failed to allocate tensors\n"); - ggml_free(ctx); return false; } // randomize tensors - initialize_tensors(ctx); + initialize_tensors(ctx.get()); // build graph - ggml_cgraph * gf = ggml_new_graph_custom(ctx, graph_nodes, false); + ggml_cgraph * gf = ggml_new_graph_custom(ctx.get(), graph_nodes, false); ggml_build_forward_expand(gf, out); // warmup run - ggml_backend_graph_compute(backend, gf); + ggml_status status = ggml_backend_graph_compute(backend, gf); + if (status != GGML_STATUS_SUCCESS) { + fprintf(stderr, "%s: ggml_backend_graph_compute failed. status=%s \n", __func__, ggml_status_to_string(status)); + return false; + } // determine number of runs int n_runs; @@ -684,7 +688,11 @@ struct test_case { int total_runs = 0; do { int64_t start_time = ggml_time_us(); - ggml_backend_graph_compute(backend, gf); + ggml_status status = ggml_backend_graph_compute(backend, gf); + if (status != GGML_STATUS_SUCCESS) { + fprintf(stderr, "%s: ggml_backend_graph_compute failed. status=%s \n", __func__, ggml_status_to_string(status)); + return false; + } int64_t end_time = ggml_time_us(); total_time_us += end_time - start_time; @@ -722,10 +730,6 @@ struct test_case { } printf("\n"); - ggml_backend_buffer_free(buf); - - ggml_free(ctx); - return true; } @@ -738,17 +742,16 @@ struct test_case { /* .mem_base = */ NULL, /* .no_alloc = */ true, }; - ggml_context * ctx = ggml_init(params); + ggml_context_ptr ctx(ggml_init(params)); // smart ptr GGML_ASSERT(ctx); - gf = ggml_new_graph_custom(ctx, GGML_DEFAULT_GRAPH_SIZE, true); - gb = ggml_new_graph_custom(ctx, GGML_DEFAULT_GRAPH_SIZE, true); + gf = ggml_new_graph_custom(ctx.get(), GGML_DEFAULT_GRAPH_SIZE, true); + gb = ggml_new_graph_custom(ctx.get(), GGML_DEFAULT_GRAPH_SIZE, true); - ggml_tensor * out = build_graph(ctx); + ggml_tensor * out = build_graph(ctx.get()); if ((op_name != nullptr && op_desc(out) != op_name) || out->op == GGML_OP_OPT_STEP_ADAMW) { //printf(" %s: skipping\n", op_desc(out).c_str()); - ggml_free(ctx); return true; } @@ -756,7 +759,6 @@ struct test_case { fflush(stdout); if (out->type != GGML_TYPE_F32) { - ggml_free(ctx); printf("not supported [%s->type != FP32]\n", out->name); return true; } @@ -764,7 +766,7 @@ struct test_case { // check if the backend supports the ops bool supported = true; bool any_params = false; - for (ggml_tensor * t = ggml_get_first_tensor(ctx); t != NULL; t = ggml_get_next_tensor(ctx, t)) { + for (ggml_tensor * t = ggml_get_first_tensor(ctx.get()); t != NULL; t = ggml_get_next_tensor(ctx.get(), t)) { if (!ggml_backend_supports_op(backend, t)) { printf("not supported [%s] ", ggml_backend_name(backend)); supported = false; @@ -785,40 +787,38 @@ struct test_case { } if (!supported) { printf("\n"); - ggml_free(ctx); return true; } int64_t ngrads = 
0; - for (ggml_tensor * t = ggml_get_first_tensor(ctx); t != NULL; t = ggml_get_next_tensor(ctx, t)) { + for (ggml_tensor * t = ggml_get_first_tensor(ctx.get()); t != NULL; t = ggml_get_next_tensor(ctx.get(), t)) { if (t->flags & GGML_TENSOR_FLAG_PARAM) { ngrads += ggml_nelements(t); } } if (ngrads > grad_nmax()) { printf("skipping large tensors for speed \n"); - ggml_free(ctx); return true; } if (!ggml_is_scalar(out)) { - out = ggml_sum(ctx, out); + out = ggml_sum(ctx.get(), out); ggml_set_name(out, "sum_of_out"); } ggml_set_loss(out); ggml_build_forward_expand(gf, out); ggml_graph_cpy(gf, gb); - ggml_build_backward_expand(ctx, ctx, gb, false); + ggml_build_backward_expand(ctx.get(), ctx.get(), gb, false); if (expect.size() != 1 || expect[0] != 0.0f) { GGML_ASSERT(ggml_graph_n_nodes(gb) > ggml_graph_n_nodes(gf)); - for (ggml_tensor * t = ggml_get_first_tensor(ctx); t != NULL; t = ggml_get_next_tensor(ctx, t)) { + for (ggml_tensor * t = ggml_get_first_tensor(ctx.get()); t != NULL; t = ggml_get_next_tensor(ctx.get(), t)) { GGML_ASSERT(!(t->flags & GGML_TENSOR_FLAG_PARAM) || ggml_graph_get_grad(gb, t)->op != GGML_OP_NONE); } } - for (ggml_tensor * t = ggml_get_first_tensor(ctx); t != NULL; t = ggml_get_next_tensor(ctx, t)) { + for (ggml_tensor * t = ggml_get_first_tensor(ctx.get()); t != NULL; t = ggml_get_next_tensor(ctx.get(), t)) { if (!ggml_backend_supports_op(backend, t)) { printf("not supported [%s] ", ggml_backend_name(backend)); supported = false; @@ -832,27 +832,32 @@ struct test_case { } if (!supported) { printf("\n"); - ggml_free(ctx); return true; } // allocate - ggml_backend_buffer_t buf = ggml_backend_alloc_ctx_tensors(ctx, backend); + ggml_backend_buffer_ptr buf(ggml_backend_alloc_ctx_tensors(ctx.get(), backend)); // smart ptr if (buf == NULL) { printf("failed to allocate tensors [%s] ", ggml_backend_name(backend)); - ggml_free(ctx); return false; } - - initialize_tensors(ctx); // Randomizes all tensors (including gradients). + initialize_tensors(ctx.get()); // Randomizes all tensors (including gradients). ggml_graph_reset(gb); // Sets gradients to 1 if loss, 0 otherwise. - ggml_backend_graph_compute(backend, gf); - ggml_backend_graph_compute(backend, gb); + ggml_status status = ggml_backend_graph_compute(backend, gf); + if (status != GGML_STATUS_SUCCESS) { + fprintf(stderr, "%s: ggml_backend_graph_compute failed. status=%s \n", __func__, ggml_status_to_string(status)); + return false; + } + status = ggml_backend_graph_compute(backend, gb); + if (status != GGML_STATUS_SUCCESS) { + fprintf(stderr, "%s: ggml_backend_graph_compute failed. status=%s \n", __func__, ggml_status_to_string(status)); + return false; + } bool ok = true; - for (struct ggml_tensor * t = ggml_get_first_tensor(ctx); t != nullptr; t = ggml_get_next_tensor(ctx, t)) { + for (struct ggml_tensor * t = ggml_get_first_tensor(ctx.get()); t != nullptr; t = ggml_get_next_tensor(ctx.get(), t)) { if (!(t->flags & GGML_TENSOR_FLAG_PARAM)) { continue; } @@ -897,20 +902,36 @@ struct test_case { float fu, fuh, fdh, fd; // output values for xiu, xiuh, xid, xidh ggml_backend_tensor_set(t, &xiu, i*sizeof(float), sizeof(float)); - ggml_backend_graph_compute(backend, gf); + status = ggml_backend_graph_compute(backend, gf); + if (status != GGML_STATUS_SUCCESS) { + fprintf(stderr, "%s: ggml_backend_graph_compute failed. 
status=%s \n", __func__, ggml_status_to_string(status)); + return false; + } ggml_backend_tensor_get(out, &fu, 0, ggml_nbytes(out)); ggml_backend_tensor_set(t, &xid, i*sizeof(float), sizeof(float)); - ggml_backend_graph_compute(backend, gf); + status = ggml_backend_graph_compute(backend, gf); + if (status != GGML_STATUS_SUCCESS) { + fprintf(stderr, "%s: ggml_backend_graph_compute failed. status=%s \n", __func__, ggml_status_to_string(status)); + return false; + } ggml_backend_tensor_get(out, &fd, 0, ggml_nbytes(out)); if (grad_precise()) { ggml_backend_tensor_set(t, &xiuh, i*sizeof(float), sizeof(float)); - ggml_backend_graph_compute(backend, gf); + status = ggml_backend_graph_compute(backend, gf); + if (status != GGML_STATUS_SUCCESS) { + fprintf(stderr, "%s: ggml_backend_graph_compute failed. status=%s \n", __func__, ggml_status_to_string(status)); + return false; + } ggml_backend_tensor_get(out, &fuh, 0, ggml_nbytes(out)); ggml_backend_tensor_set(t, &xidh, i*sizeof(float), sizeof(float)); - ggml_backend_graph_compute(backend, gf); + status = ggml_backend_graph_compute(backend, gf); + if (status != GGML_STATUS_SUCCESS) { + fprintf(stderr, "%s: ggml_backend_graph_compute failed. status=%s \n", __func__, ggml_status_to_string(status)); + return false; + } ggml_backend_tensor_get(out, &fdh, 0, ggml_nbytes(out)); gn[i] = (8.0*(double)fuh + (double)fd - (8.0*(double)fdh + (double)fu)) / (6.0*(double)eps); @@ -936,10 +957,6 @@ struct test_case { printf("compare failed "); } - ggml_backend_buffer_free(buf); - - ggml_free(ctx); - if (ok) { printf("\033[1;32mOK\033[0m\n"); return true;