From f652ebfd544ef9705f3ec30c007ea6d6a5912613 Mon Sep 17 00:00:00 2001
From: 0cc4m
Date: Mon, 22 Jan 2024 18:39:04 +0100
Subject: [PATCH] Implement max_size for backend buffer types to limit the
 size of a single allocation

---
 ggml-alloc.c        | 115 +++++++++++++++++++++++++++++++++++++-------
 ggml-backend-impl.h |  15 ++++++
 ggml-backend.c      |  75 ++++++++++++++++++++++++++++-
 ggml-backend.h      |   3 ++
 ggml-cuda.cu        |   3 ++
 ggml-metal.m        |   1 +
 ggml-opencl.cpp     |   2 +
 ggml-vulkan.cpp     |  18 ++++++-
 8 files changed, 212 insertions(+), 20 deletions(-)

diff --git a/ggml-alloc.c b/ggml-alloc.c
index 89b85d34870d7..faca771934799 100644
--- a/ggml-alloc.c
+++ b/ggml-alloc.c
@@ -780,6 +780,7 @@ ggml_backend_buffer_t ggml_backend_alloc_ctx_tensors_from_buft(struct ggml_conte
     GGML_ASSERT(ggml_get_no_alloc(ctx) == true);

     size_t alignment = ggml_backend_buft_get_alignment(buft);
+    size_t max_size  = ggml_backend_buft_get_max_size(buft);

     size_t nbytes = 0;
     for (struct ggml_tensor * t = ggml_get_first_tensor(ctx); t != NULL; t = ggml_get_next_tensor(ctx, t)) {
@@ -796,35 +797,115 @@ ggml_backend_buffer_t ggml_backend_alloc_ctx_tensors_from_buft(struct ggml_conte
         return NULL;
     }

-    ggml_backend_buffer_t buffer = ggml_backend_buft_alloc_buffer(buft, nbytes);
-    if (buffer == NULL) {
-        // failed to allocate buffer
+    // single buffer allocation
+    if (nbytes <= max_size) {
+        ggml_backend_buffer_t buffer = ggml_backend_buft_alloc_buffer(buft, nbytes);
+        if (buffer == NULL) {
+            // failed to allocate buffer
 #ifndef NDEBUG
-        fprintf(stderr, "%s: failed to allocate buffer\n", __func__);
+            fprintf(stderr, "%s: failed to allocate buffer\n", __func__);
 #endif
-        return NULL;
+            return NULL;
+        }
+
+        ggml_tallocr_t tallocr = ggml_tallocr_new_from_buffer(buffer);
+
+        for (struct ggml_tensor * t = ggml_get_first_tensor(ctx); t != NULL; t = ggml_get_next_tensor(ctx, t)) {
+            if (t->data == NULL) {
+                if (t->view_src == NULL) {
+                    ggml_tallocr_alloc(tallocr, t);
+                } else {
+                    ggml_backend_view_init(buffer, t);
+                }
+            } else {
+                if (t->view_src != NULL) {
+                    // view of a pre-allocated tensor
+                    ggml_backend_view_init(buffer, t);
+                }
+            }
+        }
+
+        ggml_tallocr_free(tallocr);
+
+        return buffer;
     }

-    ggml_tallocr_t tallocr = ggml_tallocr_new_from_buffer(buffer);
+    // multi-buffer: pack the tensors into buffers of at most max_size bytes each.
+    // the buffer count has to come from the same greedy first-fit walk that fills
+    // the buffers below: a plain ceil(nbytes / max_size) can undercount, because a
+    // tensor that does not fit into the remaining space of the current buffer
+    // starts a new one and leaves the rest of the old buffer unused.
+    size_t n_allocs = 1;
+    size_t cur_alloc_size = 0;
+    for (struct ggml_tensor * t = ggml_get_first_tensor(ctx); t != NULL; t = ggml_get_next_tensor(ctx, t)) {
+        if (t->data == NULL && t->view_src == NULL) {
+            size_t tensor_size = GGML_PAD(ggml_backend_buft_get_alloc_size(buft, t), alignment);
+            if (cur_alloc_size + tensor_size > max_size) {
+                n_allocs += 1;
+                cur_alloc_size = 0;
+            }
+            cur_alloc_size += tensor_size;
+        }
+    }
+    size_t * nbytes_per_alloc = (size_t *) calloc(n_allocs, sizeof(size_t));

+    // calculate nbytes per alloc
+    size_t alloc_idx = 0;
     for (struct ggml_tensor * t = ggml_get_first_tensor(ctx); t != NULL; t = ggml_get_next_tensor(ctx, t)) {
-        if (t->data == NULL) {
-            if (t->view_src == NULL) {
-                ggml_tallocr_alloc(tallocr, t);
-            } else {
-                ggml_backend_view_init(buffer, t);
+        if (t->data == NULL && t->view_src == NULL) {
+            size_t tensor_size = GGML_PAD(ggml_backend_buft_get_alloc_size(buft, t), alignment);
+            if (nbytes_per_alloc[alloc_idx] + tensor_size > max_size) {
+                // move to next allocation
+                alloc_idx += 1;
             }
-        } else {
-            if (t->view_src != NULL) {
-                // view of a pre-allocated tensor
-                ggml_backend_view_init(buffer, t);
+            nbytes_per_alloc[alloc_idx] += tensor_size;
+        }
+    }
+
+    ggml_backend_buffer_t multi_buffer = ggml_backend_multi_buffer_alloc_buffer(n_allocs, buft, nbytes);
+    ggml_backend_multi_buffer_context_t multi_ctx = (ggml_backend_multi_buffer_context_t) multi_buffer->context;
+
+    size_t bytes_counter = 0;
+    struct ggml_tensor * current_tensor = ggml_get_first_tensor(ctx);
+
+    for (alloc_idx = 0; alloc_idx < n_allocs; alloc_idx++) {
+        ggml_backend_buffer_t buffer = ggml_backend_buft_alloc_buffer(buft, nbytes_per_alloc[alloc_idx]);
+        if (buffer == NULL) {
+            // failed to allocate buffer
+#ifndef NDEBUG
+            fprintf(stderr, "%s: failed to allocate buffer\n", __func__);
+#endif
+
+            // free previously allocated buffers
+            for (size_t dealloc_idx = 0; dealloc_idx < alloc_idx; dealloc_idx++) {
+                ggml_backend_buffer_free(multi_ctx->buffers[dealloc_idx]);
+            }
+
+            free(nbytes_per_alloc);
+
+            return NULL;
+        }
+
+        multi_ctx->buffers[alloc_idx] = buffer;
+
+        ggml_tallocr_t tallocr = ggml_tallocr_new_from_buffer(buffer);
+
+        for (; current_tensor != NULL; current_tensor = ggml_get_next_tensor(ctx, current_tensor)) {
+            if (current_tensor->data == NULL && current_tensor->view_src == NULL) {
+                // only tensors that receive their own allocation count towards
+                // max_size, matching the packing pass above
+                size_t tensor_size = GGML_PAD(ggml_backend_buft_get_alloc_size(buft, current_tensor), alignment);
+
+                if (bytes_counter + tensor_size > max_size) {
+                    // tensor uses next buffer
+                    bytes_counter = 0;
+                    break;
+                }
+
+                bytes_counter += tensor_size;
+                ggml_tallocr_alloc(tallocr, current_tensor);
+            } else if (current_tensor->view_src != NULL) {
+                // view of a pre-allocated tensor
+                ggml_backend_view_init(buffer, current_tensor);
+            }
+        }
+
+        ggml_tallocr_free(tallocr);
     }

-    ggml_tallocr_free(tallocr);
+    free(nbytes_per_alloc);

-    return buffer;
+    return multi_buffer;
 }

 ggml_backend_buffer_t ggml_backend_alloc_ctx_tensors(struct ggml_context * ctx, ggml_backend_t backend) {
diff --git a/ggml-backend-impl.h b/ggml-backend-impl.h
index 1397828d9ac71..f7919fe2776a9 100644
--- a/ggml-backend-impl.h
+++ b/ggml-backend-impl.h
@@ -19,6 +19,7 @@ extern "C" {
         const char *          (*GGML_CALL get_name)        (ggml_backend_buffer_type_t buft);
         ggml_backend_buffer_t (*GGML_CALL alloc_buffer)    (ggml_backend_buffer_type_t buft, size_t size);
         size_t                (*GGML_CALL get_alignment)   (ggml_backend_buffer_type_t buft); // tensor alignment
+        size_t                (*GGML_CALL get_max_size)    (ggml_backend_buffer_type_t buft); // allocation max size
         size_t                (*GGML_CALL get_alloc_size)  (ggml_backend_buffer_type_t buft, const struct ggml_tensor * tensor); // data size needed to allocate the tensor, including padding
         bool                  (*GGML_CALL supports_backend)(ggml_backend_buffer_type_t buft, ggml_backend_t backend); // check if the buffer type is usable by the backend
         // check if tensor data is in host memory
@@ -63,6 +64,20 @@ extern "C" {
     // do not use directly, use ggml_backend_tensor_copy instead
     bool ggml_backend_buffer_copy_tensor(const struct ggml_tensor * src, struct ggml_tensor * dst);

+    // multi-buffer
+    struct ggml_backend_multi_buffer_context {
+        ggml_backend_buffer_t * buffers;
+        size_t n_buffers;
+    };
+
+    typedef struct ggml_backend_multi_buffer_context * ggml_backend_multi_buffer_context_t;
+
+    GGML_CALL const char *          ggml_backend_multi_buffer_get_name(ggml_backend_buffer_t buffer);
+    GGML_CALL ggml_backend_buffer_t ggml_backend_multi_buffer_alloc_buffer(size_t n_buffers, ggml_backend_buffer_type_t buft, size_t nbytes);
+    GGML_CALL void                  ggml_backend_multi_buffer_free_buffer(ggml_backend_buffer_t buffer);
+    GGML_CALL void                  ggml_backend_multi_buffer_clear(ggml_backend_buffer_t buffer, uint8_t value);
+    struct ggml_backend_buffer_i    ggml_backend_multi_buffer_context_interface(void);
+
     //
     // Backend
     //
diff --git a/ggml-backend.c b/ggml-backend.c
index f5424fb904117..af989dec85ddc 100644
--- a/ggml-backend.c
+++ b/ggml-backend.c
@@ -27,6 +27,14 @@ size_t ggml_backend_buft_get_alignment(ggml_backend_buffer_type_t buft) {
     return buft->iface.get_alignment(buft);
 }

+size_t ggml_backend_buft_get_max_size(ggml_backend_buffer_type_t buft) {
+    // get_max_size is optional, defaults to UINT64_MAX
+    if (buft->iface.get_max_size) {
+        return buft->iface.get_max_size(buft);
+    }
+
+    return UINT64_MAX;
+}
+
 GGML_CALL size_t ggml_backend_buft_get_alloc_size(ggml_backend_buffer_type_t buft, struct ggml_tensor * tensor) {
     // get_alloc_size is optional, defaults to ggml_nbytes
     if (buft->iface.get_alloc_size) {
@@ -55,8 +63,6 @@ GGML_CALL ggml_backend_buffer_t ggml_backend_buffer_init(
         size_t size) {
     ggml_backend_buffer_t buffer = malloc(sizeof(struct ggml_backend_buffer));

-    GGML_ASSERT(iface.get_base != NULL);
-
     (*buffer) = (struct ggml_backend_buffer) {
         /* .interface = */ iface,
         /* .buft      = */ buft,
@@ -106,6 +112,10 @@ size_t ggml_backend_buffer_get_alignment (ggml_backend_buffer_t buffer) {
     return ggml_backend_buft_get_alignment(ggml_backend_buffer_get_type(buffer));
 }

+size_t ggml_backend_buffer_get_max_size(ggml_backend_buffer_t buffer) {
+    return ggml_backend_buft_get_max_size(ggml_backend_buffer_get_type(buffer));
+}
+
 size_t ggml_backend_buffer_get_alloc_size(ggml_backend_buffer_t buffer, struct ggml_tensor * tensor) {
     return ggml_backend_buft_get_alloc_size(ggml_backend_buffer_get_type(buffer), tensor);
 }
@@ -169,6 +179,10 @@ size_t ggml_backend_get_alignment(ggml_backend_t backend) {
     return ggml_backend_buft_get_alignment(ggml_backend_get_default_buffer_type(backend));
 }

+size_t ggml_backend_get_max_size(ggml_backend_t backend) {
+    return ggml_backend_buft_get_max_size(ggml_backend_get_default_buffer_type(backend));
+}
+
 void ggml_backend_tensor_set_async(ggml_backend_t backend, struct ggml_tensor * tensor, const void * data, size_t offset, size_t size) {
     GGML_ASSERT(tensor->data != NULL && "tensor not allocated");
     GGML_ASSERT(offset + size <= ggml_nbytes(tensor) && "tensor write out of bounds");
@@ -342,6 +356,11 @@ GGML_CALL static void ggml_backend_registry_init(void) {
     extern GGML_CALL ggml_backend_buffer_type_t ggml_backend_metal_buffer_type(void);
     ggml_backend_register("Metal", ggml_backend_reg_metal_init, ggml_backend_metal_buffer_type(), NULL);
 #endif
+
+#ifdef GGML_USE_VULKAN
+    extern GGML_CALL int ggml_backend_vk_reg_devices(void);
+    ggml_backend_vk_reg_devices();
+#endif
 }

 GGML_CALL void ggml_backend_register(const char * name, ggml_backend_init_fn init_fn, ggml_backend_buffer_type_t default_buffer_type, void * user_data) {
@@ -545,6 +564,7 @@ GGML_CALL ggml_backend_buffer_type_t ggml_backend_cpu_buffer_type(void) {
             /* .get_name         = */ ggml_backend_cpu_buffer_type_get_name,
             /* .alloc_buffer     = */ ggml_backend_cpu_buffer_type_alloc_buffer,
             /* .get_alignment    = */ ggml_backend_cpu_buffer_type_get_alignment,
+            /* .get_max_size     = */ NULL, // defaults to UINT64_MAX
             /* .get_alloc_size   = */ NULL, // defaults to ggml_nbytes
             /* .supports_backend = */ ggml_backend_cpu_buffer_type_supports_backend,
             /* .is_host          = */ ggml_backend_cpu_buffer_type_is_host,
@@ -600,6 +620,7 @@ ggml_backend_buffer_type_t ggml_backend_cpu_hbm_buffer_type(void) {
             /* .get_name         = */ ggml_backend_cpu_hbm_buffer_type_get_name,
             /* .alloc_buffer     = */ ggml_backend_cpu_hbm_buffer_type_alloc_buffer,
             /* .get_alignment    = */ ggml_backend_cpu_buffer_type_get_alignment,
+            /* .get_max_size     = */ NULL, // defaults to UINT64_MAX
             /* .get_alloc_size   = */ NULL, // defaults to ggml_nbytes
             /* .supports_backend = */ ggml_backend_cpu_buffer_type_supports_backend,
             /* .is_host          = */ ggml_backend_cpu_buffer_type_is_host,
@@ -755,6 +776,56 @@ GGML_CALL static ggml_backend_t ggml_backend_reg_cpu_init(const char * params, v
 }

+// multi-buffer buffer
+
+GGML_CALL const char * ggml_backend_multi_buffer_get_name(ggml_backend_buffer_t buffer) {
+    ggml_backend_multi_buffer_context_t ctx = (ggml_backend_multi_buffer_context_t) buffer->context;
+
+    return ctx->buffers[0]->iface.get_name(ctx->buffers[0]);
+}
+
+GGML_CALL ggml_backend_buffer_t ggml_backend_multi_buffer_alloc_buffer(size_t n_buffers, ggml_backend_buffer_type_t buft, size_t nbytes) {
+    ggml_backend_multi_buffer_context_t ctx = (ggml_backend_multi_buffer_context_t) malloc(sizeof(struct ggml_backend_multi_buffer_context));
+    ctx->n_buffers = n_buffers;
+    ctx->buffers = (ggml_backend_buffer_t *) malloc(n_buffers * sizeof(ggml_backend_buffer_t));
+
+    return ggml_backend_buffer_init(buft, ggml_backend_multi_buffer_context_interface(), ctx, nbytes);
+}
+
+GGML_CALL void ggml_backend_multi_buffer_free_buffer(ggml_backend_buffer_t buffer) {
+    ggml_backend_multi_buffer_context_t ctx = (ggml_backend_multi_buffer_context_t) buffer->context;
+    for (size_t i = 0; i < ctx->n_buffers; i++) {
+        ggml_backend_buffer_free(ctx->buffers[i]);
+    }
+
+    free(ctx->buffers);
+    free(ctx);
+}
+
+GGML_CALL void ggml_backend_multi_buffer_clear(ggml_backend_buffer_t buffer, uint8_t value) {
+    ggml_backend_multi_buffer_context_t ctx = (ggml_backend_multi_buffer_context_t) buffer->context;
+    for (size_t i = 0; i < ctx->n_buffers; i++) {
+        ggml_backend_buffer_clear(ctx->buffers[i], value);
+    }
+}
+
+struct ggml_backend_buffer_i ggml_backend_multi_buffer_context_interface(void) {
+    static struct ggml_backend_buffer_i multi_backend_buffer_i = {
+        /* .get_name    = */ ggml_backend_multi_buffer_get_name,
+        /* .free_buffer = */ ggml_backend_multi_buffer_free_buffer,
+        /* .get_base    = */ NULL,
+        /* .init_tensor = */ NULL,
+        /* .set_tensor  = */ NULL,
+        /* .get_tensor  = */ NULL,
+        /* .cpy_tensor  = */ NULL,
+        /* .clear       = */ ggml_backend_multi_buffer_clear,
+        /* .reset       = */ NULL,
+    };
+
+    return multi_backend_buffer_i;
+}
+
+
 // scheduler

 #define GGML_MAX_BACKENDS 16
diff --git a/ggml-backend.h b/ggml-backend.h
index 12b4b4ab74935..de7b4255fd21a 100644
--- a/ggml-backend.h
+++ b/ggml-backend.h
@@ -20,6 +20,7 @@ extern "C" {
     GGML_API           const char *          ggml_backend_buft_name            (ggml_backend_buffer_type_t buft);
     GGML_API GGML_CALL ggml_backend_buffer_t ggml_backend_buft_alloc_buffer    (ggml_backend_buffer_type_t buft, size_t size);
     GGML_API           size_t                ggml_backend_buft_get_alignment   (ggml_backend_buffer_type_t buft);
+    GGML_API           size_t                ggml_backend_buft_get_max_size    (ggml_backend_buffer_type_t buft);
     GGML_API GGML_CALL size_t                ggml_backend_buft_get_alloc_size  (ggml_backend_buffer_type_t buft, struct ggml_tensor * tensor);
     GGML_API           bool                  ggml_backend_buft_supports_backend(ggml_backend_buffer_type_t buft, ggml_backend_t backend);
     GGML_API           bool                  ggml_backend_buft_is_host         (ggml_backend_buffer_type_t buft);
@@ -36,6 +37,7 @@ extern "C" {
     GGML_API           size_t      ggml_backend_buffer_get_size      (ggml_backend_buffer_t buffer);
     GGML_API GGML_CALL void        ggml_backend_buffer_init_tensor   (ggml_backend_buffer_t buffer, struct ggml_tensor * tensor);
     GGML_API           size_t      ggml_backend_buffer_get_alignment (ggml_backend_buffer_t buffer);
+    GGML_API           size_t      ggml_backend_buffer_get_max_size  (ggml_backend_buffer_t buffer);
     GGML_API           size_t      ggml_backend_buffer_get_alloc_size(ggml_backend_buffer_t buffer, struct ggml_tensor * tensor);
     GGML_API           void        ggml_backend_buffer_clear         (ggml_backend_buffer_t buffer, uint8_t value);
     GGML_API           bool        ggml_backend_buffer_is_host       (ggml_backend_buffer_t buffer);
@@ -54,6 +56,7 @@ extern "C" {
     GGML_API ggml_backend_buffer_type_t ggml_backend_get_default_buffer_type(ggml_backend_t backend);
     GGML_API ggml_backend_buffer_t      ggml_backend_alloc_buffer(ggml_backend_t backend, size_t size);
     GGML_API size_t                     ggml_backend_get_alignment(ggml_backend_t backend);
+    GGML_API size_t                     ggml_backend_get_max_size(ggml_backend_t backend);

     GGML_API void ggml_backend_tensor_set_async(ggml_backend_t backend,       struct ggml_tensor * tensor, const void * data, size_t offset, size_t size);
     GGML_API void ggml_backend_tensor_get_async(ggml_backend_t backend, const struct ggml_tensor * tensor,       void * data, size_t offset, size_t size);

diff --git a/ggml-cuda.cu b/ggml-cuda.cu
index 568c411afd3ee..b25c4a8fb071c 100644
--- a/ggml-cuda.cu
+++ b/ggml-cuda.cu
@@ -10428,6 +10428,7 @@ static ggml_backend_buffer_type_i ggml_backend_cuda_buffer_type_interface = {
     /* .get_name         = */ ggml_backend_cuda_buffer_type_name,
     /* .alloc_buffer     = */ ggml_backend_cuda_buffer_type_alloc_buffer,
     /* .get_alignment    = */ ggml_backend_cuda_buffer_type_get_alignment,
+    /* .get_max_size     = */ NULL, // defaults to UINT64_MAX
     /* .get_alloc_size   = */ ggml_backend_cuda_buffer_type_get_alloc_size,
     /* .supports_backend = */ ggml_backend_cuda_buffer_type_supports_backend,
     /* .is_host          = */ NULL,
@@ -10703,6 +10704,7 @@ static ggml_backend_buffer_type_i ggml_backend_cuda_split_buffer_type_interface
     /* .get_name         = */ ggml_backend_cuda_split_buffer_type_name,
     /* .alloc_buffer     = */ ggml_backend_cuda_split_buffer_type_alloc_buffer,
     /* .get_alignment    = */ ggml_backend_cuda_split_buffer_type_get_alignment,
+    /* .get_max_size     = */ NULL, // defaults to UINT64_MAX
     /* .get_alloc_size   = */ ggml_backend_cuda_split_buffer_type_get_alloc_size,
     /* .supports_backend = */ ggml_backend_cuda_split_buffer_type_supports_backend,
     /* .is_host          = */ ggml_backend_cuda_split_buffer_type_is_host,
@@ -10782,6 +10784,7 @@ GGML_CALL ggml_backend_buffer_type_t ggml_backend_cuda_host_buffer_type() {
     /* .get_name         = */ ggml_backend_cuda_host_buffer_type_name,
     /* .alloc_buffer     = */ ggml_backend_cuda_host_buffer_type_alloc_buffer,
     /* .get_alignment    = */ ggml_backend_cpu_buffer_type()->iface.get_alignment,
+    /* .get_max_size     = */ NULL, // defaults to UINT64_MAX
     /* .get_alloc_size   = */ ggml_backend_cpu_buffer_type()->iface.get_alloc_size,
     /* .supports_backend = */ ggml_backend_cpu_buffer_type()->iface.supports_backend,
     /* .is_host          = */ ggml_backend_cpu_buffer_type()->iface.is_host,
diff --git a/ggml-metal.m b/ggml-metal.m
index a549e6713e9bc..35a077299de94 100644
--- a/ggml-metal.m
+++ b/ggml-metal.m
@@ -2445,6 +2445,7 @@ GGML_CALL ggml_backend_buffer_type_t ggml_backend_metal_buffer_type(void) {
         /* .get_name         = */ ggml_backend_metal_buffer_type_get_name,
         /* .alloc_buffer     = */ ggml_backend_metal_buffer_type_alloc_buffer,
         /* .get_alignment    = */ ggml_backend_metal_buffer_type_get_alignment,
+        /* .get_max_size     = */ NULL, // defaults to UINT64_MAX
         /* .get_alloc_size   = */ NULL, // defaults to ggml_nbytes
         /* .supports_backend = */ ggml_backend_metal_buffer_type_supports_backend,
         /* .is_host          = */ ggml_backend_metal_buffer_type_is_host,
diff --git a/ggml-opencl.cpp b/ggml-opencl.cpp
index 2bb93638f1c7c..db56337ffe81b 100644
--- a/ggml-opencl.cpp
+++ b/ggml-opencl.cpp
@@ -2055,6 +2055,7 @@ static ggml_backend_buffer_type_i ggml_backend_opencl_buffer_type_interface = {
     /* .get_name         = */ ggml_backend_opencl_buffer_type_name,
     /* .alloc_buffer     = */ ggml_backend_opencl_buffer_type_alloc_buffer,
     /* .get_alignment    = */ ggml_backend_opencl_buffer_type_get_alignment,
+    /* .get_max_size     = */ NULL, // defaults to UINT64_MAX
     /* .get_alloc_size   = */ NULL,
     /* .supports_backend = */ ggml_backend_opencl_buffer_type_supports_backend,
     /* .is_host          = */ NULL,
@@ -2111,6 +2112,7 @@ ggml_backend_buffer_type_t ggml_backend_opencl_host_buffer_type() {
         /* .get_name         = */ ggml_backend_opencl_host_buffer_type_name,
         /* .alloc_buffer     = */ ggml_backend_opencl_host_buffer_type_alloc_buffer,
         /* .get_alignment    = */ ggml_backend_cpu_buffer_type()->iface.get_alignment,
+        /* .get_max_size     = */ NULL, // defaults to UINT64_MAX
         /* .get_alloc_size   = */ ggml_backend_cpu_buffer_type()->iface.get_alloc_size,
         /* .supports_backend = */ ggml_backend_cpu_buffer_type()->iface.supports_backend,
         /* .is_host          = */ ggml_backend_cpu_buffer_type()->iface.is_host,
diff --git a/ggml-vulkan.cpp b/ggml-vulkan.cpp
index 82225564f231b..722bbe37ab715 100644
--- a/ggml-vulkan.cpp
+++ b/ggml-vulkan.cpp
@@ -121,6 +121,7 @@ typedef std::vector<vk_submission> vk_sequence;
 struct vk_device {
     vk::PhysicalDevice physical_device;
     vk::PhysicalDeviceProperties properties;
+    uint64_t max_memory_allocation_size;
     bool fp16;
     vk::Device device;
     uint32_t vendor_id;
@@ -972,7 +973,14 @@ std::cerr << "ggml_vulkan: Validation layers enabled" << std::endl;
     vk_instance = vk::createInstance(instance_create_info);

     vk_device.physical_device = vk_instance.enumeratePhysicalDevices()[dev_num];
-    vk_device.properties = vk_device.physical_device.getProperties();
+    vk::PhysicalDeviceProperties2 props2;
+    vk::PhysicalDeviceMaintenance3Properties props3;
+    props3.pNext = nullptr;
+    props2.pNext = &props3;
+    vk_device.physical_device.getProperties2(&props2);
+    vk_device.properties = props2.properties;
+    vk_device.max_memory_allocation_size = props3.maxMemoryAllocationSize;
+
     std::cerr << "ggml_vulkan: Using " << vk_device.properties.deviceName << std::endl;

     vk_device.vendor_id = vk_device.properties.vendorID;
@@ -4243,6 +4251,12 @@ GGML_CALL static size_t ggml_backend_vk_buffer_type_get_alignment(ggml_backend_b
     UNUSED(buft);
 }

+GGML_CALL static size_t ggml_backend_vk_buffer_type_get_max_size(ggml_backend_buffer_type_t buft) {
+    return vk_device.max_memory_allocation_size;
+
+    UNUSED(buft);
+}
+
 GGML_CALL static size_t ggml_backend_vk_buffer_type_get_alloc_size(ggml_backend_buffer_type_t buft, const ggml_tensor * tensor) {
     return ggml_nbytes(tensor);

@@ -4259,6 +4273,7 @@ static ggml_backend_buffer_type_i ggml_backend_vk_buffer_type_interface = {
     /* .get_name         = */ ggml_backend_vk_buffer_type_name,
     /* .alloc_buffer     = */ ggml_backend_vk_buffer_type_alloc_buffer,
     /* .get_alignment    = */ ggml_backend_vk_buffer_type_get_alignment,
+    /* .get_max_size     = */ ggml_backend_vk_buffer_type_get_max_size,
     /* .get_alloc_size   = */ ggml_backend_vk_buffer_type_get_alloc_size,
     /* .supports_backend = */ ggml_backend_vk_buffer_type_supports_backend,
     /* .is_host          = */ NULL,
@@ -4326,6 +4341,7 @@ GGML_CALL ggml_backend_buffer_type_t ggml_backend_vk_host_buffer_type() {
     /* .get_name         = */ ggml_backend_vk_host_buffer_type_name,
     /* .alloc_buffer     = */ ggml_backend_vk_host_buffer_type_alloc_buffer,
     /* .get_alignment    = */ ggml_backend_vk_host_buffer_type_get_alignment,
+    /* .get_max_size     = */ NULL, // defaults to UINT64_MAX
     /* .get_alloc_size   = */ ggml_backend_cpu_buffer_type()->iface.get_alloc_size,
     /* .supports_backend = */ ggml_backend_cpu_buffer_type()->iface.supports_backend,
     /* .is_host          = */ ggml_backend_cpu_buffer_type()->iface.is_host,
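
A note on the interface change: get_max_size follows the same optional-member convention that get_alloc_size already uses, so any backend that leaves the pointer NULL keeps the previous unlimited-allocation behavior through the UINT64_MAX default in ggml_backend_buft_get_max_size(). Only ggml-vulkan.cpp opts in here, reporting the device's maxMemoryAllocationSize. The following standalone sketch shows the dispatch pattern in isolation; the types are simplified stand-ins and the 1 GiB limit is made up, none of it is code from this patch:

    // sketch of the "optional interface member with a default" pattern
    #include <stdint.h>
    #include <stdio.h>

    typedef struct buffer_type {
        // optional: backends leave this NULL when allocations are unbounded
        size_t (*get_max_size)(struct buffer_type * buft);
    } buffer_type;

    static size_t buft_get_max_size(buffer_type * buft) {
        if (buft->get_max_size) {
            return buft->get_max_size(buft);
        }
        return UINT64_MAX; // same default the patch uses
    }

    static size_t vk_like_max_size(buffer_type * buft) {
        (void) buft;       // single global device in this sketch
        return 1ull << 30; // e.g. a device's maxMemoryAllocationSize
    }

    int main(void) {
        buffer_type cpu = { /* .get_max_size = */ NULL };
        buffer_type vk  = { /* .get_max_size = */ vk_like_max_size };

        printf("cpu max: %zu\n", buft_get_max_size(&cpu)); // effectively unlimited
        printf("vk  max: %zu\n", buft_get_max_size(&vk));  // 1073741824
        return 0;
    }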
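
The multi-buffer path in ggml-alloc.c derives the buffer count from the same greedy first-fit walk that later assigns tensors, rather than from ceil(nbytes / max_size). The two can differ: whenever the next tensor does not fit into the space left in the current buffer, that space is wasted and an extra buffer is needed. A small self-contained example with made-up sizes where the naive estimate comes up one buffer short:

    // demonstrates why the buffer count must come from the greedy packing,
    // not from ceil(nbytes / max_size); sizes are hypothetical units
    #include <stddef.h>
    #include <stdio.h>

    int main(void) {
        const size_t max_size = 6;            // pretend allocations are capped at 6 units
        const size_t sizes[]  = {4, 4, 4, 4}; // padded tensor sizes, 16 units total
        const size_t n = sizeof(sizes) / sizeof(sizes[0]);

        size_t nbytes = 0;
        for (size_t i = 0; i < n; i++) {
            nbytes += sizes[i];
        }

        // naive estimate: ceil(nbytes / max_size)
        size_t naive = (nbytes + max_size - 1) / max_size;

        // greedy first-fit packing, as the allocator actually assigns tensors
        size_t n_allocs = 1;
        size_t cur = 0;
        for (size_t i = 0; i < n; i++) {
            if (cur + sizes[i] > max_size) {
                n_allocs += 1;
                cur = 0;
            }
            cur += sizes[i];
        }

        printf("naive estimate: %zu buffers, greedy packing: %zu buffers\n", naive, n_allocs);
        // prints: naive estimate: 3 buffers, greedy packing: 4 buffers
        return 0;
    }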
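
From the caller's perspective nothing changes: ggml_backend_alloc_ctx_tensors_from_buft keeps its signature and transparently returns a multi-buffer when the tensors exceed max_size, and a single ggml_backend_buffer_free releases every underlying buffer through the normal buffer interface. A usage sketch, assuming a ggml_context created with no_alloc = true and ggml.h, ggml-alloc.h, and ggml-backend.h included; alloc_weights is a hypothetical helper, not part of this patch:

    // allocate all tensors of a context on a given buffer type; a multi-buffer
    // is returned transparently when they do not fit into one allocation
    static ggml_backend_buffer_t alloc_weights(struct ggml_context * ctx) {
        ggml_backend_buffer_t buf = ggml_backend_alloc_ctx_tensors_from_buft(ctx, ggml_backend_cpu_buffer_type());
        if (buf == NULL) {
            fprintf(stderr, "tensor allocation failed\n");
            return NULL;
        }
        // when done: ggml_backend_buffer_free(buf) frees all underlying buffers,
        // dispatching to ggml_backend_multi_buffer_free_buffer if needed
        return buf;
    }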