From b71eac2069a30349435c192d682e865718c86a15 Mon Sep 17 00:00:00 2001 From: Clement Sibille Date: Mon, 5 May 2025 08:32:33 +0200 Subject: Add a vulkan renderer that renders an OBJ --- assets/model.obj | 98 + assets/texture.png | Bin 0 -> 5056 bytes meson.build | 48 +- shaders/triangle.frag | 5 +- shaders/triangle.vert | 26 +- src/alloc.c | 171 + src/alloc.h | 40 + src/engine.c | 17 + src/engine.h | 14 + src/hash.c | 15 + src/hash.h | 9 + src/image.c | 20 + src/image.h | 21 + src/log.c | 13 +- src/log.h | 21 +- src/main.c | 28 +- src/maths.c | 103 + src/maths.h | 49 + src/platform.c | 5 + src/platform.h | 98 +- src/platform_sdl.c | 124 +- src/renderer.c | 1470 ---- src/renderer.h | 45 - src/renderer/renderer.c | 2559 ++++++ src/renderer/renderer.h | 126 + src/renderer/vma_usage.cpp | 4 + src/renderer/vma_usage.h | 6 + src/str.c | 181 + src/str.h | 62 + thirdpartylicenses.md | 46 + vendor/stb_image.h | 7988 ++++++++++++++++++ vendor/tiny_obj_loader_c.h | 1793 ++++ vendor/vk_mem_alloc.h | 19111 +++++++++++++++++++++++++++++++++++++++++++ 33 files changed, 32614 insertions(+), 1702 deletions(-) create mode 100644 assets/model.obj create mode 100644 assets/texture.png create mode 100644 src/alloc.c create mode 100644 src/alloc.h create mode 100644 src/engine.c create mode 100644 src/engine.h create mode 100644 src/hash.c create mode 100644 src/hash.h create mode 100644 src/image.c create mode 100644 src/image.h create mode 100644 src/maths.c create mode 100644 src/maths.h create mode 100644 src/platform.c delete mode 100644 src/renderer.c delete mode 100644 src/renderer.h create mode 100644 src/renderer/renderer.c create mode 100644 src/renderer/renderer.h create mode 100644 src/renderer/vma_usage.cpp create mode 100644 src/renderer/vma_usage.h create mode 100644 src/str.c create mode 100644 src/str.h create mode 100644 thirdpartylicenses.md create mode 100644 vendor/stb_image.h create mode 100644 vendor/tiny_obj_loader_c.h create mode 100644 vendor/vk_mem_alloc.h diff --git a/assets/model.obj b/assets/model.obj new file mode 100644 index 0000000..ab344bb --- /dev/null +++ b/assets/model.obj @@ -0,0 +1,98 @@ +# Blender 4.4.1 +# www.blender.org +mtllib model.mtl +o Cube +v 1.000000 1.000000 -1.000000 +v 1.000000 -1.000000 -1.000000 +v 1.000000 1.000000 1.000000 +v 1.000000 -1.000000 1.000000 +v -1.000000 1.000000 -1.000000 +v -1.000000 -1.000000 -1.000000 +v -1.000000 1.000000 1.000000 +v -1.000000 -1.000000 1.000000 +v -1.637802 -1.000000 0.000000 +v 1.389749 1.000000 0.000000 +v -1.637802 1.000000 0.000000 +v 1.389749 -1.000000 0.000000 +v 0.000000 -1.000000 -1.000000 +v 0.000000 1.000000 1.000000 +v 0.000000 -1.000000 1.000000 +v 0.000000 1.000000 -1.000000 +v 0.000000 -1.000000 0.000000 +v 0.000000 1.905488 0.000000 +vn -0.3792 0.6859 0.6211 +vn -0.0000 -0.0000 1.0000 +vn -0.8431 -0.0000 -0.5377 +vn -0.0000 -1.0000 -0.0000 +vn 0.9317 -0.0000 0.3631 +vn -0.0000 -0.0000 -1.0000 +vn 0.9317 -0.0000 -0.3631 +vn -0.8431 -0.0000 0.5377 +vn -0.3792 0.6859 -0.6211 +vn -0.0000 1.0000 -0.0000 +vn 0.4349 0.6675 -0.6044 +vn 0.4349 0.6675 0.6044 +vt 0.875000 0.625000 +vt 0.750000 0.750000 +vt 0.750000 0.625000 +vt 0.625000 0.875000 +vt 0.375000 1.000000 +vt 0.375000 0.875000 +vt 0.625000 0.125000 +vt 0.375000 0.250000 +vt 0.375000 0.125000 +vt 0.375000 0.625000 +vt 0.250000 0.750000 +vt 0.250000 0.625000 +vt 0.625000 0.625000 +vt 0.375000 0.750000 +vt 0.625000 0.375000 +vt 0.375000 0.500000 +vt 0.375000 0.375000 +vt 0.625000 0.500000 +vt 0.250000 0.500000 +vt 0.625000 0.000000 +vt 0.375000 0.000000 +vt 
0.750000 0.500000 +vt 0.125000 0.625000 +vt 0.125000 0.500000 +vt 0.625000 0.250000 +vt 0.125000 0.750000 +vt 0.625000 0.750000 +vt 0.875000 0.750000 +vt 0.625000 1.000000 +vt 0.875000 0.500000 +s 0 +usemtl Material +f 11/1/1 14/2/1 18/3/1 +f 14/4/2 8/5/2 15/6/2 +f 11/7/3 6/8/3 9/9/3 +f 12/10/4 15/11/4 17/12/4 +f 10/13/5 4/14/5 12/10/5 +f 16/15/6 2/16/6 13/17/6 +f 1/18/7 12/10/7 2/16/7 +f 2/16/4 17/12/4 13/19/4 +f 7/20/8 9/9/8 8/21/8 +f 16/22/9 11/1/9 18/3/9 +f 16/22/10 10/13/10 1/18/10 +f 13/19/4 9/23/4 6/24/4 +f 5/25/6 13/17/6 6/8/6 +f 17/12/4 8/26/4 9/23/4 +f 3/27/2 15/6/2 4/14/2 +f 10/13/10 14/2/10 3/27/10 +f 11/1/10 7/28/10 14/2/10 +f 14/4/2 7/29/2 8/5/2 +f 11/7/3 5/25/3 6/8/3 +f 12/10/4 4/14/4 15/11/4 +f 10/13/5 3/27/5 4/14/5 +f 16/15/6 1/18/6 2/16/6 +f 1/18/7 10/13/7 12/10/7 +f 2/16/4 12/10/4 17/12/4 +f 7/20/8 11/7/8 9/9/8 +f 16/22/10 5/30/10 11/1/10 +f 16/22/11 18/3/11 10/13/11 +f 13/19/4 17/12/4 9/23/4 +f 5/25/6 16/15/6 13/17/6 +f 17/12/4 15/11/4 8/26/4 +f 3/27/2 14/4/2 15/6/2 +f 10/13/12 18/3/12 14/2/12 diff --git a/assets/texture.png b/assets/texture.png new file mode 100644 index 0000000..4065f75 Binary files /dev/null and b/assets/texture.png differ diff --git a/meson.build b/meson.build index 6aa0ceb..8ea3009 100644 --- a/meson.build +++ b/meson.build @@ -1,8 +1,11 @@ -project('visible-gltf', 'c', default_options: ['warning_level=3', 'c_std=c23']) +project('visiblegltf', [ 'c', 'cpp' ], default_options: ['c_std=c2x', 'cpp_std=c++20', 'warning_level=3']) build_type = get_option('buildtype') sdl3_dep = dependency('sdl3') + +vendor_incdir = include_directories('vendor', is_system: true) + if host_machine.system() == 'darwin' moltenvk_library_path = '/Users/clements/dev/VulkanSDK/1.4.309.0/macOS/lib' moltenvk_include_path = '/Users/clements/dev/VulkanSDK/1.4.309.0/macOS/include' @@ -10,7 +13,7 @@ vulkan_dep = declare_dependency( link_args: ['-L' + moltenvk_library_path, '-lvulkan'], include_directories: include_directories(moltenvk_include_path) ) -else +else vulkan_dep = dependency('vulkan') endif @@ -19,14 +22,39 @@ if build_type == 'debug' vgltf_c_args += '-DVGLTF_DEBUG' endif -executable( +if host_machine.system() == 'darwin' + vgltf_c_args += '-DVGLTF_PLATFORM_MACOS' +elif host_machine.system() == 'linux' + vgltf_c_args += '-DVGLTF_PLATFORM_LINUX' +elif host_machine.system() == 'windows' + vgltf_c_args += '-DVGLTF_PLATFORM_WINDOWS' +endif + +vgltf_deps = [ + sdl3_dep, + vulkan_dep, +] + +vgltf_srcs = [ + 'src/main.c', + 'src/log.c', + 'src/maths.c', + 'src/alloc.c', + 'src/hash.c', + 'src/str.c', + 'src/platform.c', + 'src/platform_sdl.c', + 'src/image.c', + 'src/renderer/renderer.c', + 'src/renderer/vma_usage.cpp', + 'src/engine.c', +] + +vgltf_exe = executable( 'vgltf', - [ - 'src/main.c', - 'src/log.c', - 'src/platform_sdl.c', - 'src/renderer.c', - ], + vgltf_srcs, c_args: vgltf_c_args, - dependencies: [sdl3_dep, vulkan_dep], + dependencies: vgltf_deps, + link_language: 'cpp', + include_directories: [vendor_incdir] ) diff --git a/shaders/triangle.frag b/shaders/triangle.frag index 7c5b0e7..c7d99f3 100644 --- a/shaders/triangle.frag +++ b/shaders/triangle.frag @@ -1,9 +1,12 @@ #version 450 layout(location = 0) in vec3 fragColor; +layout(location = 1) in vec2 fragTextureCoordinates; layout(location = 0) out vec4 outColor; +layout(binding = 1) uniform sampler2D textureSampler; + void main() { - outColor = vec4(fragColor, 1.0); + outColor = vec4(fragColor * texture(textureSampler, fragTextureCoordinates).rgb, 1.0); } diff --git a/shaders/triangle.vert b/shaders/triangle.vert 
index f5b2f8d..bf93f44 100644 --- a/shaders/triangle.vert +++ b/shaders/triangle.vert @@ -1,20 +1,20 @@ #version 450 -layout(location = 0) out vec3 fragColor; +layout(location = 0) in vec3 inPosition; +layout(location = 1) in vec3 inColor; +layout(location = 2) in vec2 inTextureCoordinates; -vec2 positions[3] = vec2[]( - vec2(0.0, -0.5), - vec2(0.5, 0.5), - vec2(-0.5, 0.5) -); +layout(location = 0) out vec3 fragColor; +layout(location = 1) out vec2 fragTextureCoordinates; -vec3 colors[3] = vec3[]( - vec3(1.0, 0.0, 0.0), - vec3(0.0, 1.0, 0.0), - vec3(0.0, 0.0, 1.0) -); +layout(set = 0, binding = 0) uniform UniformBufferObject { + mat4 model; + mat4 view; + mat4 projection; +} ubo; void main() { - gl_Position = vec4(positions[gl_VertexIndex], 0.0, 1.0); - fragColor = colors[gl_VertexIndex]; + gl_Position = ubo.projection * ubo.view * ubo.model * vec4(inPosition, 1.0); + fragColor = inColor; + fragTextureCoordinates = inTextureCoordinates; } diff --git a/src/alloc.c b/src/alloc.c new file mode 100644 index 0000000..2fb7a78 --- /dev/null +++ b/src/alloc.c @@ -0,0 +1,171 @@ +#include "alloc.h" +#include "maths.h" +#include "platform.h" +#include +#include +#include +#include + +void *vgltf_allocator_allocate(struct vgltf_allocator *allocator, size_t size) { + assert(allocator); + return allocator->allocate(size, allocator->ctx); +} +void *vgltf_allocator_allocate_aligned(struct vgltf_allocator *allocator, + size_t alignment, size_t size) { + assert(allocator); + return allocator->allocate_aligned(alignment, size, allocator->ctx); +} +void *vgltf_allocator_allocate_array(struct vgltf_allocator *allocator, + size_t count, size_t item_size) { + assert(allocator); + return allocator->allocate_array(count, item_size, allocator->ctx); +} +void *vgltf_allocator_reallocate(struct vgltf_allocator *allocator, void *ptr, + size_t old_size, size_t new_size) { + assert(allocator); + return allocator->reallocate(ptr, old_size, new_size, allocator->ctx); +} +void vgltf_allocator_free(struct vgltf_allocator *allocator, void *ptr) { + assert(allocator); + allocator->free(ptr, allocator->ctx); +} + +static void *memory_allocate(size_t size, void *ctx) { + (void)ctx; + void *ptr = malloc(size); + if (!ptr) { + VGLTF_PANIC("Couldn't allocate memory (out of mem?)"); + } + return ptr; +} + +static void *memory_allocate_aligned(size_t alignment, size_t size, void *ctx) { + (void)ctx; +#ifdef VGLTF_PLATFORM_WINDOWS + void *ptr = _aligned_malloc(size, VGLTF_MAX(alignment, sizeof(void *))); +#else + void *ptr = aligned_alloc(VGLTF_MAX(alignment, sizeof(void *)), size); +#endif + if (!ptr) { + VGLTF_PANIC("Couldn't allocate aligned memory (out of mem?)"); + } + return ptr; +} + +static void *memory_allocate_array(size_t count, size_t item_size, void *ctx) { + (void)ctx; + void *ptr = calloc(count, item_size); + if (!ptr) { + VGLTF_PANIC("Couldn't allocate memory (out of mem?)"); + } + return ptr; +} + +static void *memory_reallocate(void *ptr, size_t old_size, size_t new_size, + void *ctx) { + (void)old_size; + (void)ctx; + ptr = realloc(ptr, new_size); + if (!ptr) { + VGLTF_PANIC("Couldn't allocate memory (out of mem?)"); + } + return ptr; +} + +static void memory_free(void *ptr, void *ctx) { + (void)ctx; + free(ptr); +} + +thread_local struct vgltf_allocator system_allocator = { + .allocate = memory_allocate, + .allocate_aligned = memory_allocate_aligned, + .allocate_array = memory_allocate_array, + .reallocate = memory_reallocate, + .free = memory_free}; + +void vgltf_arena_init(struct vgltf_allocator *allocator, 
struct vgltf_arena *arena, + size_t size) { + assert(allocator); + assert(arena); + arena->size = 0; + arena->capacity = size; + arena->data = vgltf_allocator_allocate(allocator, size); +} +void vgltf_arena_deinit(struct vgltf_allocator *allocator, + struct vgltf_arena *arena) { + assert(allocator); + assert(arena); + vgltf_allocator_free(allocator, arena->data); +} +void *vgltf_arena_allocate(struct vgltf_arena *arena, size_t size) { + assert(arena); + assert(arena->size + size <= arena->capacity); + void *ptr = arena->data + arena->size; + arena->size += size; + return ptr; +} + +void *vgltf_arena_allocate_array(struct vgltf_arena *arena, size_t count, + size_t item_size) { + assert(arena); + void *ptr = vgltf_arena_allocate(arena, count * item_size); + memset(ptr, 0, count * item_size); + return ptr; +} + +void vgltf_arena_reset(struct vgltf_arena *arena) { + assert(arena); + arena->size = 0; +} + +static void *arena_allocator_allocate(size_t size, void *ctx) { + assert(ctx); + return vgltf_arena_allocate(ctx, size); +} +static void *arena_allocator_allocate_aligned(size_t alignment, size_t size, + void *ctx) { + assert(ctx); + if (alignment < sizeof(void *) || (alignment & (alignment - 1)) != 0) { + return NULL; + } + + void *ptr = vgltf_arena_allocate(ctx, size + alignment - 1 + sizeof(void *)); + if (!ptr) { + return NULL; + } + + return (void *)(((uintptr_t)ptr + sizeof(void *) + alignment - 1) & + ~(alignment - 1)); +} + +static void *arena_allocator_allocate_array(size_t count, size_t item_size, + void *ctx) { + assert(ctx); + return vgltf_arena_allocate_array(ctx, count, item_size); +} + +static void *arena_allocator_reallocate(void *ptr, size_t old_size, + size_t new_size, void *ctx) { + assert(ptr); + assert(ctx); + + void *new_ptr = vgltf_arena_allocate(ctx, new_size); + memcpy(new_ptr, ptr, old_size); + return new_ptr; +} + +static void arena_allocator_free(void *ptr, void *ctx) { + assert(ctx); + (void)ptr; +} + +struct vgltf_allocator vgltf_arena_allocator(struct vgltf_arena *arena) { + return (struct vgltf_allocator){ + .ctx = arena, + .allocate = arena_allocator_allocate, + .allocate_aligned = arena_allocator_allocate_aligned, + .allocate_array = arena_allocator_allocate_array, + .reallocate = arena_allocator_reallocate, + .free = arena_allocator_free}; +} diff --git a/src/alloc.h b/src/alloc.h new file mode 100644 index 0000000..bde1d55 --- /dev/null +++ b/src/alloc.h @@ -0,0 +1,40 @@ +#ifndef VGLTF_ALLOC_H +#define VGLTF_ALLOC_H + +#include + +struct vgltf_allocator { + void *(*allocate)(size_t size, void *ctx); + void *(*allocate_aligned)(size_t alignment, size_t size, void *ctx); + void *(*allocate_array)(size_t count, size_t item_size, void *ctx); + void *(*reallocate)(void *ptr, size_t old_size, size_t new_size, void *ctx); + void (*free)(void *ptr, void *ctx); + void *ctx; +}; + +void *vgltf_allocator_allocate(struct vgltf_allocator *allocator, size_t size); +void *vgltf_allocator_allocate_aligned(struct vgltf_allocator *allocator, + size_t alignment, size_t size); +void *vgltf_allocator_allocate_array(struct vgltf_allocator *allocator, + size_t count, size_t item_size); +void *vgltf_allocator_reallocate(struct vgltf_allocator *allocator, void *ptr, + size_t old_size, size_t new_size); +void vgltf_allocator_free(struct vgltf_allocator *allocator, void *ptr); + +extern thread_local struct vgltf_allocator system_allocator; + +struct vgltf_arena { + size_t capacity; + size_t size; + char *data; +}; +void vgltf_arena_init(struct vgltf_allocator *allocator, struct 
vgltf_arena *arena, + size_t size); +void vgltf_arena_deinit(struct vgltf_allocator *allocator, struct vgltf_arena *arena); +void *vgltf_arena_allocate(struct vgltf_arena *arena, size_t size); +void *vgltf_arena_allocate_array(struct vgltf_arena *arena, size_t count, + size_t item_size); +void vgltf_arena_reset(struct vgltf_arena *arena); +struct vgltf_allocator vgltf_arena_allocator(struct vgltf_arena *arena); + +#endif // VGLTF_ALLOC_H diff --git a/src/engine.c b/src/engine.c new file mode 100644 index 0000000..8904474 --- /dev/null +++ b/src/engine.c @@ -0,0 +1,17 @@ +#include "engine.h" + +bool vgltf_engine_init(struct vgltf_engine *engine, struct vgltf_platform *platform) { + if (!vgltf_renderer_init(&engine->renderer, platform)) { + goto err; + } + + return true; +err: + return false; +} +void vgltf_engine_deinit(struct vgltf_engine *engine) { + vgltf_renderer_deinit(&engine->renderer); +} +void vgltf_engine_run_frame(struct vgltf_engine *engine) { + vgltf_renderer_render_frame(&engine->renderer); +} diff --git a/src/engine.h b/src/engine.h new file mode 100644 index 0000000..5a7bc2d --- /dev/null +++ b/src/engine.h @@ -0,0 +1,14 @@ +#ifndef VGLTF_ENGINE_H +#define VGLTF_ENGINE_H + +#include "renderer/renderer.h" + +struct vgltf_engine { + struct vgltf_renderer renderer; +}; + +bool vgltf_engine_init(struct vgltf_engine *engine, struct vgltf_platform *platform); +void vgltf_engine_deinit(struct vgltf_engine *engine); +void vgltf_engine_run_frame(struct vgltf_engine *engine); + +#endif // VGLTF_ENGINE_H diff --git a/src/hash.c b/src/hash.c new file mode 100644 index 0000000..cfdafc3 --- /dev/null +++ b/src/hash.c @@ -0,0 +1,15 @@ +#include "hash.h" +#include + +uint64_t vgltf_hash_fnv_1a(const char *bytes, size_t nbytes) { + assert(bytes); + static const uint64_t FNV_OFFSET_BASIS = 14695981039346656037u; + static const uint64_t FNV_PRIME = 1099511628211u; + uint64_t hash = FNV_OFFSET_BASIS; + for (size_t i = 0; i < nbytes; i++) { + hash = hash ^ bytes[i]; + hash = hash * FNV_PRIME; + } + + return hash; +} diff --git a/src/hash.h b/src/hash.h new file mode 100644 index 0000000..f4f8e76 --- /dev/null +++ b/src/hash.h @@ -0,0 +1,9 @@ +#ifndef VGLTF_HASH_H +#define VGLTF_HASH_H + +#include +#include + +uint64_t vgltf_hash_fnv_1a(const char *bytes, size_t nbytes); + +#endif // VGLTF_HASH_H diff --git a/src/image.c b/src/image.c new file mode 100644 index 0000000..a2d29c7 --- /dev/null +++ b/src/image.c @@ -0,0 +1,20 @@ +#include "image.h" + +#define STB_IMAGE_IMPLEMENTATION +#include + +bool vgltf_image_load_from_file(struct vgltf_image *image, + struct vgltf_string_view path) { + int width; + int height; + int tex_channels; + image->data = + stbi_load(path.data, &width, &height, &tex_channels, STBI_rgb_alpha); + image->width = width; + image->height = height; + image->format = VGLTF_IMAGE_FORMAT_R8G8B8A8; + + return image->data != nullptr; +} + +void vgltf_image_deinit(struct vgltf_image *image) { stbi_image_free(image->data); } diff --git a/src/image.h b/src/image.h new file mode 100644 index 0000000..426d605 --- /dev/null +++ b/src/image.h @@ -0,0 +1,21 @@ +#ifndef VGLTF_IMAGE_H +#define VGLTF_IMAGE_H + +#include +#include "str.h" + +enum vgltf_image_format { + VGLTF_IMAGE_FORMAT_R8G8B8A8, +}; + +struct vgltf_image { + unsigned char* data; + uint32_t width; + uint32_t height; + enum vgltf_image_format format; +}; + +bool vgltf_image_load_from_file(struct vgltf_image* image, struct vgltf_string_view path); +void vgltf_image_deinit(struct vgltf_image* image); + +#endif // VGLTF_IMAGE_H 
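A note on the arena in src/alloc.c above: it is a plain bump allocator that also adapts to the generic struct vgltf_allocator vtable, so arena-backed scratch memory can be handed to any code written against that interface. A minimal usage sketch follows; the 4 KiB capacity and the payload struct are illustrative assumptions, not part of the patch:

#include "alloc.h"

/* Illustrative only: a per-frame scratch arena backed by the system
 * allocator. The 4 KiB size and the payload struct below are assumptions
 * made up for this sketch. */
struct frame_payload {
  float positions[64][3];
};

static void frame_scratch_example(void) {
  struct vgltf_arena arena;
  vgltf_arena_init(&system_allocator, &arena, 4096);

  /* Adapt the arena to the generic vtable so it can be passed to code
   * that only knows about struct vgltf_allocator. */
  struct vgltf_allocator scratch = vgltf_arena_allocator(&arena);
  struct frame_payload *payload =
      vgltf_allocator_allocate(&scratch, sizeof *payload);
  (void)payload;

  /* Bump-pointer reset: everything is released at once; the vtable's
   * free is deliberately a no-op for arena-backed allocations. */
  vgltf_arena_reset(&arena);
  vgltf_arena_deinit(&system_allocator, &arena);
}

This is why arena_allocator_free above does nothing: individual frees make no sense for a bump allocator, and reclaiming memory is a single reset at a known point (e.g. end of frame).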
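src/hash.c implements the standard 64-bit FNV-1a hash. A hedged sketch of the typical use, bucketing string keys into a power-of-two table; BUCKET_COUNT and the helper are assumptions for this sketch, not from the patch:

#include "hash.h"
#include <stddef.h>
#include <string.h>

/* Illustrative only: BUCKET_COUNT is an assumed power-of-two table size,
 * which lets us mask the hash instead of taking a modulo. */
#define BUCKET_COUNT 64

static size_t bucket_for_key(const char *key) {
  uint64_t h = vgltf_hash_fnv_1a(key, strlen(key));
  return (size_t)(h & (BUCKET_COUNT - 1));
}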
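The reworked shaders declare a uniform buffer at set 0, binding 0 (consumed by the vertex stage) and a combined image sampler at binding 1 (consumed by the fragment stage). The matching descriptor set layout lives in src/renderer/renderer.c, which this excerpt does not reproduce, so the following is only a sketch of what those two bindings imply in Vulkan terms, not the patch's actual code:

#include <vulkan/vulkan.h>

/* Hedged sketch: a descriptor set layout mirroring the shader interface in
 * shaders/triangle.vert (UBO at binding 0) and shaders/triangle.frag
 * (combined image sampler at binding 1). The real layout is created in
 * src/renderer/renderer.c, not shown in this diff excerpt. */
static VkResult create_example_layout(VkDevice device,
                                      VkDescriptorSetLayout *out) {
  VkDescriptorSetLayoutBinding bindings[] = {
      {.binding = 0,
       .descriptorType = VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER,
       .descriptorCount = 1,
       .stageFlags = VK_SHADER_STAGE_VERTEX_BIT},
      {.binding = 1,
       .descriptorType = VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER,
       .descriptorCount = 1,
       .stageFlags = VK_SHADER_STAGE_FRAGMENT_BIT},
  };
  VkDescriptorSetLayoutCreateInfo info = {
      .sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_CREATE_INFO,
      .bindingCount = 2,
      .pBindings = bindings,
  };
  return vkCreateDescriptorSetLayout(device, &info, NULL, out);
}

Restricting each binding's stageFlags to the single stage that reads it matches the shaders as written; widening them is only needed if another stage later samples the texture or reads the UBO.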
diff --git a/src/log.c b/src/log.c index 0c1b8a4..99c10dc 100644 --- a/src/log.c +++ b/src/log.c @@ -1,12 +1,5 @@ #include "log.h" -const char *vgltf_log_level_to_str(enum vgltf_log_level level) { - switch (level) { - case VGLTF_LOG_ERROR: - return "error"; - case VGLTF_LOG_INFO: - return "info"; - case VGLTF_LOG_DEBUG: - return "debug"; - } -} +const char *vgltf_log_level_str[] = {[VGLTF_LOG_LEVEL_DBG] = "debug", + [VGLTF_LOG_LEVEL_INFO] = "info", + [VGLTF_LOG_LEVEL_ERR] = "error"}; diff --git a/src/log.h b/src/log.h index 5e9dcc0..faf9edd 100644 --- a/src/log.h +++ b/src/log.h @@ -1,25 +1,26 @@ #ifndef VGLTF_LOG_H #define VGLTF_LOG_H -#include +#include // IWYU pragma: keep enum vgltf_log_level { - VGLTF_LOG_DEBUG, - VGLTF_LOG_INFO, - VGLTF_LOG_ERROR, + VGLTF_LOG_LEVEL_DBG, + VGLTF_LOG_LEVEL_INFO, + VGLTF_LOG_LEVEL_ERR, }; -const char *vgltf_log_level_to_str(enum vgltf_log_level level); -#define VGLTF_LOG(level, ...) \ +extern const char *vgltf_log_level_str[]; + +#define VGLTF_LOG(level, ...) \ do { \ - fprintf(stderr, "[%s %s:%d] ", vgltf_log_level_to_str(level), __FILE__, \ + fprintf(stderr, "[%s %s:%d] ", vgltf_log_level_str[level], __FILE__, \ __LINE__); \ fprintf(stderr, __VA_ARGS__); \ fprintf(stderr, "\n"); \ } while (0) -#define VGLTF_LOG_DBG(...) VGLTF_LOG(VGLTF_LOG_DEBUG, __VA_ARGS__) -#define VGLTF_LOG_INFO(...) VGLTF_LOG(VGLTF_LOG_INFO, __VA_ARGS__) -#define VGLTF_LOG_ERR(...) VGLTF_LOG(VGLTF_LOG_ERROR, __VA_ARGS__) +#define VGLTF_LOG_DBG(...) VGLTF_LOG(VGLTF_LOG_LEVEL_DBG, __VA_ARGS__) +#define VGLTF_LOG_INFO(...) VGLTF_LOG(VGLTF_LOG_LEVEL_INFO, __VA_ARGS__) +#define VGLTF_LOG_ERR(...) VGLTF_LOG(VGLTF_LOG_LEVEL_ERR, __VA_ARGS__) #endif // VGLTF_LOG_H diff --git a/src/main.c b/src/main.c index d2ea49d..d518632 100644 --- a/src/main.c +++ b/src/main.c @@ -1,43 +1,39 @@ +#include "engine.h" #include "log.h" #include "platform.h" -#include "renderer.h" int main(void) { struct vgltf_platform platform = {}; if (!vgltf_platform_init(&platform)) { - VGLTF_LOG_ERR("Couldn't initialize the platform layer"); + VGLTF_LOG_ERR("Platform initialization failed"); goto err; } - struct vgltf_renderer renderer = {}; - if (!vgltf_renderer_init(&renderer, &platform)) { - VGLTF_LOG_ERR("Couldn't initialize the renderer"); + struct vgltf_engine engine = {}; + if (!vgltf_engine_init(&engine, &platform)) { + VGLTF_LOG_ERR("Couldn't initialize the engine"); goto deinit_platform; } + VGLTF_LOG_INFO("Starting main loop"); while (true) { struct vgltf_event event; while (vgltf_platform_poll_event(&platform, &event)) { - if (event.type == VGLTF_EVENT_QUIT || - (event.type == VGLTF_EVENT_KEY_DOWN && - event.key.key == VGLTF_KEY_ESCAPE)) { + if (event.type == VGLTF_EVENT_QUIT || (event.type == VGLTF_EVENT_KEY_DOWN && + event.key.key == VGLTF_KEY_ESCAPE)) { goto out_main_loop; - } else if (event.type == VGLTF_EVENT_WINDOW_RESIZED) { - vgltf_renderer_on_window_resized( - &renderer, - (struct vgltf_window_size){.width = event.window_resized.width, - .height = event.window_resized.height}); } } - vgltf_renderer_triangle_pass(&renderer); + vgltf_engine_run_frame(&engine); } out_main_loop: - vgltf_renderer_deinit(&renderer); + VGLTF_LOG_INFO("Exiting main loop"); + vgltf_engine_deinit(&engine); vgltf_platform_deinit(&platform); return 0; deinit_platform: vgltf_platform_deinit(&platform); err: - return 1; + return -1; } diff --git a/src/maths.c b/src/maths.c new file mode 100644 index 0000000..a79c68f --- /dev/null +++ b/src/maths.c @@ -0,0 +1,103 @@ +#include "maths.h" +#include +#include + +vgltf_vec3 
vgltf_vec3_sub(vgltf_vec3 lhs, vgltf_vec3 rhs) { + return (vgltf_vec3){.x = lhs.x - rhs.x, .y = lhs.y - rhs.y, .z = lhs.z - rhs.z}; +} +vgltf_vec3 vgltf_vec3_cross(vgltf_vec3 lhs, vgltf_vec3 rhs) { + return (vgltf_vec3){.x = lhs.y * rhs.z - lhs.z * rhs.y, + .y = lhs.z * rhs.x - lhs.x * rhs.z, + .z = lhs.x * rhs.y - lhs.y * rhs.x}; +} +vgltf_vec_value_type vgltf_vec3_dot(vgltf_vec3 lhs, vgltf_vec3 rhs) { + return lhs.x * rhs.x + lhs.y * rhs.y + lhs.z * rhs.z; +} +vgltf_vec_value_type vgltf_vec3_length(vgltf_vec3 vec) { + return sqrtf(vec.x * vec.x + vec.y * vec.y + vec.z * vec.z); +} +vgltf_vec3 vgltf_vec3_normalized(vgltf_vec3 vec) { + vgltf_vec_value_type length = vgltf_vec3_length(vec); + return (vgltf_vec3){ + .x = vec.x / length, .y = vec.y / length, .z = vec.z / length}; +} +void vgltf_mat4_multiply(vgltf_mat4 out, vgltf_mat4 lhs, vgltf_mat4 rhs) { + for (int i = 0; i < 4; ++i) { + for (int j = 0; j < 4; ++j) { + out[i * 4 + j] = + lhs[i * 4 + 0] * rhs[0 * 4 + j] + lhs[i * 4 + 1] * rhs[1 * 4 + j] + + lhs[i * 4 + 2] * rhs[2 * 4 + j] + lhs[i * 4 + 3] * rhs[3 * 4 + j]; + } + } +} +void vgltf_mat4_rotate(vgltf_mat4 out, vgltf_mat4 matrix, + vgltf_mat_value_type angle_radians, vgltf_vec3 axis) { + vgltf_vec3 a = vgltf_vec3_normalized(axis); + vgltf_vec_value_type c = cosf(angle_radians); + vgltf_vec_value_type s = sinf(angle_radians); + vgltf_vec_value_type t = 1.f - c; + + vgltf_mat4 rotation_matrix = {t * a.x * a.x + c, + t * a.x * a.y - s * a.z, + t * a.x * a.z + s * a.y, + 0.f, + t * a.x * a.y + s * a.z, + t * a.y * a.y + c, + t * a.y * a.z - s * a.x, + 0.f, + t * a.x * a.z - s * a.y, + t * a.y * a.z + s * a.x, + t * a.z * a.z + c, + 0.f, + 0.f, + 0.f, + 0.f, + 1.f}; + + vgltf_mat4_multiply(out, matrix, rotation_matrix); +} +void vgltf_mat4_look_at(vgltf_mat4 out, vgltf_vec3 eye_position, + vgltf_vec3 target_position, vgltf_vec3 up_axis) { + vgltf_vec3 forward = + vgltf_vec3_normalized(vgltf_vec3_sub(target_position, eye_position)); + vgltf_vec3 right = vgltf_vec3_normalized(vgltf_vec3_cross(forward, up_axis)); + vgltf_vec3 camera_up = vgltf_vec3_cross(right, forward); + + memcpy(out, (const vgltf_mat4)VGLTF_MAT4_IDENTITY, sizeof(vgltf_mat4)); + out[0 * 4 + 0] = right.x; + out[1 * 4 + 0] = right.y; + out[2 * 4 + 0] = right.z; + out[0 * 4 + 1] = camera_up.x; + out[1 * 4 + 1] = camera_up.y; + out[2 * 4 + 1] = camera_up.z; + out[0 * 4 + 2] = -forward.x; + out[1 * 4 + 2] = -forward.y; + out[2 * 4 + 2] = -forward.z; + out[3 * 4 + 0] = -vgltf_vec3_dot(right, eye_position); + out[3 * 4 + 1] = -vgltf_vec3_dot(camera_up, eye_position); + out[3 * 4 + 2] = vgltf_vec3_dot(forward, eye_position); +} +void vgltf_mat4_perspective(vgltf_mat4 out, vgltf_mat_value_type fov_radians, + vgltf_mat_value_type aspect_ratio, + vgltf_mat_value_type near, vgltf_mat_value_type far) { + float tan_half_fovy = tanf(fov_radians / 2.0f); + out[0] = 1.f / (aspect_ratio * tan_half_fovy); + out[1] = 0.0f; + out[2] = 0.0f; + out[3] = 0.0f; + + out[4] = 0.0f; + out[5] = 1.f / tan_half_fovy; + out[6] = 0.0f; + out[7] = 0.0f; + + out[8] = 0.0f; + out[9] = 0.0f; + out[10] = -(far + near) / (far - near); + out[11] = -1.0f; + + out[12] = 0.0f; + out[13] = 0.0f; + out[14] = -(2.0f * far * near) / (far - near); + out[15] = 0.0f; +} diff --git a/src/maths.h b/src/maths.h new file mode 100644 index 0000000..d50f285 --- /dev/null +++ b/src/maths.h @@ -0,0 +1,49 @@ +#ifndef VGLTF_MATHS_H +#define VGLTF_MATHS_H + +typedef float vgltf_vec_value_type; + +constexpr double VGLTF_MATHS_PI = 3.14159265358979323846; +#define 
VGLTF_MATHS_DEG_TO_RAD(deg) (deg * VGLTF_MATHS_PI / 180.0) +#define VGLTF_MAX(x, y) ((x) > (y) ? (x) : (y)) + +typedef struct { + vgltf_vec_value_type x; + vgltf_vec_value_type y; +} vgltf_vec2; + +typedef struct { + vgltf_vec_value_type x; + vgltf_vec_value_type y; + vgltf_vec_value_type z; +} vgltf_vec3; +vgltf_vec3 vgltf_vec3_sub(vgltf_vec3 lhs, vgltf_vec3 rhs); +vgltf_vec3 vgltf_vec3_cross(vgltf_vec3 lhs, vgltf_vec3 rhs); +vgltf_vec_value_type vgltf_vec3_dot(vgltf_vec3 lhs, vgltf_vec3 rhs); + +vgltf_vec_value_type vgltf_vec3_length(vgltf_vec3 vec); +vgltf_vec3 vgltf_vec3_normalized(vgltf_vec3 vec); + +typedef vgltf_vec_value_type vgltf_mat_value_type; + +// row major +typedef vgltf_mat_value_type vgltf_mat4[16]; +void vgltf_mat4_multiply(vgltf_mat4 out, vgltf_mat4 lhs, vgltf_mat4 rhs); +void vgltf_mat4_rotate(vgltf_mat4 out, vgltf_mat4 matrix, + vgltf_mat_value_type angle_radians, vgltf_vec3 axis); +void vgltf_mat4_look_at(vgltf_mat4 out, vgltf_vec3 eye_position, + vgltf_vec3 target_position, vgltf_vec3 up_axis); +void vgltf_mat4_perspective(vgltf_mat4 out, vgltf_mat_value_type fov, + vgltf_mat_value_type aspect_ratio, + vgltf_mat_value_type near, vgltf_mat_value_type far); + +// clang-format off +#define VGLTF_MAT4_IDENTITY { \ + 1, 0, 0, 0, \ + 0, 1, 0, 0, \ + 0, 0, 1, 0, \ + 0, 0, 0, 1, \ +} +// clang-format on + +#endif // VGLTF_MATHS_H diff --git a/src/platform.c b/src/platform.c new file mode 100644 index 0000000..da4d7d4 --- /dev/null +++ b/src/platform.c @@ -0,0 +1,5 @@ +#include "platform.h" + +#define VGLTF_GENERATE_KEY_STRING(KEY) #KEY, +const char *vgltf_key_str[] = {VGLTF_FOREACH_KEY(VGLTF_GENERATE_KEY_STRING)}; +#undef VGLTF_GENERATE_KEY_STRING diff --git a/src/platform.h b/src/platform.h index fe719d3..aff673f 100644 --- a/src/platform.h +++ b/src/platform.h @@ -3,66 +3,61 @@ #include "log.h" #include -#include +#include // IWYU pragma: keep -#define VGLTF_PANIC(...) \ +#define VGLTF_PANIC(...) 
\ do { \ - VGLTF_LOG_ERR("panic: " __VA_ARGS__); \ + VGLTF_LOG_ERR("PANIC " __VA_ARGS__); \ exit(1); \ } while (0) -enum vgltf_event_type { - VGLTF_EVENT_QUIT, - VGLTF_EVENT_KEY_DOWN, - VGLTF_EVENT_WINDOW_RESIZED, - VGLTF_EVENT_UNKNOWN, -}; +#define VGLTF_FOREACH_KEY(_M) \ + _M(A) \ + _M(B) \ + _M(C) \ + _M(D) \ + _M(E) \ + _M(F) \ + _M(G) \ + _M(H) \ + _M(I) \ + _M(J) \ + _M(K) \ + _M(L) \ + _M(M) \ + _M(N) \ + _M(O) \ + _M(P) \ + _M(Q) \ + _M(R) \ + _M(S) \ + _M(T) \ + _M(U) \ + _M(V) \ + _M(W) \ + _M(X) \ + _M(Y) \ + _M(Z) \ + _M(ESCAPE) +#define VGLTF_GENERATE_KEY_ENUM(KEY) VGLTF_KEY_##KEY, enum vgltf_key { - VGLTF_KEY_A, - VGLTF_KEY_B, - VGLTF_KEY_C, - VGLTF_KEY_D, - VGLTF_KEY_E, - VGLTF_KEY_F, - VGLTF_KEY_G, - VGLTF_KEY_H, - VGLTF_KEY_I, - VGLTF_KEY_J, - VGLTF_KEY_K, - VGLTF_KEY_L, - VGLTF_KEY_M, - VGLTF_KEY_N, - VGLTF_KEY_O, - VGLTF_KEY_P, - VGLTF_KEY_Q, - VGLTF_KEY_R, - VGLTF_KEY_S, - VGLTF_KEY_T, - VGLTF_KEY_U, - VGLTF_KEY_V, - VGLTF_KEY_W, - VGLTF_KEY_X, - VGLTF_KEY_Y, - VGLTF_KEY_Z, - VGLTF_KEY_ESCAPE, + VGLTF_FOREACH_KEY(VGLTF_GENERATE_KEY_ENUM) VGLTF_KEY_COUNT, VGLTF_KEY_UNKNOWN }; +#undef VGLTF_GENERATE_KEY_ENUM +extern const char *vgltf_key_str[]; + +enum vgltf_event_type { VGLTF_EVENT_QUIT, VGLTF_EVENT_KEY_DOWN, VGLTF_EVENT_UNKNOWN }; struct vgltf_key_event { enum vgltf_key key; }; -struct vgltf_window_resized_event { - int32_t width; - int32_t height; -}; - struct vgltf_event { enum vgltf_event_type type; union { struct vgltf_key_event key; - struct vgltf_window_resized_event window_resized; }; }; @@ -75,18 +70,19 @@ struct vgltf_platform; bool vgltf_platform_init(struct vgltf_platform *platform); void vgltf_platform_deinit(struct vgltf_platform *platform); bool vgltf_platform_poll_event(struct vgltf_platform *platform, - struct vgltf_event *event); + struct vgltf_event *event); bool vgltf_platform_get_window_size(struct vgltf_platform *platform, - struct vgltf_window_size *window_size); - -// Vulkan specifics -#include "vulkan/vulkan_core.h" -char const *const * + struct vgltf_window_size *window_size); +bool vgltf_platform_get_current_time_nanoseconds(long *time); +char *vgltf_platform_read_file_to_string(const char *filepath, size_t *out_size); +const char *const * vgltf_platform_get_vulkan_instance_extensions(struct vgltf_platform *platform, - uint32_t *count); + uint32_t *count); + +#include bool vgltf_platform_create_vulkan_surface(struct vgltf_platform *platform, - VkInstance instance, - VkSurfaceKHR *surface); + VkInstance instance, + VkSurfaceKHR *surface); #include "platform_sdl.h" diff --git a/src/platform_sdl.c b/src/platform_sdl.c index 5cc6032..6593b9e 100644 --- a/src/platform_sdl.c +++ b/src/platform_sdl.c @@ -1,29 +1,25 @@ +#include "platform_sdl.h" #include "log.h" #include "platform.h" -#include "platform_sdl.h" -#include bool vgltf_platform_init(struct vgltf_platform *platform) { + VGLTF_LOG_INFO("Initializing SDL platform..."); + if (!SDL_Init(SDL_INIT_VIDEO)) { VGLTF_LOG_ERR("SDL initialization failed: %s", SDL_GetError()); goto err; } - constexpr char WINDOW_TITLE[] = "VisibleGLTF"; - constexpr int WINDOW_WIDTH = 800; - constexpr int WINDOW_HEIGHT = 600; - SDL_Window *window = - SDL_CreateWindow(WINDOW_TITLE, WINDOW_WIDTH, WINDOW_HEIGHT, - SDL_WINDOW_VULKAN | SDL_WINDOW_RESIZABLE); - if (!window) { - VGLTF_LOG_ERR("SDL window creation failed: %s", SDL_GetError()); - goto quit_sdl; + platform->window = SDL_CreateWindow("vgltf", 800, 600, SDL_WINDOW_VULKAN); + if (!platform->window) { + VGLTF_LOG_ERR("Window creation failed: %s", SDL_GetError()); + 
goto deinit_sdl; } - platform->window = window; - + VGLTF_LOG_INFO("SDL platform initialized"); return true; -quit_sdl: + +deinit_sdl: SDL_Quit(); err: return false; @@ -31,67 +27,23 @@ err: void vgltf_platform_deinit(struct vgltf_platform *platform) { SDL_DestroyWindow(platform->window); SDL_Quit(); + VGLTF_LOG_INFO("SDL platform deinitialized"); } -static enum vgltf_key vgltf_key_from_sdl_keycode(SDL_Keycode keycode) { - switch (keycode) { - case SDLK_A: - return VGLTF_KEY_A; - case SDLK_B: - return VGLTF_KEY_B; - case SDLK_C: - return VGLTF_KEY_C; - case SDLK_D: - return VGLTF_KEY_D; - case SDLK_E: - return VGLTF_KEY_E; - case SDLK_F: - return VGLTF_KEY_F; - case SDLK_G: - return VGLTF_KEY_G; - case SDLK_H: - return VGLTF_KEY_H; - case SDLK_I: - return VGLTF_KEY_I; - case SDLK_J: - return VGLTF_KEY_J; - case SDLK_K: - return VGLTF_KEY_K; - case SDLK_L: - return VGLTF_KEY_L; - case SDLK_M: - return VGLTF_KEY_M; - case SDLK_N: - return VGLTF_KEY_N; - case SDLK_O: - return VGLTF_KEY_O; - case SDLK_P: - return VGLTF_KEY_P; - case SDLK_Q: - return VGLTF_KEY_Q; - case SDLK_R: - return VGLTF_KEY_R; - case SDLK_S: - return VGLTF_KEY_S; - case SDLK_T: - return VGLTF_KEY_T; - case SDLK_U: - return VGLTF_KEY_U; - case SDLK_V: - return VGLTF_KEY_V; - case SDLK_W: - return VGLTF_KEY_W; - case SDLK_X: - return VGLTF_KEY_X; - case SDLK_Y: - return VGLTF_KEY_Y; - case SDLK_Z: - return VGLTF_KEY_Z; - case SDLK_ESCAPE: - return VGLTF_KEY_ESCAPE; + +#define VGLTF_GENERATE_SDL_KEYCODE_MAPPING(KEY) \ + case SDLK_##KEY: \ + return VGLTF_KEY_##KEY; + +static enum vgltf_key vgltf_key_from_sdl_keycode(SDL_Keycode key_code) { + switch (key_code) { + VGLTF_FOREACH_KEY(VGLTF_GENERATE_SDL_KEYCODE_MAPPING) default: return VGLTF_KEY_UNKNOWN; } } + +#undef VGLTF_GENERATE_SDL_KEYCODE_MAPPING + bool vgltf_platform_poll_event(struct vgltf_platform *platform, struct vgltf_event *event) { (void)platform; @@ -106,16 +58,12 @@ bool vgltf_platform_poll_event(struct vgltf_platform *platform, event->type = VGLTF_EVENT_KEY_DOWN; event->key.key = vgltf_key_from_sdl_keycode(sdl_event.key.key); break; - case SDL_EVENT_WINDOW_RESIZED: - event->type = VGLTF_EVENT_WINDOW_RESIZED; - event->window_resized.width = sdl_event.display.data1; - event->window_resized.height = sdl_event.display.data2; - break; default: event->type = VGLTF_EVENT_UNKNOWN; break; } } + return pending_events; } bool vgltf_platform_get_window_size(struct vgltf_platform *platform, @@ -123,7 +71,31 @@ bool vgltf_platform_get_window_size(struct vgltf_platform *platform, return SDL_GetWindowSize(platform->window, &window_size->width, &window_size->height); } -char const *const * +bool vgltf_platform_get_current_time_nanoseconds(long *time) { + if (!SDL_GetCurrentTime(time)) { + VGLTF_LOG_ERR("'SDL_GetCurrentTime failed: %s", SDL_GetError()); + goto err; + } + + return true; +err: + return false; +} + +char *vgltf_platform_read_file_to_string(const char *filepath, + size_t *out_size) { + char *file_data = SDL_LoadFile(filepath, out_size); + if (!file_data) { + VGLTF_LOG_ERR("Couldn't load file: %s", SDL_GetError()); + return NULL; + } + + return file_data; +} + +#include + +const char *const * vgltf_platform_get_vulkan_instance_extensions(struct vgltf_platform *platform, uint32_t *count) { (void)platform; diff --git a/src/renderer.c b/src/renderer.c deleted file mode 100644 index 7022af6..0000000 --- a/src/renderer.c +++ /dev/null @@ -1,1470 +0,0 @@ -#include "log.h" -#include "renderer.h" -#include "src/platform.h" -#include "vulkan/vulkan_core.h" -#include - -static 
const char *VALIDATION_LAYERS[] = {"VK_LAYER_KHRONOS_validation"}; -static constexpr int VALIDATION_LAYER_COUNT = - sizeof(VALIDATION_LAYERS) / sizeof(VALIDATION_LAYERS[0]); - -#ifdef VGLTF_DEBUG -static constexpr bool enable_validation_layers = true; -#else -static constexpr bool enable_validation_layers = false; -#endif - -static VKAPI_ATTR VkBool32 VKAPI_CALL -debug_callback(VkDebugUtilsMessageSeverityFlagBitsEXT message_severity, - VkDebugUtilsMessageTypeFlagBitsEXT message_type, - const VkDebugUtilsMessengerCallbackDataEXT *callback_data, - void *user_data) { - (void)message_severity; - (void)message_type; - (void)user_data; - VGLTF_LOG_DBG("validation layer: %s", callback_data->pMessage); - return VK_FALSE; -} - -static constexpr int REQUIRED_INSTANCE_EXTENSIONS_ARRAY_CAPACITY = 10; -struct required_instance_extensions { - const char *extensions[REQUIRED_INSTANCE_EXTENSIONS_ARRAY_CAPACITY]; - uint32_t count; -}; -void required_instance_extensions_push( - struct required_instance_extensions *required_instance_extensions, - const char *required_instance_extension) { - if (required_instance_extensions->count == - REQUIRED_INSTANCE_EXTENSIONS_ARRAY_CAPACITY) { - VGLTF_PANIC("required instance extensions array is full"); - } - required_instance_extensions - ->extensions[required_instance_extensions->count++] = - required_instance_extension; -} - -static constexpr int SUPPORTED_INSTANCE_EXTENSIONS_ARRAY_CAPACITY = 128; -struct supported_instance_extensions { - VkExtensionProperties - properties[SUPPORTED_INSTANCE_EXTENSIONS_ARRAY_CAPACITY]; - uint32_t count; -}; -bool supported_instance_extensions_init( - struct supported_instance_extensions *supported_instance_extensions) { - if (vkEnumerateInstanceExtensionProperties( - nullptr, &supported_instance_extensions->count, nullptr) != - VK_SUCCESS) { - goto err; - } - - if (supported_instance_extensions->count > - SUPPORTED_INSTANCE_EXTENSIONS_ARRAY_CAPACITY) { - VGLTF_LOG_ERR("supported instance extensions array cannot fit all the " - "VkExtensionProperties"); - goto err; - } - - if (vkEnumerateInstanceExtensionProperties( - nullptr, &supported_instance_extensions->count, - supported_instance_extensions->properties) != VK_SUCCESS) { - goto err; - } - return true; -err: - return false; -} -void supported_instance_extensions_debug_print( - const struct supported_instance_extensions *supported_instance_extensions) { - VGLTF_LOG_DBG("Supported instance extensions:"); - for (uint32_t i = 0; i < supported_instance_extensions->count; i++) { - VGLTF_LOG_DBG("\t- %s", - supported_instance_extensions->properties[i].extensionName); - } -} -bool supported_instance_extensions_includes( - const struct supported_instance_extensions *supported_instance_extensions, - const char *extension_name) { - for (uint32_t supported_instance_extension_index = 0; - supported_instance_extension_index < - supported_instance_extensions->count; - supported_instance_extension_index++) { - const VkExtensionProperties *extension_properties = - &supported_instance_extensions - ->properties[supported_instance_extension_index]; - if (strcmp(extension_properties->extensionName, extension_name) == 0) { - return true; - } - } - - return false; -} - -static constexpr uint32_t SUPPORTED_VALIDATION_LAYERS_ARRAY_CAPACITY = 64; -struct supported_validation_layers { - VkLayerProperties properties[SUPPORTED_VALIDATION_LAYERS_ARRAY_CAPACITY]; - uint32_t count; -}; -bool supported_validation_layers_init( - struct supported_validation_layers *supported_validation_layers) { - if 
(vkEnumerateInstanceLayerProperties(&supported_validation_layers->count, - nullptr) != VK_SUCCESS) { - goto err; - } - - if (supported_validation_layers->count > - SUPPORTED_VALIDATION_LAYERS_ARRAY_CAPACITY) { - VGLTF_LOG_ERR("supported validation layers array cannot fit all the " - "VkLayerProperties"); - goto err; - } - - if (vkEnumerateInstanceLayerProperties( - &supported_validation_layers->count, - supported_validation_layers->properties) != VK_SUCCESS) { - goto err; - } - - return true; -err: - return false; -} - -static bool are_validation_layer_supported() { - struct supported_validation_layers supported_layers = {}; - if (!supported_validation_layers_init(&supported_layers)) { - goto err; - } - - for (int requested_layer_index = 0; - requested_layer_index < VALIDATION_LAYER_COUNT; - requested_layer_index++) { - const char *requested_layer_name = VALIDATION_LAYERS[requested_layer_index]; - bool requested_layer_found = false; - for (uint32_t supported_layer_index = 0; - supported_layer_index < supported_layers.count; - supported_layer_index++) { - VkLayerProperties *supported_layer = - &supported_layers.properties[supported_layer_index]; - if (strcmp(requested_layer_name, supported_layer->layerName) == 0) { - requested_layer_found = true; - break; - } - } - - if (!requested_layer_found) { - goto err; - } - } - - return true; -err: - return false; -} - -static bool fetch_required_instance_extensions( - struct required_instance_extensions *required_extensions, - struct vgltf_platform *platform) { - struct supported_instance_extensions supported_extensions = {}; - if (!supported_instance_extensions_init(&supported_extensions)) { - VGLTF_LOG_ERR( - "Couldn't fetch supported instance extensions details (OOM?)"); - goto err; - } - supported_instance_extensions_debug_print(&supported_extensions); - - uint32_t platform_required_extension_count = 0; - const char *const *platform_required_extensions = - vgltf_platform_get_vulkan_instance_extensions( - platform, &platform_required_extension_count); - for (uint32_t platform_required_extension_index = 0; - platform_required_extension_index < platform_required_extension_count; - platform_required_extension_index++) { - required_instance_extensions_push( - required_extensions, - platform_required_extensions[platform_required_extension_index]); - } - required_instance_extensions_push( - required_extensions, VK_KHR_PORTABILITY_ENUMERATION_EXTENSION_NAME); - - if (enable_validation_layers) { - required_instance_extensions_push(required_extensions, - VK_EXT_DEBUG_UTILS_EXTENSION_NAME); - } - - bool all_extensions_supported = true; - for (uint32_t required_extension_index = 0; - required_extension_index < required_extensions->count; - required_extension_index++) { - const char *required_extension_name = - required_extensions->extensions[required_extension_index]; - if (!supported_instance_extensions_includes(&supported_extensions, - required_extension_name)) { - VGLTF_LOG_ERR("Unsupported instance extension: %s", - required_extension_name); - all_extensions_supported = false; - } - } - - if (!all_extensions_supported) { - VGLTF_LOG_ERR("Some required extensions are unsupported."); - goto err; - } - - return true; -err: - return false; -} - -static void populate_debug_messenger_create_info( - VkDebugUtilsMessengerCreateInfoEXT *create_info) { - *create_info = (VkDebugUtilsMessengerCreateInfoEXT){}; - create_info->sType = VK_STRUCTURE_TYPE_DEBUG_UTILS_MESSENGER_CREATE_INFO_EXT; - create_info->messageSeverity = - 
VK_DEBUG_UTILS_MESSAGE_SEVERITY_VERBOSE_BIT_EXT | - VK_DEBUG_UTILS_MESSAGE_SEVERITY_WARNING_BIT_EXT | - VK_DEBUG_UTILS_MESSAGE_SEVERITY_ERROR_BIT_EXT; - create_info->messageType = VK_DEBUG_UTILS_MESSAGE_TYPE_GENERAL_BIT_EXT | - VK_DEBUG_UTILS_MESSAGE_TYPE_VALIDATION_BIT_EXT | - VK_DEBUG_UTILS_MESSAGE_TYPE_PERFORMANCE_BIT_EXT; - create_info->pfnUserCallback = debug_callback; -} - -static bool vgltf_renderer_create_instance(struct vgltf_renderer *renderer, - struct vgltf_platform *platform) { - VGLTF_LOG_INFO("Creating vulkan instance..."); - if (enable_validation_layers && !are_validation_layer_supported()) { - VGLTF_LOG_ERR("Requested validation layers aren't supported"); - goto err; - } - - VkApplicationInfo application_info = { - .sType = VK_STRUCTURE_TYPE_APPLICATION_INFO, - .pApplicationName = "Visible GLTF", - .applicationVersion = VK_MAKE_VERSION(0, 1, 0), - .pEngineName = "No Engine", - .engineVersion = VK_MAKE_VERSION(1, 0, 0), - .apiVersion = VK_API_VERSION_1_2}; - - struct required_instance_extensions required_extensions = {}; - fetch_required_instance_extensions(&required_extensions, platform); - - VkInstanceCreateInfo create_info = { - .sType = VK_STRUCTURE_TYPE_INSTANCE_CREATE_INFO, - .pApplicationInfo = &application_info, - .enabledExtensionCount = required_extensions.count, - .ppEnabledExtensionNames = required_extensions.extensions, - .flags = VK_INSTANCE_CREATE_ENUMERATE_PORTABILITY_BIT_KHR}; - - VkDebugUtilsMessengerCreateInfoEXT debug_create_info; - if (enable_validation_layers) { - create_info.enabledLayerCount = VALIDATION_LAYER_COUNT; - create_info.ppEnabledLayerNames = VALIDATION_LAYERS; - populate_debug_messenger_create_info(&debug_create_info); - create_info.pNext = &debug_create_info; - } - - if (vkCreateInstance(&create_info, nullptr, &renderer->instance) != - VK_SUCCESS) { - VGLTF_LOG_ERR("Failed to create VkInstance"); - goto err; - } - - return true; -err: - return false; -} - -static VkResult create_debug_utils_messenger_ext( - VkInstance instance, const VkDebugUtilsMessengerCreateInfoEXT *create_info, - const VkAllocationCallbacks *allocator, - VkDebugUtilsMessengerEXT *debug_messenger) { - auto func = (PFN_vkCreateDebugUtilsMessengerEXT)vkGetInstanceProcAddr( - instance, "vkCreateDebugUtilsMessengerEXT"); - if (func != nullptr) { - return func(instance, create_info, allocator, debug_messenger); - } - - return VK_ERROR_EXTENSION_NOT_PRESENT; -} - -static void -destroy_debug_utils_messenger_ext(VkInstance instance, - VkDebugUtilsMessengerEXT debug_messenger, - const VkAllocationCallbacks *allocator) { - auto func = (PFN_vkDestroyDebugUtilsMessengerEXT)vkGetInstanceProcAddr( - instance, "vkDestroyDebugUtilsMessengerEXT"); - if (func != nullptr) { - func(instance, debug_messenger, allocator); - } -} - -static void -vgltf_renderer_setup_debug_messenger(struct vgltf_renderer *renderer) { - if (!enable_validation_layers) - return; - VkDebugUtilsMessengerCreateInfoEXT create_info; - populate_debug_messenger_create_info(&create_info); - create_debug_utils_messenger_ext(renderer->instance, &create_info, nullptr, - &renderer->debug_messenger); -} - -static constexpr int AVAILABLE_PHYSICAL_DEVICE_ARRAY_CAPACITY = 128; -struct available_physical_devices { - VkPhysicalDevice devices[AVAILABLE_PHYSICAL_DEVICE_ARRAY_CAPACITY]; - uint32_t count; -}; -static bool -available_physical_devices_init(VkInstance instance, - struct available_physical_devices *devices) { - - if (vkEnumeratePhysicalDevices(instance, &devices->count, nullptr) != - VK_SUCCESS) { - 
VGLTF_LOG_ERR("Couldn't enumerate physical devices"); - goto err; - } - - if (devices->count == 0) { - VGLTF_LOG_ERR("Failed to find any GPU with Vulkan support"); - goto err; - } - - if (devices->count > AVAILABLE_PHYSICAL_DEVICE_ARRAY_CAPACITY) { - VGLTF_LOG_ERR("available physical devices array cannot fit all available " - "physical devices"); - goto err; - } - - if (vkEnumeratePhysicalDevices(instance, &devices->count, devices->devices) != - VK_SUCCESS) { - VGLTF_LOG_ERR("Couldn't enumerate physical devices"); - goto err; - } - - return true; -err: - return false; -} - -struct queue_family_indices { - uint32_t graphics_family; - uint32_t present_family; - bool has_graphics_family; - bool has_present_family; -}; -bool queue_family_indices_is_complete( - const struct queue_family_indices *indices) { - return indices->has_graphics_family && indices->has_present_family; -} -bool queue_family_indices_for_device(struct queue_family_indices *indices, - VkPhysicalDevice device, - VkSurfaceKHR surface) { - static constexpr uint32_t QUEUE_FAMILY_PROPERTIES_ARRAY_CAPACITY = 64; - uint32_t queue_family_count = 0; - vkGetPhysicalDeviceQueueFamilyProperties(device, &queue_family_count, - nullptr); - - if (queue_family_count > QUEUE_FAMILY_PROPERTIES_ARRAY_CAPACITY) { - VGLTF_LOG_ERR( - "Queue family properties array cannot fit all queue family properties"); - goto err; - } - - VkQueueFamilyProperties - queue_family_properties[QUEUE_FAMILY_PROPERTIES_ARRAY_CAPACITY] = {}; - vkGetPhysicalDeviceQueueFamilyProperties(device, &queue_family_count, - queue_family_properties); - - for (uint32_t queue_family_index = 0; queue_family_index < queue_family_count; - queue_family_index++) { - VkQueueFamilyProperties *queue_family = - &queue_family_properties[queue_family_index]; - - VkBool32 present_support; - vkGetPhysicalDeviceSurfaceSupportKHR(device, queue_family_index, surface, - &present_support); - - if (queue_family->queueFlags & VK_QUEUE_GRAPHICS_BIT) { - indices->graphics_family = queue_family_index; - indices->has_graphics_family = true; - } - - if (present_support) { - indices->present_family = queue_family_index; - indices->has_present_family = true; - } - - if (queue_family_indices_is_complete(indices)) { - break; - } - } - - return true; -err: - return false; -} - -static bool is_in_array(uint32_t *array, int length, uint32_t value) { - for (int i = 0; i < length; i++) { - if (array[i] == value) { - return true; - } - } - - return false; -} - -static constexpr uint32_t SUPPORTED_EXTENSIONS_ARRAY_CAPACITY = 128; -struct supported_extensions { - VkExtensionProperties properties[SUPPORTED_EXTENSIONS_ARRAY_CAPACITY]; - uint32_t count; -}; -bool supported_extensions_init( - struct supported_extensions *supported_extensions, - VkPhysicalDevice device) { - if (vkEnumerateDeviceExtensionProperties(device, nullptr, - &supported_extensions->count, - nullptr) != VK_SUCCESS) { - goto err; - } - - if (supported_extensions->count > SUPPORTED_EXTENSIONS_ARRAY_CAPACITY) { - VGLTF_LOG_ERR( - "supported extensions aarray cannot fit all the VkExtensionProperties"); - goto err; - } - - if (vkEnumerateDeviceExtensionProperties( - device, nullptr, &supported_extensions->count, - supported_extensions->properties) != VK_SUCCESS) { - goto err; - } - - return true; -err: - return false; -} - -static bool supported_extensions_includes_extension( - struct supported_extensions *supported_extensions, - const char *extension_name) { - for (uint32_t supported_extension_index = 0; - supported_extension_index < 
supported_extensions->count; - supported_extension_index++) { - if (strcmp(supported_extensions->properties[supported_extension_index] - .extensionName, - extension_name) == 0) { - return true; - } - } - return false; -} - -static const char *DEVICE_EXTENSIONS[] = {VK_KHR_SWAPCHAIN_EXTENSION_NAME, - "VK_KHR_portability_subset"}; -static constexpr int DEVICE_EXTENSION_COUNT = - sizeof(DEVICE_EXTENSIONS) / sizeof(DEVICE_EXTENSIONS[0]); -static bool are_device_extensions_supported(VkPhysicalDevice device) { - struct supported_extensions supported_extensions = {}; - if (!supported_extensions_init(&supported_extensions, device)) { - goto err; - } - - for (uint32_t required_extension_index = 0; - required_extension_index < DEVICE_EXTENSION_COUNT; - required_extension_index++) { - if (!supported_extensions_includes_extension( - &supported_extensions, - DEVICE_EXTENSIONS[required_extension_index])) { - VGLTF_LOG_DBG("Unsupported: %s", - DEVICE_EXTENSIONS[required_extension_index]); - goto err; - } - } - - return true; - -err: - return false; -} - -static constexpr int SWAPCHAIN_SUPPORT_DETAILS_MAX_SURFACE_FORMAT_COUNT = 256; -static constexpr int SWAPCHAIN_SUPPORT_DETAILS_MAX_PRESENT_MODE_COUNT = 256; -struct swapchain_support_details { - VkSurfaceCapabilitiesKHR capabilities; - VkSurfaceFormatKHR - formats[SWAPCHAIN_SUPPORT_DETAILS_MAX_SURFACE_FORMAT_COUNT]; - VkPresentModeKHR - present_modes[SWAPCHAIN_SUPPORT_DETAILS_MAX_PRESENT_MODE_COUNT]; - uint32_t format_count; - uint32_t present_mode_count; -}; -bool swapchain_support_details_query_from_device( - struct swapchain_support_details *swapchain_support_details, - VkPhysicalDevice device, VkSurfaceKHR surface) { - if (vkGetPhysicalDeviceSurfaceCapabilitiesKHR( - device, surface, &swapchain_support_details->capabilities) != - VK_SUCCESS) { - goto err; - } - - if (vkGetPhysicalDeviceSurfaceFormatsKHR( - device, surface, &swapchain_support_details->format_count, nullptr) != - VK_SUCCESS) { - goto err; - } - - if (swapchain_support_details->format_count != 0 && - swapchain_support_details->format_count < - SWAPCHAIN_SUPPORT_DETAILS_MAX_SURFACE_FORMAT_COUNT) { - if (vkGetPhysicalDeviceSurfaceFormatsKHR( - device, surface, &swapchain_support_details->format_count, - swapchain_support_details->formats) != VK_SUCCESS) { - goto err; - } - } - - if (vkGetPhysicalDeviceSurfacePresentModesKHR( - device, surface, &swapchain_support_details->present_mode_count, - nullptr) != VK_SUCCESS) { - goto err; - } - - if (swapchain_support_details->present_mode_count != 0 && - swapchain_support_details->present_mode_count < - SWAPCHAIN_SUPPORT_DETAILS_MAX_PRESENT_MODE_COUNT) { - if (vkGetPhysicalDeviceSurfacePresentModesKHR( - device, surface, &swapchain_support_details->present_mode_count, - swapchain_support_details->present_modes) != VK_SUCCESS) { - goto err; - } - } - - return true; -err: - return false; -} - -static bool is_physical_device_suitable(VkPhysicalDevice device, - VkSurfaceKHR surface) { - struct queue_family_indices indices = {}; - queue_family_indices_for_device(&indices, device, surface); - - VGLTF_LOG_DBG("Checking for physical device extension support"); - bool extensions_supported = are_device_extensions_supported(device); - VGLTF_LOG_DBG("Supported: %d", extensions_supported); - - bool swapchain_adequate = false; - if (extensions_supported) { - - VGLTF_LOG_DBG("Checking for swapchain support details"); - struct swapchain_support_details swapchain_support_details = {}; - if (!swapchain_support_details_query_from_device(&swapchain_support_details, 
- device, surface)) { - VGLTF_LOG_ERR("Couldn't query swapchain support details from device"); - goto err; - } - - swapchain_adequate = swapchain_support_details.format_count > 0 && - swapchain_support_details.present_mode_count > 0; - } - - return queue_family_indices_is_complete(&indices) && extensions_supported && - swapchain_adequate; -err: - return false; -} - -static bool -vgltf_renderer_pick_physical_device(struct vgltf_renderer *renderer) { - VkPhysicalDevice physical_device = VK_NULL_HANDLE; - - struct available_physical_devices available_physical_devices = {}; - if (!available_physical_devices_init(renderer->instance, - &available_physical_devices)) { - VGLTF_LOG_ERR("Couldn't fetch available physical devices"); - goto err; - } - - for (uint32_t available_physical_device_index = 0; - available_physical_device_index < available_physical_devices.count; - available_physical_device_index++) { - VkPhysicalDevice available_physical_device = - available_physical_devices.devices[available_physical_device_index]; - if (is_physical_device_suitable(available_physical_device, - renderer->surface)) { - physical_device = available_physical_device; - break; - } - } - - if (physical_device == VK_NULL_HANDLE) { - VGLTF_LOG_ERR("Failed to find a suitable GPU"); - goto err; - } - - renderer->physical_device = physical_device; - - return true; -err: - return false; -} - -static bool -vgltf_renderer_create_logical_device(struct vgltf_renderer *renderer) { - struct queue_family_indices queue_family_indices = {}; - queue_family_indices_for_device(&queue_family_indices, - renderer->physical_device, renderer->surface); - static constexpr int MAX_QUEUE_FAMILY_COUNT = 2; - - uint32_t unique_queue_families[MAX_QUEUE_FAMILY_COUNT] = {}; - int unique_queue_family_count = 0; - - if (!is_in_array(unique_queue_families, unique_queue_family_count, - queue_family_indices.graphics_family)) { - assert(unique_queue_family_count < MAX_QUEUE_FAMILY_COUNT); - unique_queue_families[unique_queue_family_count++] = - queue_family_indices.graphics_family; - } - if (!is_in_array(unique_queue_families, unique_queue_family_count, - queue_family_indices.present_family)) { - assert(unique_queue_family_count < MAX_QUEUE_FAMILY_COUNT); - unique_queue_families[unique_queue_family_count++] = - queue_family_indices.present_family; - } - - float queue_priority = 1.f; - VkDeviceQueueCreateInfo queue_create_infos[MAX_QUEUE_FAMILY_COUNT] = {}; - int queue_create_info_count = 0; - for (int unique_queue_family_index = 0; - unique_queue_family_index < unique_queue_family_count; - unique_queue_family_index++) { - queue_create_infos[queue_create_info_count++] = (VkDeviceQueueCreateInfo){ - .sType = VK_STRUCTURE_TYPE_DEVICE_QUEUE_CREATE_INFO, - .queueFamilyIndex = unique_queue_families[unique_queue_family_index], - .queueCount = 1, - .pQueuePriorities = &queue_priority}; - } - - VkPhysicalDeviceFeatures device_features = {}; - VkDeviceCreateInfo create_info = { - .sType = VK_STRUCTURE_TYPE_DEVICE_CREATE_INFO, - .pQueueCreateInfos = queue_create_infos, - .queueCreateInfoCount = queue_create_info_count, - .pEnabledFeatures = &device_features, - .ppEnabledExtensionNames = DEVICE_EXTENSIONS, - .enabledExtensionCount = DEVICE_EXTENSION_COUNT}; - if (vkCreateDevice(renderer->physical_device, &create_info, nullptr, - &renderer->device) != VK_SUCCESS) { - VGLTF_LOG_ERR("Failed to create logical device"); - goto err; - } - - vkGetDeviceQueue(renderer->device, queue_family_indices.graphics_family, 0, - &renderer->graphics_queue); - 
vkGetDeviceQueue(renderer->device, queue_family_indices.present_family, 0, - &renderer->present_queue); - - return true; -err: - return false; -} - -static bool vgltf_renderer_create_surface(struct vgltf_renderer *renderer, - struct vgltf_platform *platform) { - if (!vgltf_platform_create_vulkan_surface(platform, renderer->instance, - &renderer->surface)) { - VGLTF_LOG_ERR("Couldn't create surface"); - goto err; - } - - return true; -err: - return false; -} - -static VkSurfaceFormatKHR -choose_swapchain_surface_format(VkSurfaceFormatKHR *available_formats, - uint32_t available_format_count) { - for (uint32_t available_format_index = 0; - available_format_index < available_format_count; - available_format_index++) { - VkSurfaceFormatKHR *available_format = - &available_formats[available_format_index]; - if (available_format->format == VK_FORMAT_B8G8R8A8_SRGB && - available_format->colorSpace == VK_COLOR_SPACE_SRGB_NONLINEAR_KHR) { - return *available_format; - } - } - - return available_formats[0]; -} - -static VkPresentModeKHR -choose_swapchain_present_mode(VkPresentModeKHR *available_modes, - uint32_t available_mode_count) { - for (uint32_t available_mode_index = 0; - available_mode_index < available_mode_count; available_mode_index++) { - VkPresentModeKHR available_mode = available_modes[available_mode_index]; - if (available_mode == VK_PRESENT_MODE_MAILBOX_KHR) { - return available_mode; - } - } - - return VK_PRESENT_MODE_FIFO_KHR; -} - -static uint32_t clamp_uint32(uint32_t min, uint32_t max, uint32_t value) { - return value < min ? min : value > max ? max : value; -} - -static VkExtent2D -choose_swapchain_extent(const VkSurfaceCapabilitiesKHR *capabilities, int width, - int height) { - if (capabilities->currentExtent.width != UINT32_MAX) { - return capabilities->currentExtent; - } else { - VkExtent2D actual_extent = {width, height}; - actual_extent.width = - clamp_uint32(capabilities->minImageExtent.width, - capabilities->maxImageExtent.width, actual_extent.width); - actual_extent.height = - clamp_uint32(capabilities->minImageExtent.height, - capabilities->maxImageExtent.height, actual_extent.height); - return actual_extent; - } -} - -static bool vgltf_renderer_create_swapchain(struct vgltf_renderer *renderer) { - struct swapchain_support_details swapchain_support_details = {}; - swapchain_support_details_query_from_device( - &swapchain_support_details, renderer->physical_device, renderer->surface); - - VkSurfaceFormatKHR surface_format = - choose_swapchain_surface_format(swapchain_support_details.formats, - swapchain_support_details.format_count); - VkPresentModeKHR present_mode = choose_swapchain_present_mode( - swapchain_support_details.present_modes, - swapchain_support_details.present_mode_count); - - VkExtent2D extent = choose_swapchain_extent( - &swapchain_support_details.capabilities, renderer->window_size.width, - renderer->window_size.height); - uint32_t image_count = - swapchain_support_details.capabilities.minImageCount + 1; - if (swapchain_support_details.capabilities.maxImageCount > 0 && - image_count > swapchain_support_details.capabilities.maxImageCount) { - image_count = swapchain_support_details.capabilities.maxImageCount; - } - - VkSwapchainCreateInfoKHR create_info = { - .sType = VK_STRUCTURE_TYPE_SWAPCHAIN_CREATE_INFO_KHR, - .surface = renderer->surface, - .minImageCount = image_count, - .imageFormat = surface_format.format, - .imageColorSpace = surface_format.colorSpace, - .imageExtent = extent, - .imageArrayLayers = 1, - .imageUsage = 
VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT}; - struct queue_family_indices indices = {}; - queue_family_indices_for_device(&indices, renderer->physical_device, - renderer->surface); - uint32_t queue_family_indices[] = {indices.graphics_family, - indices.present_family}; - if (indices.graphics_family != indices.present_family) { - create_info.imageSharingMode = VK_SHARING_MODE_CONCURRENT; - create_info.queueFamilyIndexCount = 2; - create_info.pQueueFamilyIndices = queue_family_indices; - } else { - create_info.imageSharingMode = VK_SHARING_MODE_EXCLUSIVE; - } - - create_info.preTransform = - swapchain_support_details.capabilities.currentTransform; - create_info.compositeAlpha = VK_COMPOSITE_ALPHA_OPAQUE_BIT_KHR; - create_info.presentMode = present_mode; - create_info.clipped = VK_TRUE; - create_info.oldSwapchain = VK_NULL_HANDLE; - - if (vkCreateSwapchainKHR(renderer->device, &create_info, nullptr, - &renderer->swapchain) != VK_SUCCESS) { - VGLTF_LOG_ERR("Swapchain creation failed!"); - goto err; - } - - if (vkGetSwapchainImagesKHR(renderer->device, renderer->swapchain, - &renderer->swapchain_image_count, - nullptr) != VK_SUCCESS) { - VGLTF_LOG_ERR("Couldn't get swapchain image count"); - goto destroy_swapchain; - } - - if (renderer->swapchain_image_count > - VGLTF_RENDERER_MAX_SWAPCHAIN_IMAGE_COUNT) { - VGLTF_LOG_ERR("Swapchain image array cannot fit all %d swapchain images", - renderer->swapchain_image_count); - goto destroy_swapchain; - } - - if (vkGetSwapchainImagesKHR(renderer->device, renderer->swapchain, - &renderer->swapchain_image_count, - renderer->swapchain_images) != VK_SUCCESS) { - VGLTF_LOG_ERR("Couldn't get swapchain images"); - goto destroy_swapchain; - } - - renderer->swapchain_image_format = surface_format.format; - renderer->swapchain_extent = extent; - - return true; -destroy_swapchain: - vkDestroySwapchainKHR(renderer->device, renderer->swapchain, nullptr); -err: - return false; -} - -static bool vgltf_renderer_create_image_views(struct vgltf_renderer *renderer) { - uint32_t swapchain_image_index; - for (swapchain_image_index = 0; - swapchain_image_index < renderer->swapchain_image_count; - swapchain_image_index++) { - VkImage swapchain_image = renderer->swapchain_images[swapchain_image_index]; - - VkImageViewCreateInfo create_info = { - .sType = VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO, - .image = swapchain_image, - .viewType = VK_IMAGE_VIEW_TYPE_2D, - .format = renderer->swapchain_image_format, - .components = {VK_COMPONENT_SWIZZLE_IDENTITY, - VK_COMPONENT_SWIZZLE_IDENTITY, - VK_COMPONENT_SWIZZLE_IDENTITY, - VK_COMPONENT_SWIZZLE_IDENTITY}, - .subresourceRange = {.aspectMask = VK_IMAGE_ASPECT_COLOR_BIT, - .levelCount = 1, - .layerCount = 1}}; - - if (vkCreateImageView( - renderer->device, &create_info, nullptr, - &renderer->swapchain_image_views[swapchain_image_index]) != - VK_SUCCESS) { - goto err; - } - } - return true; -err: - for (uint32_t to_remove_index = 0; to_remove_index < swapchain_image_index; - to_remove_index++) { - vkDestroyImageView(renderer->device, - renderer->swapchain_image_views[to_remove_index], - nullptr); - } - return false; -} - -static bool create_shader_module(VkDevice device, const unsigned char *code, - int size, VkShaderModule *out) { - VkShaderModuleCreateInfo create_info = { - .sType = VK_STRUCTURE_TYPE_SHADER_MODULE_CREATE_INFO, - .codeSize = size, - .pCode = (const uint32_t *)code, - }; - if (vkCreateShaderModule(device, &create_info, nullptr, out) != VK_SUCCESS) { - VGLTF_LOG_ERR("Couldn't create shader module"); - goto err; - } - return 
true; -err: - return false; -} - -static bool vgltf_renderer_create_render_pass(struct vgltf_renderer *renderer) { - VkAttachmentDescription color_attachment = { - .format = renderer->swapchain_image_format, - .samples = VK_SAMPLE_COUNT_1_BIT, - .loadOp = VK_ATTACHMENT_LOAD_OP_CLEAR, - .storeOp = VK_ATTACHMENT_STORE_OP_STORE, - .stencilLoadOp = VK_ATTACHMENT_LOAD_OP_DONT_CARE, - .stencilStoreOp = VK_ATTACHMENT_STORE_OP_DONT_CARE, - .initialLayout = VK_IMAGE_LAYOUT_UNDEFINED, - .finalLayout = VK_IMAGE_LAYOUT_PRESENT_SRC_KHR}; - VkAttachmentReference color_attachment_ref = { - .attachment = 0, - .layout = VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL, - }; - VkSubpassDescription subpass = {.pipelineBindPoint = - VK_PIPELINE_BIND_POINT_GRAPHICS, - .pColorAttachments = &color_attachment_ref, - .colorAttachmentCount = 1}; - VkSubpassDependency dependency = { - .srcSubpass = VK_SUBPASS_EXTERNAL, - .dstSubpass = 0, - .srcStageMask = VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT, - .srcAccessMask = 0, - .dstStageMask = VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT, - .dstAccessMask = VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT}; - - VkRenderPassCreateInfo render_pass_info = { - .sType = VK_STRUCTURE_TYPE_RENDER_PASS_CREATE_INFO, - .attachmentCount = 1, - .pAttachments = &color_attachment, - .subpassCount = 1, - .pSubpasses = &subpass, - .dependencyCount = 1, - .pDependencies = &dependency}; - - if (vkCreateRenderPass(renderer->device, &render_pass_info, nullptr, - &renderer->render_pass) != VK_SUCCESS) { - VGLTF_LOG_ERR("Failed to create render pass"); - goto err; - } - - return true; -err: - return false; -} - -static bool -vgltf_renderer_create_graphics_pipeline(struct vgltf_renderer *renderer) { - static constexpr unsigned char triangle_shader_vert_code[] = { -#embed "../compiled_shaders/triangle.vert.spv" - }; - static constexpr unsigned char triangle_shader_frag_code[] = { -#embed "../compiled_shaders/triangle.frag.spv" - }; - - VkShaderModule triangle_shader_vert_module; - if (!create_shader_module(renderer->device, triangle_shader_vert_code, - sizeof(triangle_shader_vert_code), - &triangle_shader_vert_module)) { - VGLTF_LOG_ERR("Couldn't create triangle vert shader module"); - goto err; - } - - VkShaderModule triangle_shader_frag_module; - if (!create_shader_module(renderer->device, triangle_shader_frag_code, - sizeof(triangle_shader_frag_code), - &triangle_shader_frag_module)) { - VGLTF_LOG_ERR("Couldn't create triangle frag shader module"); - goto destroy_vert_shader_module; - } - - VkPipelineShaderStageCreateInfo triangle_shader_vert_stage_create_info = { - .sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO, - .stage = VK_SHADER_STAGE_VERTEX_BIT, - .module = triangle_shader_vert_module, - .pName = "main"}; - VkPipelineShaderStageCreateInfo triangle_shader_frag_stage_create_info = { - .sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO, - .stage = VK_SHADER_STAGE_FRAGMENT_BIT, - .module = triangle_shader_frag_module, - .pName = "main"}; - VkPipelineShaderStageCreateInfo shader_stages[] = { - triangle_shader_vert_stage_create_info, - triangle_shader_frag_stage_create_info}; - - VkDynamicState dynamic_states[] = { - VK_DYNAMIC_STATE_VIEWPORT, - VK_DYNAMIC_STATE_SCISSOR, - }; - - VkPipelineDynamicStateCreateInfo dynamic_state = { - .sType = VK_STRUCTURE_TYPE_PIPELINE_DYNAMIC_STATE_CREATE_INFO, - .dynamicStateCount = sizeof(dynamic_states) / sizeof(dynamic_states[0]), - .pDynamicStates = dynamic_states}; - - VkPipelineVertexInputStateCreateInfo vertex_input_info = { - .sType = 
VK_STRUCTURE_TYPE_PIPELINE_VERTEX_INPUT_STATE_CREATE_INFO, - .vertexBindingDescriptionCount = 0, - .vertexAttributeDescriptionCount = 0, - }; - - VkPipelineInputAssemblyStateCreateInfo input_assembly = { - .sType = VK_STRUCTURE_TYPE_PIPELINE_INPUT_ASSEMBLY_STATE_CREATE_INFO, - .topology = VK_PRIMITIVE_TOPOLOGY_TRIANGLE_LIST, - .primitiveRestartEnable = VK_FALSE, - }; - - VkPipelineViewportStateCreateInfo viewport_state = { - .sType = VK_STRUCTURE_TYPE_PIPELINE_VIEWPORT_STATE_CREATE_INFO, - .viewportCount = 1, - .scissorCount = 1}; - - VkPipelineRasterizationStateCreateInfo rasterizer = { - .sType = VK_STRUCTURE_TYPE_PIPELINE_RASTERIZATION_STATE_CREATE_INFO, - .depthClampEnable = VK_FALSE, - .rasterizerDiscardEnable = VK_FALSE, - .polygonMode = VK_POLYGON_MODE_FILL, - .lineWidth = 1.f, - .cullMode = VK_CULL_MODE_BACK_BIT, - .frontFace = VK_FRONT_FACE_CLOCKWISE, - .depthBiasEnable = VK_FALSE}; - - VkPipelineMultisampleStateCreateInfo multisampling = { - .sType = VK_STRUCTURE_TYPE_PIPELINE_MULTISAMPLE_STATE_CREATE_INFO, - .sampleShadingEnable = VK_FALSE, - .rasterizationSamples = VK_SAMPLE_COUNT_1_BIT, - }; - - VkPipelineColorBlendAttachmentState color_blend_attachment = { - .colorWriteMask = VK_COLOR_COMPONENT_R_BIT | VK_COLOR_COMPONENT_G_BIT | - VK_COLOR_COMPONENT_B_BIT | VK_COLOR_COMPONENT_A_BIT, - .blendEnable = VK_FALSE, - }; - - VkPipelineColorBlendStateCreateInfo color_blending = { - .sType = VK_STRUCTURE_TYPE_PIPELINE_COLOR_BLEND_STATE_CREATE_INFO, - .logicOpEnable = VK_FALSE, - .attachmentCount = 1, - .pAttachments = &color_blend_attachment}; - - VkPipelineLayoutCreateInfo pipeline_layout_info = { - .sType = VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO, - }; - - if (vkCreatePipelineLayout(renderer->device, &pipeline_layout_info, nullptr, - &renderer->pipeline_layout) != VK_SUCCESS) { - VGLTF_LOG_ERR("Couldn't create pipeline layout"); - goto destroy_frag_shader_module; - } - - VkGraphicsPipelineCreateInfo pipeline_info = { - .sType = VK_STRUCTURE_TYPE_GRAPHICS_PIPELINE_CREATE_INFO, - .stageCount = 2, - .pStages = shader_stages, - .pVertexInputState = &vertex_input_info, - .pInputAssemblyState = &input_assembly, - .pViewportState = &viewport_state, - .pRasterizationState = &rasterizer, - .pMultisampleState = &multisampling, - .pColorBlendState = &color_blending, - .pDynamicState = &dynamic_state, - .layout = renderer->pipeline_layout, - .renderPass = renderer->render_pass, - .subpass = 0, - }; - - if (vkCreateGraphicsPipelines(renderer->device, VK_NULL_HANDLE, 1, - &pipeline_info, nullptr, - &renderer->graphics_pipeline) != VK_SUCCESS) { - VGLTF_LOG_ERR("Couldn't create pipeline"); - goto destroy_pipeline_layout; - } - - vkDestroyShaderModule(renderer->device, triangle_shader_frag_module, nullptr); - vkDestroyShaderModule(renderer->device, triangle_shader_vert_module, nullptr); - return true; -destroy_pipeline_layout: - vkDestroyPipelineLayout(renderer->device, renderer->pipeline_layout, nullptr); -destroy_frag_shader_module: - vkDestroyShaderModule(renderer->device, triangle_shader_frag_module, nullptr); -destroy_vert_shader_module: - vkDestroyShaderModule(renderer->device, triangle_shader_vert_module, nullptr); -err: - return false; -} - -static bool -vgltf_renderer_create_framebuffers(struct vgltf_renderer *renderer) { - for (uint32_t i = 0; i < renderer->swapchain_image_count; i++) { - VkImageView attachments[] = {renderer->swapchain_image_views[i]}; - - VkFramebufferCreateInfo framebuffer_info = { - .sType = VK_STRUCTURE_TYPE_FRAMEBUFFER_CREATE_INFO, - .renderPass = 
renderer->render_pass, - .attachmentCount = 1, - .pAttachments = attachments, - .width = renderer->swapchain_extent.width, - .height = renderer->swapchain_extent.height, - .layers = 1}; - - if (vkCreateFramebuffer(renderer->device, &framebuffer_info, nullptr, - &renderer->swapchain_framebuffers[i]) != - VK_SUCCESS) { - VGLTF_LOG_ERR("Failed to create framebuffer"); - goto err; - } - } - - return true; -err: - return false; -} - -static bool -vgltf_renderer_create_command_pool(struct vgltf_renderer *renderer) { - struct queue_family_indices queue_family_indices = {}; - if (!queue_family_indices_for_device(&queue_family_indices, - renderer->physical_device, - renderer->surface)) { - VGLTF_LOG_ERR("Couldn't fetch queue family indices"); - goto err; - } - - VkCommandPoolCreateInfo pool_info = { - .sType = VK_STRUCTURE_TYPE_COMMAND_POOL_CREATE_INFO, - .flags = VK_COMMAND_POOL_CREATE_RESET_COMMAND_BUFFER_BIT, - .queueFamilyIndex = queue_family_indices.graphics_family}; - - if (vkCreateCommandPool(renderer->device, &pool_info, nullptr, - &renderer->command_pool) != VK_SUCCESS) { - VGLTF_LOG_ERR("Couldn't create command pool"); - goto err; - } - - return true; -err: - return false; -} - -static bool -vgltf_renderer_create_command_buffer(struct vgltf_renderer *renderer) { - VkCommandBufferAllocateInfo allocate_info = { - .sType = VK_STRUCTURE_TYPE_COMMAND_BUFFER_ALLOCATE_INFO, - .commandPool = renderer->command_pool, - .level = VK_COMMAND_BUFFER_LEVEL_PRIMARY, - .commandBufferCount = VGLTF_RENDERER_MAX_FRAME_IN_FLIGHT_COUNT}; - - if (vkAllocateCommandBuffers(renderer->device, &allocate_info, - renderer->command_buffer) != VK_SUCCESS) { - VGLTF_LOG_ERR("Couldn't allocate command buffers"); - goto err; - } - - return true; -err: - return false; -} - -static bool -vgltf_renderer_create_sync_objects(struct vgltf_renderer *renderer) { - VkSemaphoreCreateInfo semaphore_info = { - .sType = VK_STRUCTURE_TYPE_SEMAPHORE_CREATE_INFO, - }; - - VkFenceCreateInfo fence_info = {.sType = VK_STRUCTURE_TYPE_FENCE_CREATE_INFO, - .flags = VK_FENCE_CREATE_SIGNALED_BIT}; - - int frame_in_flight_index = 0; - for (; frame_in_flight_index < VGLTF_RENDERER_MAX_FRAME_IN_FLIGHT_COUNT; - frame_in_flight_index++) { - if (vkCreateSemaphore( - renderer->device, &semaphore_info, nullptr, - &renderer->image_available_semaphores[frame_in_flight_index]) != - VK_SUCCESS || - vkCreateSemaphore( - renderer->device, &semaphore_info, nullptr, - &renderer->render_finished_semaphores[frame_in_flight_index]) != - VK_SUCCESS || - vkCreateFence(renderer->device, &fence_info, nullptr, - &renderer->in_flight_fences[frame_in_flight_index]) != - VK_SUCCESS) { - VGLTF_LOG_ERR("Couldn't create sync objects"); - goto err; - } - } - - return true; -err: - for (int frame_in_flight_to_delete_index = 0; - frame_in_flight_to_delete_index < frame_in_flight_index; - frame_in_flight_to_delete_index++) { - vkDestroyFence(renderer->device, - renderer->in_flight_fences[frame_in_flight_index], nullptr); - vkDestroySemaphore( - renderer->device, - renderer->render_finished_semaphores[frame_in_flight_index], nullptr); - vkDestroySemaphore( - renderer->device, - renderer->image_available_semaphores[frame_in_flight_index], nullptr); - } - return false; -} - -static void vgltf_renderer_cleanup_swapchain(struct vgltf_renderer *renderer) { - for (uint32_t framebuffer_index = 0; - framebuffer_index < renderer->swapchain_image_count; - framebuffer_index++) { - vkDestroyFramebuffer(renderer->device, - renderer->swapchain_framebuffers[framebuffer_index], - nullptr); - } - 
- for (uint32_t image_view_index = 0; - image_view_index < renderer->swapchain_image_count; image_view_index++) { - vkDestroyImageView(renderer->device, - renderer->swapchain_image_views[image_view_index], - nullptr); - } - - vkDestroySwapchainKHR(renderer->device, renderer->swapchain, nullptr); -} - -static bool vgltf_renderer_recreate_swapchain(struct vgltf_renderer *renderer) { - vkDeviceWaitIdle(renderer->device); - vgltf_renderer_cleanup_swapchain(renderer); - - // TODO add error handling - vgltf_renderer_create_swapchain(renderer); - vgltf_renderer_create_image_views(renderer); - vgltf_renderer_create_framebuffers(renderer); - return true; -} - -bool vgltf_renderer_triangle_pass(struct vgltf_renderer *renderer) { - vkWaitForFences(renderer->device, 1, - &renderer->in_flight_fences[renderer->current_frame], VK_TRUE, - UINT64_MAX); - - uint32_t image_index; - VkResult acquire_swapchain_image_result = vkAcquireNextImageKHR( - renderer->device, renderer->swapchain, UINT64_MAX, - renderer->image_available_semaphores[renderer->current_frame], - VK_NULL_HANDLE, &image_index); - if (acquire_swapchain_image_result == VK_ERROR_OUT_OF_DATE_KHR || - acquire_swapchain_image_result == VK_SUBOPTIMAL_KHR || - renderer->framebuffer_resized) { - renderer->framebuffer_resized = false; - vgltf_renderer_recreate_swapchain(renderer); - return true; - } else if (acquire_swapchain_image_result != VK_SUCCESS) { - VGLTF_LOG_ERR("Failed to acquire a swapchain image"); - goto err; - } - - vkResetFences(renderer->device, 1, - &renderer->in_flight_fences[renderer->current_frame]); - - vkResetCommandBuffer(renderer->command_buffer[renderer->current_frame], 0); - VkCommandBufferBeginInfo begin_info = { - .sType = VK_STRUCTURE_TYPE_COMMAND_BUFFER_BEGIN_INFO, - }; - - if (vkBeginCommandBuffer(renderer->command_buffer[renderer->current_frame], - &begin_info) != VK_SUCCESS) { - VGLTF_LOG_ERR("Failed to begin recording command buffer"); - goto err; - } - - VkRenderPassBeginInfo render_pass_info = { - .sType = VK_STRUCTURE_TYPE_RENDER_PASS_BEGIN_INFO, - .renderPass = renderer->render_pass, - .framebuffer = renderer->swapchain_framebuffers[image_index], - .renderArea = {.offset = {}, .extent = renderer->swapchain_extent}, - .clearValueCount = 1, - .pClearValues = - &(const VkClearValue){.color = {.float32 = {0.f, 0.f, 0.f, 1.f}}}, - - }; - - vkCmdBeginRenderPass(renderer->command_buffer[renderer->current_frame], - &render_pass_info, VK_SUBPASS_CONTENTS_INLINE); - vkCmdBindPipeline(renderer->command_buffer[renderer->current_frame], - VK_PIPELINE_BIND_POINT_GRAPHICS, - renderer->graphics_pipeline); - VkViewport viewport = {.x = 0.f, - .y = 0.f, - .width = (float)renderer->swapchain_extent.width, - .height = (float)renderer->swapchain_extent.height, - .minDepth = 0.f, - .maxDepth = 1.f}; - vkCmdSetViewport(renderer->command_buffer[renderer->current_frame], 0, 1, - &viewport); - VkRect2D scissor = {.offset = {}, .extent = renderer->swapchain_extent}; - vkCmdSetScissor(renderer->command_buffer[renderer->current_frame], 0, 1, - &scissor); - - vkCmdDraw(renderer->command_buffer[renderer->current_frame], 3, 1, 0, 0); - - vkCmdEndRenderPass(renderer->command_buffer[renderer->current_frame]); - - if (vkEndCommandBuffer(renderer->command_buffer[renderer->current_frame]) != - VK_SUCCESS) { - VGLTF_LOG_ERR("Failed to record command buffer"); - goto err; - } - - VkSubmitInfo submit_info = { - .sType = VK_STRUCTURE_TYPE_SUBMIT_INFO, - }; - - VkSemaphore wait_semaphores[] = { - 
renderer->image_available_semaphores[renderer->current_frame]}; - VkPipelineStageFlags wait_stages[] = { - VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT}; - submit_info.waitSemaphoreCount = 1; - submit_info.pWaitSemaphores = wait_semaphores; - submit_info.pWaitDstStageMask = wait_stages; - submit_info.commandBufferCount = 1; - submit_info.pCommandBuffers = - &renderer->command_buffer[renderer->current_frame]; - - VkSemaphore signal_semaphores[] = { - renderer->render_finished_semaphores[renderer->current_frame]}; - submit_info.signalSemaphoreCount = 1; - submit_info.pSignalSemaphores = signal_semaphores; - if (vkQueueSubmit(renderer->graphics_queue, 1, &submit_info, - renderer->in_flight_fences[renderer->current_frame]) != - VK_SUCCESS) { - VGLTF_LOG_ERR("Failed to submit draw command buffer"); - goto err; - } - - VkPresentInfoKHR present_info = {.sType = VK_STRUCTURE_TYPE_PRESENT_INFO_KHR, - .waitSemaphoreCount = 1, - .pWaitSemaphores = signal_semaphores}; - - VkSwapchainKHR swapchains[] = {renderer->swapchain}; - present_info.swapchainCount = 1; - present_info.pSwapchains = swapchains; - present_info.pImageIndices = &image_index; - VkResult result = vkQueuePresentKHR(renderer->present_queue, &present_info); - if (result == VK_ERROR_OUT_OF_DATE_KHR || result == VK_SUBOPTIMAL_KHR) { - vgltf_renderer_recreate_swapchain(renderer); - } else if (acquire_swapchain_image_result != VK_SUCCESS) { - VGLTF_LOG_ERR("Failed to acquire a swapchain image"); - goto err; - } - renderer->current_frame = - (renderer->current_frame + 1) % VGLTF_RENDERER_MAX_FRAME_IN_FLIGHT_COUNT; - return true; -err: - return false; -} - -bool vgltf_renderer_init(struct vgltf_renderer *renderer, - struct vgltf_platform *platform) { - if (!vgltf_renderer_create_instance(renderer, platform)) { - VGLTF_LOG_ERR("instance creation failed"); - goto err; - } - vgltf_renderer_setup_debug_messenger(renderer); - if (!vgltf_renderer_create_surface(renderer, platform)) { - goto destroy_instance; - } - - if (!vgltf_renderer_pick_physical_device(renderer)) { - VGLTF_LOG_ERR("Couldn't pick physical device"); - goto destroy_surface; - } - if (!vgltf_renderer_create_logical_device(renderer)) { - VGLTF_LOG_ERR("Couldn't create logical device"); - goto destroy_device; - } - - struct vgltf_window_size window_size = {800, 600}; - if (!vgltf_platform_get_window_size(platform, &window_size)) { - VGLTF_LOG_ERR("Couldn't get window size"); - goto destroy_device; - } - renderer->window_size = window_size; - - if (!vgltf_renderer_create_swapchain(renderer)) { - VGLTF_LOG_ERR("Couldn't create swapchain"); - goto destroy_device; - } - - if (!vgltf_renderer_create_image_views(renderer)) { - VGLTF_LOG_ERR("Couldn't create image views"); - goto destroy_swapchain; - } - - if (!vgltf_renderer_create_render_pass(renderer)) { - VGLTF_LOG_ERR("Couldn't create render pass"); - goto destroy_image_views; - } - - if (!vgltf_renderer_create_graphics_pipeline(renderer)) { - VGLTF_LOG_ERR("Couldn't create graphics pipeline"); - goto destroy_render_pass; - } - - if (!vgltf_renderer_create_framebuffers(renderer)) { - VGLTF_LOG_ERR("Couldn't create framebuffers"); - goto destroy_graphics_pipeline; - } - - if (!vgltf_renderer_create_command_pool(renderer)) { - VGLTF_LOG_ERR("Couldn't create command pool"); - goto destroy_frame_buffers; - } - - if (!vgltf_renderer_create_command_buffer(renderer)) { - VGLTF_LOG_ERR("Couldn't create command buffer"); - goto destroy_command_pool; - } - - if (!vgltf_renderer_create_sync_objects(renderer)) { - VGLTF_LOG_ERR("Couldn't create 
sync objects"); - goto destroy_command_pool; - } - - return true; - -destroy_command_pool: - vkDestroyCommandPool(renderer->device, renderer->command_pool, nullptr); -destroy_frame_buffers: - for (uint32_t swapchain_framebuffer_index = 0; - swapchain_framebuffer_index < renderer->swapchain_image_count; - swapchain_framebuffer_index++) { - vkDestroyFramebuffer( - renderer->device, - renderer->swapchain_framebuffers[swapchain_framebuffer_index], nullptr); - } -destroy_graphics_pipeline: - vkDestroyPipeline(renderer->device, renderer->graphics_pipeline, nullptr); - vkDestroyPipelineLayout(renderer->device, renderer->pipeline_layout, nullptr); -destroy_render_pass: - vkDestroyRenderPass(renderer->device, renderer->render_pass, nullptr); -destroy_image_views: - for (uint32_t swapchain_image_view_index = 0; - swapchain_image_view_index < renderer->swapchain_image_count; - swapchain_image_view_index++) { - vkDestroyImageView( - renderer->device, - renderer->swapchain_image_views[swapchain_image_view_index], nullptr); - } -destroy_swapchain: - vkDestroySwapchainKHR(renderer->device, renderer->swapchain, nullptr); -destroy_device: - vkDestroyDevice(renderer->device, nullptr); -destroy_surface: - vkDestroySurfaceKHR(renderer->instance, renderer->surface, nullptr); -destroy_instance: - if (enable_validation_layers) { - destroy_debug_utils_messenger_ext(renderer->instance, - renderer->debug_messenger, nullptr); - } - vkDestroyInstance(renderer->instance, nullptr); -err: - return false; -} -void vgltf_renderer_deinit(struct vgltf_renderer *renderer) { - vkDeviceWaitIdle(renderer->device); - vgltf_renderer_cleanup_swapchain(renderer); - vkDestroyPipeline(renderer->device, renderer->graphics_pipeline, nullptr); - vkDestroyPipelineLayout(renderer->device, renderer->pipeline_layout, nullptr); - vkDestroyRenderPass(renderer->device, renderer->render_pass, nullptr); - for (int i = 0; i < VGLTF_RENDERER_MAX_FRAME_IN_FLIGHT_COUNT; i++) { - vkDestroySemaphore(renderer->device, - renderer->image_available_semaphores[i], nullptr); - vkDestroySemaphore(renderer->device, - renderer->render_finished_semaphores[i], nullptr); - vkDestroyFence(renderer->device, renderer->in_flight_fences[i], nullptr); - } - vkDestroyCommandPool(renderer->device, renderer->command_pool, nullptr); - vkDestroyDevice(renderer->device, nullptr); - if (enable_validation_layers) { - destroy_debug_utils_messenger_ext(renderer->instance, - renderer->debug_messenger, nullptr); - } - vkDestroySurfaceKHR(renderer->instance, renderer->surface, nullptr); - vkDestroyInstance(renderer->instance, nullptr); -} -void vgltf_renderer_on_window_resized(struct vgltf_renderer *renderer, - struct vgltf_window_size size) { - if (size.width > 0 && size.height > 0 && - size.width != renderer->window_size.width && - size.height != renderer->window_size.height) { - renderer->window_size = size; - renderer->framebuffer_resized = true; - } -} diff --git a/src/renderer.h b/src/renderer.h deleted file mode 100644 index a0417aa..0000000 --- a/src/renderer.h +++ /dev/null @@ -1,45 +0,0 @@ -#ifndef VGLTF_RENDERER_H -#define VGLTF_RENDERER_H - -#include "platform.h" -#include - -constexpr int VGLTF_RENDERER_MAX_FRAME_IN_FLIGHT_COUNT = 2; -constexpr int VGLTF_RENDERER_MAX_SWAPCHAIN_IMAGE_COUNT = 32; -struct vgltf_renderer { - VkInstance instance; - VkPhysicalDevice physical_device; - VkDevice device; - VkQueue graphics_queue; - VkQueue present_queue; - VkDebugUtilsMessengerEXT debug_messenger; - VkSurfaceKHR surface; - VkSwapchainKHR swapchain; - VkImage 
swapchain_images[VGLTF_RENDERER_MAX_SWAPCHAIN_IMAGE_COUNT]; - VkImageView swapchain_image_views[VGLTF_RENDERER_MAX_SWAPCHAIN_IMAGE_COUNT]; - VkFormat swapchain_image_format; - VkExtent2D swapchain_extent; - uint32_t swapchain_image_count; - VkRenderPass render_pass; - VkPipelineLayout pipeline_layout; - VkPipeline graphics_pipeline; - VkFramebuffer - swapchain_framebuffers[VGLTF_RENDERER_MAX_SWAPCHAIN_IMAGE_COUNT]; - VkCommandPool command_pool; - VkCommandBuffer command_buffer[VGLTF_RENDERER_MAX_FRAME_IN_FLIGHT_COUNT]; - VkSemaphore - image_available_semaphores[VGLTF_RENDERER_MAX_FRAME_IN_FLIGHT_COUNT]; - VkSemaphore - render_finished_semaphores[VGLTF_RENDERER_MAX_FRAME_IN_FLIGHT_COUNT]; - VkFence in_flight_fences[VGLTF_RENDERER_MAX_FRAME_IN_FLIGHT_COUNT]; - struct vgltf_window_size window_size; - uint32_t current_frame; - bool framebuffer_resized; -}; -bool vgltf_renderer_init(struct vgltf_renderer *renderer, - struct vgltf_platform *platform); -void vgltf_renderer_deinit(struct vgltf_renderer *renderer); -bool vgltf_renderer_triangle_pass(struct vgltf_renderer *renderer); -void vgltf_renderer_on_window_resized(struct vgltf_renderer *renderer, - struct vgltf_window_size size); -#endif // VGLTF_RENDERER_H diff --git a/src/renderer/renderer.c b/src/renderer/renderer.c new file mode 100644 index 0000000..d34ef73 --- /dev/null +++ b/src/renderer/renderer.c @@ -0,0 +1,2559 @@ +#include "renderer.h" +#include "../image.h" +#include "../log.h" +#include "../maths.h" +#include "../platform.h" +#include "vma_usage.h" +#include + +#define TINYOBJ_LOADER_C_IMPLEMENTATION +#include "vendor/tiny_obj_loader_c.h" + +#include +#include + +static const char MODEL_PATH[] = "assets/model.obj"; +static const char TEXTURE_PATH[] = "assets/texture.png"; + +VkVertexInputBindingDescription vgltf_vertex_binding_description() { + return (VkVertexInputBindingDescription){ + .binding = 0, + .stride = sizeof(struct vgltf_vertex), + .inputRate = VK_VERTEX_INPUT_RATE_VERTEX}; +} +struct vgltf_vertex_input_attribute_descriptions +vgltf_vertex_attribute_descriptions(void) { + return (struct vgltf_vertex_input_attribute_descriptions){ + .descriptions = {(VkVertexInputAttributeDescription){ + .binding = 0, + .location = 0, + .format = VK_FORMAT_R32G32B32_SFLOAT, + .offset = offsetof(struct vgltf_vertex, position)}, + (VkVertexInputAttributeDescription){ + .binding = 0, + .location = 1, + .format = VK_FORMAT_R32G32B32_SFLOAT, + .offset = offsetof(struct vgltf_vertex, color)}, + (VkVertexInputAttributeDescription){ + .binding = 0, + .location = 2, + .format = VK_FORMAT_R32G32_SFLOAT, + .offset = offsetof(struct vgltf_vertex, + texture_coordinates)}}, + .count = 3}; +} + +static const char *VALIDATION_LAYERS[] = {"VK_LAYER_KHRONOS_validation"}; +static constexpr int VALIDATION_LAYER_COUNT = + sizeof(VALIDATION_LAYERS) / sizeof(VALIDATION_LAYERS[0]); + +#ifdef VGLTF_DEBUG +static constexpr bool enable_validation_layers = true; +#else +static constexpr bool enable_validation_layers = false; +#endif + +static VKAPI_ATTR VkBool32 VKAPI_CALL +debug_callback(VkDebugUtilsMessageSeverityFlagBitsEXT message_severity, + VkDebugUtilsMessageTypeFlagBitsEXT message_type, + const VkDebugUtilsMessengerCallbackDataEXT *callback_data, + void *user_data) { + (void)message_severity; + (void)message_type; + (void)user_data; + VGLTF_LOG_DBG("validation layer: %s", callback_data->pMessage); + return VK_FALSE; +} + +static constexpr int REQUIRED_INSTANCE_EXTENSIONS_ARRAY_CAPACITY = 10; +struct required_instance_extensions { + const char 
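+      // Fixed-capacity array of instance extension names; the push helper
+      // below panics instead of overflowing once the capacity of 10 is hit.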
*extensions[REQUIRED_INSTANCE_EXTENSIONS_ARRAY_CAPACITY]; + uint32_t count; +}; +void required_instance_extensions_push( + struct required_instance_extensions *required_instance_extensions, + const char *required_instance_extension) { + if (required_instance_extensions->count == + REQUIRED_INSTANCE_EXTENSIONS_ARRAY_CAPACITY) { + VGLTF_PANIC("required instance extensions array is full"); + } + required_instance_extensions + ->extensions[required_instance_extensions->count++] = + required_instance_extension; +} + +static constexpr int SUPPORTED_INSTANCE_EXTENSIONS_ARRAY_CAPACITY = 128; +struct supported_instance_extensions { + VkExtensionProperties + properties[SUPPORTED_INSTANCE_EXTENSIONS_ARRAY_CAPACITY]; + uint32_t count; +}; +bool supported_instance_extensions_init( + struct supported_instance_extensions *supported_instance_extensions) { + if (vkEnumerateInstanceExtensionProperties( + nullptr, &supported_instance_extensions->count, nullptr) != + VK_SUCCESS) { + goto err; + } + + if (supported_instance_extensions->count > + SUPPORTED_INSTANCE_EXTENSIONS_ARRAY_CAPACITY) { + VGLTF_LOG_ERR("supported instance extensions array cannot fit all the " + "VkExtensionProperties"); + goto err; + } + + if (vkEnumerateInstanceExtensionProperties( + nullptr, &supported_instance_extensions->count, + supported_instance_extensions->properties) != VK_SUCCESS) { + goto err; + } + return true; +err: + return false; +} +void supported_instance_extensions_debug_print( + const struct supported_instance_extensions *supported_instance_extensions) { + VGLTF_LOG_DBG("Supported instance extensions:"); + for (uint32_t i = 0; i < supported_instance_extensions->count; i++) { + VGLTF_LOG_DBG("\t- %s", + supported_instance_extensions->properties[i].extensionName); + } +} +bool supported_instance_extensions_includes( + const struct supported_instance_extensions *supported_instance_extensions, + const char *extension_name) { + for (uint32_t supported_instance_extension_index = 0; + supported_instance_extension_index < + supported_instance_extensions->count; + supported_instance_extension_index++) { + const VkExtensionProperties *extension_properties = + &supported_instance_extensions + ->properties[supported_instance_extension_index]; + if (strcmp(extension_properties->extensionName, extension_name) == 0) { + return true; + } + } + + return false; +} + +static constexpr uint32_t SUPPORTED_VALIDATION_LAYERS_ARRAY_CAPACITY = 64; +struct supported_validation_layers { + VkLayerProperties properties[SUPPORTED_VALIDATION_LAYERS_ARRAY_CAPACITY]; + uint32_t count; +}; +bool supported_validation_layers_init( + struct supported_validation_layers *supported_validation_layers) { + if (vkEnumerateInstanceLayerProperties(&supported_validation_layers->count, + nullptr) != VK_SUCCESS) { + goto err; + } + + if (supported_validation_layers->count > + SUPPORTED_VALIDATION_LAYERS_ARRAY_CAPACITY) { + VGLTF_LOG_ERR("supported validation layers array cannot fit all the " + "VkLayerProperties"); + goto err; + } + + if (vkEnumerateInstanceLayerProperties( + &supported_validation_layers->count, + supported_validation_layers->properties) != VK_SUCCESS) { + goto err; + } + + return true; +err: + return false; +} + +static bool are_validation_layer_supported() { + struct supported_validation_layers supported_layers = {}; + if (!supported_validation_layers_init(&supported_layers)) { + goto err; + } + + for (int requested_layer_index = 0; + requested_layer_index < VALIDATION_LAYER_COUNT; + requested_layer_index++) { + const char *requested_layer_name = 
VALIDATION_LAYERS[requested_layer_index]; + bool requested_layer_found = false; + for (uint32_t supported_layer_index = 0; + supported_layer_index < supported_layers.count; + supported_layer_index++) { + VkLayerProperties *supported_layer = + &supported_layers.properties[supported_layer_index]; + if (strcmp(requested_layer_name, supported_layer->layerName) == 0) { + requested_layer_found = true; + break; + } + } + + if (!requested_layer_found) { + goto err; + } + } + + return true; +err: + return false; +} + +static bool fetch_required_instance_extensions( + struct required_instance_extensions *required_extensions, + struct vgltf_platform *platform) { + struct supported_instance_extensions supported_extensions = {}; + if (!supported_instance_extensions_init(&supported_extensions)) { + VGLTF_LOG_ERR( + "Couldn't fetch supported instance extensions details (OOM?)"); + goto err; + } + supported_instance_extensions_debug_print(&supported_extensions); + + uint32_t platform_required_extension_count = 0; + const char *const *platform_required_extensions = + vgltf_platform_get_vulkan_instance_extensions( + platform, &platform_required_extension_count); + for (uint32_t platform_required_extension_index = 0; + platform_required_extension_index < platform_required_extension_count; + platform_required_extension_index++) { + required_instance_extensions_push( + required_extensions, + platform_required_extensions[platform_required_extension_index]); + } +#ifdef VGLTF_PLATFORM_MACOS + required_instance_extensions_push( + required_extensions, VK_KHR_PORTABILITY_ENUMERATION_EXTENSION_NAME); +#endif // VGLTF_PLATFORM_MACOS + + if (enable_validation_layers) { + required_instance_extensions_push(required_extensions, + VK_EXT_DEBUG_UTILS_EXTENSION_NAME); + } + + bool all_extensions_supported = true; + for (uint32_t required_extension_index = 0; + required_extension_index < required_extensions->count; + required_extension_index++) { + const char *required_extension_name = + required_extensions->extensions[required_extension_index]; + if (!supported_instance_extensions_includes(&supported_extensions, + required_extension_name)) { + VGLTF_LOG_ERR("Unsupported instance extension: %s", + required_extension_name); + all_extensions_supported = false; + } + } + + if (!all_extensions_supported) { + VGLTF_LOG_ERR("Some required extensions are unsupported."); + goto err; + } + + return true; +err: + return false; +} + +static void populate_debug_messenger_create_info( + VkDebugUtilsMessengerCreateInfoEXT *create_info) { + *create_info = (VkDebugUtilsMessengerCreateInfoEXT){}; + create_info->sType = VK_STRUCTURE_TYPE_DEBUG_UTILS_MESSENGER_CREATE_INFO_EXT; + create_info->messageSeverity = + VK_DEBUG_UTILS_MESSAGE_SEVERITY_VERBOSE_BIT_EXT | + VK_DEBUG_UTILS_MESSAGE_SEVERITY_WARNING_BIT_EXT | + VK_DEBUG_UTILS_MESSAGE_SEVERITY_ERROR_BIT_EXT; + create_info->messageType = VK_DEBUG_UTILS_MESSAGE_TYPE_GENERAL_BIT_EXT | + VK_DEBUG_UTILS_MESSAGE_TYPE_VALIDATION_BIT_EXT | + VK_DEBUG_UTILS_MESSAGE_TYPE_PERFORMANCE_BIT_EXT; + create_info->pfnUserCallback = debug_callback; +} + +static bool vgltf_vk_instance_init(struct vgltf_vk_instance *instance, + struct vgltf_platform *platform) { + VGLTF_LOG_INFO("Creating vulkan instance..."); + if (enable_validation_layers && !are_validation_layer_supported()) { + VGLTF_LOG_ERR("Requested validation layers aren't supported"); + goto err; + } + + VkApplicationInfo application_info = { + .sType = VK_STRUCTURE_TYPE_APPLICATION_INFO, + .pApplicationName = "Visible GLTF", + .applicationVersion = 
VK_MAKE_VERSION(0, 1, 0),
+      .pEngineName = "No Engine",
+      .engineVersion = VK_MAKE_VERSION(1, 0, 0),
+      .apiVersion = VK_API_VERSION_1_2};
+
+  struct required_instance_extensions required_extensions = {};
+  if (!fetch_required_instance_extensions(&required_extensions, platform)) {
+    VGLTF_LOG_ERR("Couldn't gather required instance extensions");
+    goto err;
+  }
+
+  VkInstanceCreateFlags flags = 0;
+#ifdef VGLTF_PLATFORM_MACOS
+  flags |= VK_INSTANCE_CREATE_ENUMERATE_PORTABILITY_BIT_KHR;
+#endif // VGLTF_PLATFORM_MACOS
+
+  VkInstanceCreateInfo create_info = {
+      .sType = VK_STRUCTURE_TYPE_INSTANCE_CREATE_INFO,
+      .pApplicationInfo = &application_info,
+      .enabledExtensionCount = required_extensions.count,
+      .ppEnabledExtensionNames = required_extensions.extensions,
+      .flags = flags};
+
+  VkDebugUtilsMessengerCreateInfoEXT debug_create_info;
+  if (enable_validation_layers) {
+    create_info.enabledLayerCount = VALIDATION_LAYER_COUNT;
+    create_info.ppEnabledLayerNames = VALIDATION_LAYERS;
+    populate_debug_messenger_create_info(&debug_create_info);
+    create_info.pNext = &debug_create_info;
+  }
+
+  if (vkCreateInstance(&create_info, nullptr, &instance->instance) !=
+      VK_SUCCESS) {
+    VGLTF_LOG_ERR("Failed to create VkInstance");
+    goto err;
+  }
+
+  return true;
+err:
+  return false;
+}
+static void vgltf_vk_instance_deinit(struct vgltf_vk_instance *instance) {
+  vkDestroyInstance(instance->instance, nullptr);
+}
+
+static VkResult create_debug_utils_messenger_ext(
+    VkInstance instance, const VkDebugUtilsMessengerCreateInfoEXT *create_info,
+    const VkAllocationCallbacks *allocator,
+    VkDebugUtilsMessengerEXT *debug_messenger) {
+  auto func = (PFN_vkCreateDebugUtilsMessengerEXT)vkGetInstanceProcAddr(
+      instance, "vkCreateDebugUtilsMessengerEXT");
+  if (func != nullptr) {
+    return func(instance, create_info, allocator, debug_messenger);
+  }
+
+  return VK_ERROR_EXTENSION_NOT_PRESENT;
+}
+
+static void
+destroy_debug_utils_messenger_ext(VkInstance instance,
+                                  VkDebugUtilsMessengerEXT debug_messenger,
+                                  const VkAllocationCallbacks *allocator) {
+  auto func = (PFN_vkDestroyDebugUtilsMessengerEXT)vkGetInstanceProcAddr(
+      instance, "vkDestroyDebugUtilsMessengerEXT");
+  if (func != nullptr) {
+    func(instance, debug_messenger, allocator);
+  }
+}
+
+static void
+vgltf_renderer_setup_debug_messenger(struct vgltf_renderer *renderer) {
+  if (!enable_validation_layers)
+    return;
+  VkDebugUtilsMessengerCreateInfoEXT create_info;
+  populate_debug_messenger_create_info(&create_info);
+  create_debug_utils_messenger_ext(renderer->instance.instance, &create_info,
+                                   nullptr, &renderer->debug_messenger);
+}
+
+static constexpr int AVAILABLE_PHYSICAL_DEVICE_ARRAY_CAPACITY = 128;
+struct available_physical_devices {
+  VkPhysicalDevice devices[AVAILABLE_PHYSICAL_DEVICE_ARRAY_CAPACITY];
+  uint32_t count;
+};
+static bool
+available_physical_devices_init(VkInstance instance,
+                                struct available_physical_devices *devices) {
+
+  if (vkEnumeratePhysicalDevices(instance, &devices->count, nullptr) !=
+      VK_SUCCESS) {
+    VGLTF_LOG_ERR("Couldn't enumerate physical devices");
+    goto err;
+  }
+
+  if (devices->count == 0) {
+    VGLTF_LOG_ERR("Failed to find any GPU with Vulkan support");
+    goto err;
+  }
+
+  if (devices->count > AVAILABLE_PHYSICAL_DEVICE_ARRAY_CAPACITY) {
+    VGLTF_LOG_ERR("available physical devices array cannot fit all available "
+                  "physical devices");
+    goto err;
+  }
+
+  if (vkEnumeratePhysicalDevices(instance, &devices->count, devices->devices) !=
+      VK_SUCCESS) {
+    VGLTF_LOG_ERR("Couldn't enumerate physical devices");
+    goto err;
+  }
+
+  return true;
+err:
+  return false;
+}
+
+struct queue_family_indices {
+ 
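+  // Queue family indices discovered on a physical device; the has_* flags
+  // record whether a graphics- or present-capable family was actually found.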
uint32_t graphics_family; + uint32_t present_family; + bool has_graphics_family; + bool has_present_family; +}; +bool queue_family_indices_is_complete( + const struct queue_family_indices *indices) { + return indices->has_graphics_family && indices->has_present_family; +} +bool queue_family_indices_for_device(struct queue_family_indices *indices, + VkPhysicalDevice device, + VkSurfaceKHR surface) { + static constexpr uint32_t QUEUE_FAMILY_PROPERTIES_ARRAY_CAPACITY = 64; + uint32_t queue_family_count = 0; + vkGetPhysicalDeviceQueueFamilyProperties(device, &queue_family_count, + nullptr); + + if (queue_family_count > QUEUE_FAMILY_PROPERTIES_ARRAY_CAPACITY) { + VGLTF_LOG_ERR( + "Queue family properties array cannot fit all queue family properties"); + goto err; + } + + VkQueueFamilyProperties + queue_family_properties[QUEUE_FAMILY_PROPERTIES_ARRAY_CAPACITY] = {}; + vkGetPhysicalDeviceQueueFamilyProperties(device, &queue_family_count, + queue_family_properties); + + for (uint32_t queue_family_index = 0; queue_family_index < queue_family_count; + queue_family_index++) { + VkQueueFamilyProperties *queue_family = + &queue_family_properties[queue_family_index]; + + VkBool32 present_support; + vkGetPhysicalDeviceSurfaceSupportKHR(device, queue_family_index, surface, + &present_support); + + if (queue_family->queueFlags & VK_QUEUE_GRAPHICS_BIT) { + indices->graphics_family = queue_family_index; + indices->has_graphics_family = true; + } + + if (present_support) { + indices->present_family = queue_family_index; + indices->has_present_family = true; + } + + if (queue_family_indices_is_complete(indices)) { + break; + } + } + + return true; +err: + return false; +} + +static bool is_in_array(uint32_t *array, int length, uint32_t value) { + for (int i = 0; i < length; i++) { + if (array[i] == value) { + return true; + } + } + + return false; +} + +static constexpr uint32_t SUPPORTED_EXTENSIONS_ARRAY_CAPACITY = 1024; +struct supported_extensions { + VkExtensionProperties properties[SUPPORTED_EXTENSIONS_ARRAY_CAPACITY]; + uint32_t count; +}; +bool supported_extensions_init( + struct supported_extensions *supported_extensions, + VkPhysicalDevice device) { + if (vkEnumerateDeviceExtensionProperties(device, nullptr, + &supported_extensions->count, + nullptr) != VK_SUCCESS) { + goto err; + } + + if (supported_extensions->count > SUPPORTED_EXTENSIONS_ARRAY_CAPACITY) { + VGLTF_LOG_ERR("supported extensions array cannot fit all the supported " + "VkExtensionProperties (%u)", + supported_extensions->count); + goto err; + } + + if (vkEnumerateDeviceExtensionProperties( + device, nullptr, &supported_extensions->count, + supported_extensions->properties) != VK_SUCCESS) { + goto err; + } + + return true; +err: + return false; +} + +static bool supported_extensions_includes_extension( + struct supported_extensions *supported_extensions, + const char *extension_name) { + for (uint32_t supported_extension_index = 0; + supported_extension_index < supported_extensions->count; + supported_extension_index++) { + if (strcmp(supported_extensions->properties[supported_extension_index] + .extensionName, + extension_name) == 0) { + return true; + } + } + return false; +} + +static const char *DEVICE_EXTENSIONS[] = { + VK_KHR_SWAPCHAIN_EXTENSION_NAME, +#ifdef VGLTF_PLATFORM_MACOS + "VK_KHR_portability_subset", +#endif +}; +static constexpr int DEVICE_EXTENSION_COUNT = + sizeof(DEVICE_EXTENSIONS) / sizeof(DEVICE_EXTENSIONS[0]); +static bool are_device_extensions_supported(VkPhysicalDevice device) { + struct supported_extensions 
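+  // Snapshot of the device's supported extensions, checked against
+  // DEVICE_EXTENSIONS (which adds VK_KHR_portability_subset on macOS).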
supported_extensions = {}; + if (!supported_extensions_init(&supported_extensions, device)) { + goto err; + } + + for (uint32_t required_extension_index = 0; + required_extension_index < DEVICE_EXTENSION_COUNT; + required_extension_index++) { + if (!supported_extensions_includes_extension( + &supported_extensions, + DEVICE_EXTENSIONS[required_extension_index])) { + VGLTF_LOG_DBG("Unsupported: %s", + DEVICE_EXTENSIONS[required_extension_index]); + goto err; + } + } + + return true; + +err: + return false; +} + +static constexpr int SWAPCHAIN_SUPPORT_DETAILS_MAX_SURFACE_FORMAT_COUNT = 256; +static constexpr int SWAPCHAIN_SUPPORT_DETAILS_MAX_PRESENT_MODE_COUNT = 256; +struct swapchain_support_details { + VkSurfaceCapabilitiesKHR capabilities; + VkSurfaceFormatKHR + formats[SWAPCHAIN_SUPPORT_DETAILS_MAX_SURFACE_FORMAT_COUNT]; + VkPresentModeKHR + present_modes[SWAPCHAIN_SUPPORT_DETAILS_MAX_PRESENT_MODE_COUNT]; + uint32_t format_count; + uint32_t present_mode_count; +}; +bool swapchain_support_details_query_from_device( + struct swapchain_support_details *swapchain_support_details, + VkPhysicalDevice device, VkSurfaceKHR surface) { + if (vkGetPhysicalDeviceSurfaceCapabilitiesKHR( + device, surface, &swapchain_support_details->capabilities) != + VK_SUCCESS) { + goto err; + } + + if (vkGetPhysicalDeviceSurfaceFormatsKHR( + device, surface, &swapchain_support_details->format_count, nullptr) != + VK_SUCCESS) { + goto err; + } + + if (swapchain_support_details->format_count != 0 && + swapchain_support_details->format_count < + SWAPCHAIN_SUPPORT_DETAILS_MAX_SURFACE_FORMAT_COUNT) { + if (vkGetPhysicalDeviceSurfaceFormatsKHR( + device, surface, &swapchain_support_details->format_count, + swapchain_support_details->formats) != VK_SUCCESS) { + goto err; + } + } + + if (vkGetPhysicalDeviceSurfacePresentModesKHR( + device, surface, &swapchain_support_details->present_mode_count, + nullptr) != VK_SUCCESS) { + goto err; + } + + if (swapchain_support_details->present_mode_count != 0 && + swapchain_support_details->present_mode_count < + SWAPCHAIN_SUPPORT_DETAILS_MAX_PRESENT_MODE_COUNT) { + if (vkGetPhysicalDeviceSurfacePresentModesKHR( + device, surface, &swapchain_support_details->present_mode_count, + swapchain_support_details->present_modes) != VK_SUCCESS) { + goto err; + } + } + + return true; +err: + return false; +} + +static bool is_physical_device_suitable(VkPhysicalDevice device, + VkSurfaceKHR surface) { + struct queue_family_indices indices = {}; + queue_family_indices_for_device(&indices, device, surface); + + VGLTF_LOG_DBG("Checking for physical device extension support"); + bool extensions_supported = are_device_extensions_supported(device); + VGLTF_LOG_DBG("Supported: %d", extensions_supported); + + bool swapchain_adequate = false; + if (extensions_supported) { + + VGLTF_LOG_DBG("Checking for swapchain support details"); + struct swapchain_support_details swapchain_support_details = {}; + if (!swapchain_support_details_query_from_device(&swapchain_support_details, + device, surface)) { + VGLTF_LOG_ERR("Couldn't query swapchain support details from device"); + goto err; + } + + swapchain_adequate = swapchain_support_details.format_count > 0 && + swapchain_support_details.present_mode_count > 0; + } + + VkPhysicalDeviceFeatures supported_features; + vkGetPhysicalDeviceFeatures(device, &supported_features); + + return queue_family_indices_is_complete(&indices) && extensions_supported && + swapchain_adequate && supported_features.samplerAnisotropy; +err: + return false; +} + +static bool 
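+// Selects the first enumerated physical device accepted by
+// is_physical_device_suitable(): complete queue families, the required device
+// extensions, an adequate swapchain, and sampler anisotropy support.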
pick_physical_device(VkPhysicalDevice *physical_device, + struct vgltf_vk_instance *instance, + VkSurfaceKHR surface) { + VkPhysicalDevice vk_physical_device = VK_NULL_HANDLE; + struct available_physical_devices available_physical_devices = {}; + if (!available_physical_devices_init(instance->instance, + &available_physical_devices)) { + VGLTF_LOG_ERR("Couldn't fetch available physical devices"); + goto err; + } + + for (uint32_t available_physical_device_index = 0; + available_physical_device_index < available_physical_devices.count; + available_physical_device_index++) { + VkPhysicalDevice available_physical_device = + available_physical_devices.devices[available_physical_device_index]; + if (is_physical_device_suitable(available_physical_device, surface)) { + vk_physical_device = available_physical_device; + break; + } + } + + if (vk_physical_device == VK_NULL_HANDLE) { + VGLTF_LOG_ERR("Failed to find a suitable GPU"); + goto err; + } + + *physical_device = vk_physical_device; + + return true; +err: + return false; +} + +static bool create_logical_device(VkDevice *device, VkQueue *graphics_queue, + VkQueue *present_queue, + VkPhysicalDevice physical_device, + VkSurfaceKHR surface) { + struct queue_family_indices queue_family_indices = {}; + queue_family_indices_for_device(&queue_family_indices, physical_device, + surface); + static constexpr int MAX_QUEUE_FAMILY_COUNT = 2; + + uint32_t unique_queue_families[MAX_QUEUE_FAMILY_COUNT] = {}; + int unique_queue_family_count = 0; + + if (!is_in_array(unique_queue_families, unique_queue_family_count, + queue_family_indices.graphics_family)) { + assert(unique_queue_family_count < MAX_QUEUE_FAMILY_COUNT); + unique_queue_families[unique_queue_family_count++] = + queue_family_indices.graphics_family; + } + if (!is_in_array(unique_queue_families, unique_queue_family_count, + queue_family_indices.present_family)) { + assert(unique_queue_family_count < MAX_QUEUE_FAMILY_COUNT); + unique_queue_families[unique_queue_family_count++] = + queue_family_indices.present_family; + } + + float queue_priority = 1.f; + VkDeviceQueueCreateInfo queue_create_infos[MAX_QUEUE_FAMILY_COUNT] = {}; + int queue_create_info_count = 0; + for (int unique_queue_family_index = 0; + unique_queue_family_index < unique_queue_family_count; + unique_queue_family_index++) { + queue_create_infos[queue_create_info_count++] = (VkDeviceQueueCreateInfo){ + .sType = VK_STRUCTURE_TYPE_DEVICE_QUEUE_CREATE_INFO, + .queueFamilyIndex = unique_queue_families[unique_queue_family_index], + .queueCount = 1, + .pQueuePriorities = &queue_priority}; + } + + VkPhysicalDeviceFeatures device_features = { + .samplerAnisotropy = VK_TRUE, + }; + VkDeviceCreateInfo create_info = { + .sType = VK_STRUCTURE_TYPE_DEVICE_CREATE_INFO, + .pQueueCreateInfos = queue_create_infos, + .queueCreateInfoCount = queue_create_info_count, + .pEnabledFeatures = &device_features, + .ppEnabledExtensionNames = DEVICE_EXTENSIONS, + .enabledExtensionCount = DEVICE_EXTENSION_COUNT}; + if (vkCreateDevice(physical_device, &create_info, nullptr, device) != + VK_SUCCESS) { + VGLTF_LOG_ERR("Failed to create logical device"); + goto err; + } + + vkGetDeviceQueue(*device, queue_family_indices.graphics_family, 0, + graphics_queue); + vkGetDeviceQueue(*device, queue_family_indices.present_family, 0, + present_queue); + + return true; +err: + return false; +} + +static bool create_allocator(VmaAllocator *allocator, + struct vgltf_vk_device *device, + struct vgltf_vk_instance *instance) { + VmaAllocatorCreateInfo create_info = {.device = 
device->device, + .instance = instance->instance, + .physicalDevice = + device->physical_device}; + + if (vmaCreateAllocator(&create_info, allocator) != VK_SUCCESS) { + VGLTF_LOG_ERR("Couldn't create VMA allocator"); + goto err; + } + return true; +err: + return false; +} + +static bool vgltf_vk_surface_init(struct vgltf_vk_surface *surface, + struct vgltf_vk_instance *instance, + struct vgltf_platform *platform) { + if (!vgltf_platform_create_vulkan_surface(platform, instance->instance, + &surface->surface)) { + VGLTF_LOG_ERR("Couldn't create surface"); + goto err; + } + + return true; +err: + return false; +} + +static void vgltf_vk_surface_deinit(struct vgltf_vk_surface *surface, + struct vgltf_vk_instance *instance) { + vkDestroySurfaceKHR(instance->instance, surface->surface, nullptr); +} + +static VkSurfaceFormatKHR +choose_swapchain_surface_format(VkSurfaceFormatKHR *available_formats, + uint32_t available_format_count) { + for (uint32_t available_format_index = 0; + available_format_index < available_format_count; + available_format_index++) { + VkSurfaceFormatKHR *available_format = + &available_formats[available_format_index]; + if (available_format->format == VK_FORMAT_B8G8R8A8_SRGB && + available_format->colorSpace == VK_COLOR_SPACE_SRGB_NONLINEAR_KHR) { + return *available_format; + } + } + + return available_formats[0]; +} + +static VkPresentModeKHR +choose_swapchain_present_mode(VkPresentModeKHR *available_modes, + uint32_t available_mode_count) { + for (uint32_t available_mode_index = 0; + available_mode_index < available_mode_count; available_mode_index++) { + VkPresentModeKHR available_mode = available_modes[available_mode_index]; + if (available_mode == VK_PRESENT_MODE_MAILBOX_KHR) { + return available_mode; + } + } + + return VK_PRESENT_MODE_FIFO_KHR; +} + +static uint32_t clamp_uint32(uint32_t min, uint32_t max, uint32_t value) { + return value < min ? min : value > max ? 
max : value; +} + +static VkExtent2D +choose_swapchain_extent(const VkSurfaceCapabilitiesKHR *capabilities, int width, + int height) { + if (capabilities->currentExtent.width != UINT32_MAX) { + return capabilities->currentExtent; + } else { + VkExtent2D actual_extent = {width, height}; + actual_extent.width = + clamp_uint32(capabilities->minImageExtent.width, + capabilities->maxImageExtent.width, actual_extent.width); + actual_extent.height = + clamp_uint32(capabilities->minImageExtent.height, + capabilities->maxImageExtent.height, actual_extent.height); + return actual_extent; + } +} + +static bool create_swapchain(struct vgltf_vk_swapchain *swapchain, + struct vgltf_vk_device *device, + struct vgltf_vk_surface *surface, + struct vgltf_window_size *window_size) { + struct swapchain_support_details swapchain_support_details = {}; + swapchain_support_details_query_from_device( + &swapchain_support_details, device->physical_device, surface->surface); + + VkSurfaceFormatKHR surface_format = + choose_swapchain_surface_format(swapchain_support_details.formats, + swapchain_support_details.format_count); + VkPresentModeKHR present_mode = choose_swapchain_present_mode( + swapchain_support_details.present_modes, + swapchain_support_details.present_mode_count); + + VkExtent2D extent = + choose_swapchain_extent(&swapchain_support_details.capabilities, + window_size->width, window_size->height); + uint32_t image_count = + swapchain_support_details.capabilities.minImageCount + 1; + if (swapchain_support_details.capabilities.maxImageCount > 0 && + image_count > swapchain_support_details.capabilities.maxImageCount) { + image_count = swapchain_support_details.capabilities.maxImageCount; + } + + VkSwapchainCreateInfoKHR create_info = { + .sType = VK_STRUCTURE_TYPE_SWAPCHAIN_CREATE_INFO_KHR, + .surface = surface->surface, + .minImageCount = image_count, + .imageFormat = surface_format.format, + .imageColorSpace = surface_format.colorSpace, + .imageExtent = extent, + .imageArrayLayers = 1, + .imageUsage = VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT}; + struct queue_family_indices indices = {}; + queue_family_indices_for_device(&indices, device->physical_device, + surface->surface); + uint32_t queue_family_indices[] = {indices.graphics_family, + indices.present_family}; + if (indices.graphics_family != indices.present_family) { + create_info.imageSharingMode = VK_SHARING_MODE_CONCURRENT; + create_info.queueFamilyIndexCount = 2; + create_info.pQueueFamilyIndices = queue_family_indices; + } else { + create_info.imageSharingMode = VK_SHARING_MODE_EXCLUSIVE; + } + + create_info.preTransform = + swapchain_support_details.capabilities.currentTransform; + create_info.compositeAlpha = VK_COMPOSITE_ALPHA_OPAQUE_BIT_KHR; + create_info.presentMode = present_mode; + create_info.clipped = VK_TRUE; + create_info.oldSwapchain = VK_NULL_HANDLE; + + if (vkCreateSwapchainKHR(device->device, &create_info, nullptr, + &swapchain->swapchain) != VK_SUCCESS) { + VGLTF_LOG_ERR("Swapchain creation failed!"); + goto err; + } + + if (vkGetSwapchainImagesKHR(device->device, swapchain->swapchain, + &swapchain->swapchain_image_count, + nullptr) != VK_SUCCESS) { + VGLTF_LOG_ERR("Couldn't get swapchain image count"); + goto destroy_swapchain; + } + + if (swapchain->swapchain_image_count > + VGLTF_RENDERER_MAX_SWAPCHAIN_IMAGE_COUNT) { + VGLTF_LOG_ERR("Swapchain image array cannot fit all %d swapchain images", + swapchain->swapchain_image_count); + goto destroy_swapchain; + } + + if (vkGetSwapchainImagesKHR(device->device, swapchain->swapchain, + 
&swapchain->swapchain_image_count, + swapchain->swapchain_images) != VK_SUCCESS) { + VGLTF_LOG_ERR("Couldn't get swapchain images"); + goto destroy_swapchain; + } + + swapchain->swapchain_image_format = surface_format.format; + swapchain->swapchain_extent = extent; + + return true; +destroy_swapchain: + vkDestroySwapchainKHR(device->device, swapchain->swapchain, nullptr); +err: + return false; +} + +static bool create_image_view(struct vgltf_vk_device *device, VkImage image, + VkFormat format, VkImageView *image_view, + VkImageAspectFlags aspect_flags, + uint32_t mip_level_count) { + + VkImageViewCreateInfo create_info = { + .sType = VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO, + .image = image, + .viewType = VK_IMAGE_VIEW_TYPE_2D, + .format = format, + .components = {VK_COMPONENT_SWIZZLE_IDENTITY, + VK_COMPONENT_SWIZZLE_IDENTITY, + VK_COMPONENT_SWIZZLE_IDENTITY, + VK_COMPONENT_SWIZZLE_IDENTITY}, + .subresourceRange = {.aspectMask = aspect_flags, + .levelCount = mip_level_count, + .layerCount = 1}}; + if (vkCreateImageView(device->device, &create_info, nullptr, image_view) != + VK_SUCCESS) { + return false; + } + + return true; +} + +static bool create_swapchain_image_views(struct vgltf_vk_swapchain *swapchain, + struct vgltf_vk_device *device) { + uint32_t swapchain_image_index; + for (swapchain_image_index = 0; + swapchain_image_index < swapchain->swapchain_image_count; + swapchain_image_index++) { + VkImage swapchain_image = + swapchain->swapchain_images[swapchain_image_index]; + + if (!create_image_view( + device, swapchain_image, swapchain->swapchain_image_format, + &swapchain->swapchain_image_views[swapchain_image_index], + VK_IMAGE_ASPECT_COLOR_BIT, 1)) { + goto err; + } + } + return true; +err: + for (uint32_t to_remove_index = 0; to_remove_index < swapchain_image_index; + to_remove_index++) { + vkDestroyImageView(device->device, + swapchain->swapchain_image_views[to_remove_index], + nullptr); + } + return false; +} + +static bool create_shader_module(VkDevice device, const unsigned char *code, + int size, VkShaderModule *out) { + VkShaderModuleCreateInfo create_info = { + .sType = VK_STRUCTURE_TYPE_SHADER_MODULE_CREATE_INFO, + .codeSize = size, + .pCode = (const uint32_t *)code, + }; + if (vkCreateShaderModule(device, &create_info, nullptr, out) != VK_SUCCESS) { + VGLTF_LOG_ERR("Couldn't create shader module"); + goto err; + } + return true; +err: + return false; +} + +static VkFormat find_supported_format(struct vgltf_renderer *renderer, + const VkFormat *candidates, + int candidate_count, VkImageTiling tiling, + VkFormatFeatureFlags features) { + for (int candidate_index = 0; candidate_index < candidate_count; + candidate_index++) { + VkFormat candidate = candidates[candidate_index]; + VkFormatProperties properties; + vkGetPhysicalDeviceFormatProperties(renderer->device.physical_device, + candidate, &properties); + if (tiling == VK_IMAGE_TILING_LINEAR && + (properties.linearTilingFeatures & features) == features) { + return candidate; + } else if (tiling == VK_IMAGE_TILING_OPTIMAL && + (properties.optimalTilingFeatures & features) == features) { + return candidate; + } + } + + return VK_FORMAT_UNDEFINED; +} + +static VkFormat find_depth_format(struct vgltf_renderer *renderer) { + return find_supported_format(renderer, + (const VkFormat[]){VK_FORMAT_D32_SFLOAT, + VK_FORMAT_D32_SFLOAT_S8_UINT, + VK_FORMAT_D24_UNORM_S8_UINT}, + 3, VK_IMAGE_TILING_OPTIMAL, + VK_FORMAT_FEATURE_DEPTH_STENCIL_ATTACHMENT_BIT); +} + +static bool vgltf_renderer_create_render_pass(struct vgltf_renderer 
*renderer) { + VkAttachmentDescription color_attachment = { + .format = renderer->swapchain.swapchain_image_format, + .samples = VK_SAMPLE_COUNT_1_BIT, + .loadOp = VK_ATTACHMENT_LOAD_OP_CLEAR, + .storeOp = VK_ATTACHMENT_STORE_OP_STORE, + .stencilLoadOp = VK_ATTACHMENT_LOAD_OP_DONT_CARE, + .stencilStoreOp = VK_ATTACHMENT_STORE_OP_DONT_CARE, + .initialLayout = VK_IMAGE_LAYOUT_UNDEFINED, + .finalLayout = VK_IMAGE_LAYOUT_PRESENT_SRC_KHR}; + VkAttachmentReference color_attachment_ref = { + .attachment = 0, + .layout = VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL, + }; + VkAttachmentDescription depth_attachment = { + .format = find_depth_format(renderer), + .samples = VK_SAMPLE_COUNT_1_BIT, + .loadOp = VK_ATTACHMENT_LOAD_OP_CLEAR, + .storeOp = VK_ATTACHMENT_STORE_OP_DONT_CARE, + .stencilLoadOp = VK_ATTACHMENT_LOAD_OP_DONT_CARE, + .stencilStoreOp = VK_ATTACHMENT_STORE_OP_DONT_CARE, + .initialLayout = VK_IMAGE_LAYOUT_UNDEFINED, + .finalLayout = VK_IMAGE_LAYOUT_DEPTH_STENCIL_ATTACHMENT_OPTIMAL}; + VkAttachmentReference depth_attachment_ref = { + .attachment = 1, + .layout = VK_IMAGE_LAYOUT_DEPTH_STENCIL_ATTACHMENT_OPTIMAL, + }; + + VkSubpassDescription subpass = { + .pipelineBindPoint = VK_PIPELINE_BIND_POINT_GRAPHICS, + .pColorAttachments = &color_attachment_ref, + .colorAttachmentCount = 1, + .pDepthStencilAttachment = &depth_attachment_ref}; + VkSubpassDependency dependency = { + .srcSubpass = VK_SUBPASS_EXTERNAL, + .dstSubpass = 0, + .srcStageMask = VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT | + VK_PIPELINE_STAGE_EARLY_FRAGMENT_TESTS_BIT, + .srcAccessMask = 0, + .dstStageMask = VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT | + VK_PIPELINE_STAGE_EARLY_FRAGMENT_TESTS_BIT, + .dstAccessMask = VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT | + VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_WRITE_BIT}; + + VkAttachmentDescription attachments[] = {color_attachment, depth_attachment}; + int attachment_count = sizeof(attachments) / sizeof(attachments[0]); + VkRenderPassCreateInfo render_pass_info = { + .sType = VK_STRUCTURE_TYPE_RENDER_PASS_CREATE_INFO, + .attachmentCount = attachment_count, + .pAttachments = attachments, + .subpassCount = 1, + .pSubpasses = &subpass, + .dependencyCount = 1, + .pDependencies = &dependency}; + + if (vkCreateRenderPass(renderer->device.device, &render_pass_info, nullptr, + &renderer->render_pass) != VK_SUCCESS) { + VGLTF_LOG_ERR("Failed to create render pass"); + goto err; + } + + return true; +err: + return false; +} + +static bool +vgltf_renderer_create_descriptor_set_layout(struct vgltf_renderer *renderer) { + VkDescriptorSetLayoutBinding ubo_layout_binding = { + .binding = 0, + .descriptorType = VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER, + .descriptorCount = 1, + .stageFlags = VK_SHADER_STAGE_VERTEX_BIT, + }; + VkDescriptorSetLayoutBinding sampler_layout_binding = { + .binding = 1, + .descriptorCount = 1, + .descriptorType = VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER, + .stageFlags = VK_SHADER_STAGE_FRAGMENT_BIT, + }; + + VkDescriptorSetLayoutBinding bindings[] = {ubo_layout_binding, + sampler_layout_binding}; + int binding_count = sizeof(bindings) / sizeof(bindings[0]); + + VkDescriptorSetLayoutCreateInfo layout_info = { + .sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_CREATE_INFO, + .bindingCount = binding_count, + .pBindings = bindings}; + + if (vkCreateDescriptorSetLayout(renderer->device.device, &layout_info, + nullptr, &renderer->descriptor_set_layout) != + VK_SUCCESS) { + VGLTF_LOG_ERR("Failed to create descriptor set layout"); + goto err; + } + return true; +err: + return false; +} + 
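+/* The layout above must line up with the shader interface: binding 0 is the
+ * per-frame UBO read by the vertex stage and binding 1 the combined image
+ * sampler read by the fragment stage. Assuming tutorial-style shaders, the
+ * matching GLSL declarations would look roughly like:
+ *
+ *   layout(binding = 0) uniform UniformBufferObject {
+ *       mat4 model;
+ *       mat4 view;
+ *       mat4 projection;
+ *   } ubo;                                     // vertex stage
+ *   layout(binding = 1) uniform sampler2D tex; // fragment stage
+ */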
+static bool +vgltf_renderer_create_graphics_pipeline(struct vgltf_renderer *renderer) { + static constexpr unsigned char triangle_shader_vert_code[] = { +#embed "../compiled_shaders/triangle.vert.spv" + }; + static constexpr unsigned char triangle_shader_frag_code[] = { +#embed "../compiled_shaders/triangle.frag.spv" + }; + + VkShaderModule triangle_shader_vert_module; + if (!create_shader_module(renderer->device.device, triangle_shader_vert_code, + sizeof(triangle_shader_vert_code), + &triangle_shader_vert_module)) { + VGLTF_LOG_ERR("Couldn't create triangle vert shader module"); + goto err; + } + + VkShaderModule triangle_shader_frag_module; + if (!create_shader_module(renderer->device.device, triangle_shader_frag_code, + sizeof(triangle_shader_frag_code), + &triangle_shader_frag_module)) { + VGLTF_LOG_ERR("Couldn't create triangle frag shader module"); + goto destroy_vert_shader_module; + } + + VkPipelineShaderStageCreateInfo triangle_shader_vert_stage_create_info = { + .sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO, + .stage = VK_SHADER_STAGE_VERTEX_BIT, + .module = triangle_shader_vert_module, + .pName = "main"}; + VkPipelineShaderStageCreateInfo triangle_shader_frag_stage_create_info = { + .sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO, + .stage = VK_SHADER_STAGE_FRAGMENT_BIT, + .module = triangle_shader_frag_module, + .pName = "main"}; + VkPipelineShaderStageCreateInfo shader_stages[] = { + triangle_shader_vert_stage_create_info, + triangle_shader_frag_stage_create_info}; + + VkDynamicState dynamic_states[] = { + VK_DYNAMIC_STATE_VIEWPORT, + VK_DYNAMIC_STATE_SCISSOR, + }; + + VkPipelineDynamicStateCreateInfo dynamic_state = { + .sType = VK_STRUCTURE_TYPE_PIPELINE_DYNAMIC_STATE_CREATE_INFO, + .dynamicStateCount = sizeof(dynamic_states) / sizeof(dynamic_states[0]), + .pDynamicStates = dynamic_states}; + + VkVertexInputBindingDescription vertex_binding_description = + vgltf_vertex_binding_description(); + struct vgltf_vertex_input_attribute_descriptions + vertex_attribute_descriptions = vgltf_vertex_attribute_descriptions(); + + VkPipelineVertexInputStateCreateInfo vertex_input_info = { + .sType = VK_STRUCTURE_TYPE_PIPELINE_VERTEX_INPUT_STATE_CREATE_INFO, + .vertexBindingDescriptionCount = 1, + .vertexAttributeDescriptionCount = vertex_attribute_descriptions.count, + .pVertexBindingDescriptions = &vertex_binding_description, + .pVertexAttributeDescriptions = + vertex_attribute_descriptions.descriptions}; + + VkPipelineInputAssemblyStateCreateInfo input_assembly = { + .sType = VK_STRUCTURE_TYPE_PIPELINE_INPUT_ASSEMBLY_STATE_CREATE_INFO, + .topology = VK_PRIMITIVE_TOPOLOGY_TRIANGLE_LIST, + .primitiveRestartEnable = VK_FALSE, + }; + + VkPipelineViewportStateCreateInfo viewport_state = { + .sType = VK_STRUCTURE_TYPE_PIPELINE_VIEWPORT_STATE_CREATE_INFO, + .viewportCount = 1, + .scissorCount = 1}; + + VkPipelineRasterizationStateCreateInfo rasterizer = { + .sType = VK_STRUCTURE_TYPE_PIPELINE_RASTERIZATION_STATE_CREATE_INFO, + .depthClampEnable = VK_FALSE, + .rasterizerDiscardEnable = VK_FALSE, + .polygonMode = VK_POLYGON_MODE_FILL, + .lineWidth = 1.f, + .cullMode = VK_CULL_MODE_BACK_BIT, + .frontFace = VK_FRONT_FACE_COUNTER_CLOCKWISE, + .depthBiasEnable = VK_FALSE}; + + VkPipelineMultisampleStateCreateInfo multisampling = { + .sType = VK_STRUCTURE_TYPE_PIPELINE_MULTISAMPLE_STATE_CREATE_INFO, + .sampleShadingEnable = VK_FALSE, + .rasterizationSamples = VK_SAMPLE_COUNT_1_BIT, + }; + + VkPipelineColorBlendAttachmentState color_blend_attachment = { + .colorWriteMask 
= VK_COLOR_COMPONENT_R_BIT | VK_COLOR_COMPONENT_G_BIT | + VK_COLOR_COMPONENT_B_BIT | VK_COLOR_COMPONENT_A_BIT, + .blendEnable = VK_FALSE, + }; + + VkPipelineDepthStencilStateCreateInfo depth_stencil = { + .sType = VK_STRUCTURE_TYPE_PIPELINE_DEPTH_STENCIL_STATE_CREATE_INFO, + .depthTestEnable = VK_TRUE, + .depthWriteEnable = VK_TRUE, + .depthCompareOp = VK_COMPARE_OP_LESS, + .depthBoundsTestEnable = VK_FALSE, + .stencilTestEnable = VK_FALSE, + }; + + VkPipelineColorBlendStateCreateInfo color_blending = { + .sType = VK_STRUCTURE_TYPE_PIPELINE_COLOR_BLEND_STATE_CREATE_INFO, + .logicOpEnable = VK_FALSE, + .attachmentCount = 1, + .pAttachments = &color_blend_attachment}; + + VkPipelineLayoutCreateInfo pipeline_layout_info = { + .sType = VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO, + .setLayoutCount = 1, + .pSetLayouts = &renderer->descriptor_set_layout}; + + if (vkCreatePipelineLayout(renderer->device.device, &pipeline_layout_info, + nullptr, + &renderer->pipeline_layout) != VK_SUCCESS) { + VGLTF_LOG_ERR("Couldn't create pipeline layout"); + goto destroy_frag_shader_module; + } + + VkGraphicsPipelineCreateInfo pipeline_info = { + .sType = VK_STRUCTURE_TYPE_GRAPHICS_PIPELINE_CREATE_INFO, + .stageCount = 2, + .pStages = shader_stages, + .pVertexInputState = &vertex_input_info, + .pInputAssemblyState = &input_assembly, + .pViewportState = &viewport_state, + .pRasterizationState = &rasterizer, + .pMultisampleState = &multisampling, + .pColorBlendState = &color_blending, + .pDepthStencilState = &depth_stencil, + .pDynamicState = &dynamic_state, + .layout = renderer->pipeline_layout, + .renderPass = renderer->render_pass, + .subpass = 0, + }; + + if (vkCreateGraphicsPipelines(renderer->device.device, VK_NULL_HANDLE, 1, + &pipeline_info, nullptr, + &renderer->graphics_pipeline) != VK_SUCCESS) { + VGLTF_LOG_ERR("Couldn't create pipeline"); + goto destroy_pipeline_layout; + } + + vkDestroyShaderModule(renderer->device.device, triangle_shader_frag_module, + nullptr); + vkDestroyShaderModule(renderer->device.device, triangle_shader_vert_module, + nullptr); + return true; +destroy_pipeline_layout: + vkDestroyPipelineLayout(renderer->device.device, renderer->pipeline_layout, + nullptr); +destroy_frag_shader_module: + vkDestroyShaderModule(renderer->device.device, triangle_shader_frag_module, + nullptr); +destroy_vert_shader_module: + vkDestroyShaderModule(renderer->device.device, triangle_shader_vert_module, + nullptr); +err: + return false; +} + +static bool +vgltf_renderer_create_framebuffers(struct vgltf_renderer *renderer) { + for (uint32_t i = 0; i < renderer->swapchain.swapchain_image_count; i++) { + VkImageView attachments[] = {renderer->swapchain.swapchain_image_views[i], + renderer->depth_image_view}; + int attachment_count = sizeof(attachments) / sizeof(attachments[0]); + + VkFramebufferCreateInfo framebuffer_info = { + .sType = VK_STRUCTURE_TYPE_FRAMEBUFFER_CREATE_INFO, + .renderPass = renderer->render_pass, + .attachmentCount = attachment_count, + .pAttachments = attachments, + .width = renderer->swapchain.swapchain_extent.width, + .height = renderer->swapchain.swapchain_extent.height, + .layers = 1}; + + if (vkCreateFramebuffer(renderer->device.device, &framebuffer_info, nullptr, + &renderer->swapchain_framebuffers[i]) != + VK_SUCCESS) { + VGLTF_LOG_ERR("Failed to create framebuffer"); + goto err; + } + } + + return true; +err: + return false; +} + +static bool +vgltf_renderer_create_command_pool(struct vgltf_renderer *renderer) { + struct queue_family_indices queue_family_indices = {}; + if 
(!queue_family_indices_for_device(&queue_family_indices,
+                                      renderer->device.physical_device,
+                                      renderer->surface.surface)) {
+    VGLTF_LOG_ERR("Couldn't fetch queue family indices");
+    goto err;
+  }
+
+  VkCommandPoolCreateInfo pool_info = {
+      .sType = VK_STRUCTURE_TYPE_COMMAND_POOL_CREATE_INFO,
+      .flags = VK_COMMAND_POOL_CREATE_RESET_COMMAND_BUFFER_BIT,
+      .queueFamilyIndex = queue_family_indices.graphics_family};
+
+  if (vkCreateCommandPool(renderer->device.device, &pool_info, nullptr,
+                          &renderer->command_pool) != VK_SUCCESS) {
+    VGLTF_LOG_ERR("Couldn't create command pool");
+    goto err;
+  }
+
+  return true;
+err:
+  return false;
+}
+
+static VkCommandBuffer
+begin_single_time_commands(struct vgltf_renderer *renderer) {
+  VkCommandBufferAllocateInfo allocate_info = {
+      .sType = VK_STRUCTURE_TYPE_COMMAND_BUFFER_ALLOCATE_INFO,
+      .level = VK_COMMAND_BUFFER_LEVEL_PRIMARY,
+      .commandPool = renderer->command_pool,
+      .commandBufferCount = 1};
+
+  VkCommandBuffer command_buffer;
+  vkAllocateCommandBuffers(renderer->device.device, &allocate_info,
+                           &command_buffer);
+
+  VkCommandBufferBeginInfo begin_info = {
+      .sType = VK_STRUCTURE_TYPE_COMMAND_BUFFER_BEGIN_INFO,
+      .flags = VK_COMMAND_BUFFER_USAGE_ONE_TIME_SUBMIT_BIT};
+
+  vkBeginCommandBuffer(command_buffer, &begin_info);
+
+  return command_buffer;
+}
+
+static void end_single_time_commands(struct vgltf_renderer *renderer,
+                                     VkCommandBuffer command_buffer) {
+  vkEndCommandBuffer(command_buffer);
+  VkSubmitInfo submit_info = {.sType = VK_STRUCTURE_TYPE_SUBMIT_INFO,
+                              .commandBufferCount = 1,
+                              .pCommandBuffers = &command_buffer};
+
+  vkQueueSubmit(renderer->device.graphics_queue, 1, &submit_info,
+                VK_NULL_HANDLE);
+  vkQueueWaitIdle(renderer->device.graphics_queue);
+  vkFreeCommandBuffers(renderer->device.device, renderer->command_pool, 1,
+                       &command_buffer);
+}
+
+static bool vgltf_renderer_copy_buffer(struct vgltf_renderer *renderer,
+                                       VkBuffer src_buffer, VkBuffer dst_buffer,
+                                       VkDeviceSize size) {
+  VkCommandBuffer command_buffer = begin_single_time_commands(renderer);
+  VkBufferCopy copy_region = {.size = size};
+  vkCmdCopyBuffer(command_buffer, src_buffer, dst_buffer, 1, &copy_region);
+  end_single_time_commands(renderer, command_buffer);
+  return true;
+}
+
+static void vgltf_renderer_create_image(
+    struct vgltf_renderer *renderer, uint32_t width, uint32_t height,
+    uint32_t mip_level_count, VkFormat format, VkImageTiling tiling,
+    VkImageUsageFlags usage, VkMemoryPropertyFlags properties,
+    struct vgltf_renderer_allocated_image *image) {
+
+  vmaCreateImage(
+      renderer->device.allocator,
+      &(const VkImageCreateInfo){
+          .sType = VK_STRUCTURE_TYPE_IMAGE_CREATE_INFO,
+          .imageType = VK_IMAGE_TYPE_2D,
+          .extent = {width, height, 1},
+          .mipLevels = mip_level_count,
+          .arrayLayers = 1,
+          .format = format,
+          .tiling = tiling,
+          .initialLayout = VK_IMAGE_LAYOUT_UNDEFINED,
+          .usage = usage,
+          .sharingMode = VK_SHARING_MODE_EXCLUSIVE,
+          .samples = VK_SAMPLE_COUNT_1_BIT,
+      },
+      &(const VmaAllocationCreateInfo){.usage = VMA_MEMORY_USAGE_GPU_ONLY,
+                                       .requiredFlags = properties},
+      &image->image, &image->allocation, &image->info);
+}
+
+static bool has_stencil_component(VkFormat format) {
+  return format == VK_FORMAT_D32_SFLOAT_S8_UINT ||
+         format == VK_FORMAT_D24_UNORM_S8_UINT;
+}
+
+static bool transition_image_layout(struct vgltf_renderer *renderer,
+                                    VkImage image, VkFormat format,
+                                    VkImageLayout old_layout,
+                                    VkImageLayout new_layout,
+                                    uint32_t mip_level_count) {
+  VkCommandBuffer command_buffer = begin_single_time_commands(renderer);
+
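+  // Only the three transitions this renderer needs are handled here:
+  // UNDEFINED -> TRANSFER_DST before uploading texels,
+  // TRANSFER_DST -> SHADER_READ_ONLY before sampling, and
+  // UNDEFINED -> DEPTH_STENCIL_ATTACHMENT for the depth buffer.
+  // Any other combination falls through to the error path below.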
+  VkImageMemoryBarrier barrier = {
+      .sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER,
+      .oldLayout = old_layout,
+      .newLayout = new_layout,
+      .srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
+      .dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
+      .image = image,
+      .subresourceRange = {.aspectMask = VK_IMAGE_ASPECT_COLOR_BIT,
+                           .baseMipLevel = 0,
+                           .levelCount = mip_level_count,
+                           .baseArrayLayer = 0,
+                           .layerCount = 1},
+      .srcAccessMask = 0,
+      .dstAccessMask = 0};
+
+  if (new_layout == VK_IMAGE_LAYOUT_DEPTH_STENCIL_ATTACHMENT_OPTIMAL) {
+    barrier.subresourceRange.aspectMask = VK_IMAGE_ASPECT_DEPTH_BIT;
+
+    if (has_stencil_component(format)) {
+      barrier.subresourceRange.aspectMask |= VK_IMAGE_ASPECT_STENCIL_BIT;
+    }
+  } else {
+    barrier.subresourceRange.aspectMask = VK_IMAGE_ASPECT_COLOR_BIT;
+  }
+
+  VkPipelineStageFlags source_stage;
+  VkPipelineStageFlags destination_stage;
+  if (old_layout == VK_IMAGE_LAYOUT_UNDEFINED &&
+      new_layout == VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL) {
+    barrier.srcAccessMask = 0;
+    barrier.dstAccessMask = VK_ACCESS_TRANSFER_WRITE_BIT;
+    source_stage = VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT;
+    destination_stage = VK_PIPELINE_STAGE_TRANSFER_BIT;
+  } else if (old_layout == VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL &&
+             new_layout == VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL) {
+    barrier.srcAccessMask = VK_ACCESS_TRANSFER_WRITE_BIT;
+    barrier.dstAccessMask = VK_ACCESS_SHADER_READ_BIT;
+    source_stage = VK_PIPELINE_STAGE_TRANSFER_BIT;
+    destination_stage = VK_PIPELINE_STAGE_FRAGMENT_SHADER_BIT;
+  } else if (old_layout == VK_IMAGE_LAYOUT_UNDEFINED &&
+             new_layout == VK_IMAGE_LAYOUT_DEPTH_STENCIL_ATTACHMENT_OPTIMAL) {
+    barrier.srcAccessMask = 0;
+    barrier.dstAccessMask = VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_READ_BIT |
+                            VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_WRITE_BIT;
+    source_stage = VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT;
+    destination_stage = VK_PIPELINE_STAGE_EARLY_FRAGMENT_TESTS_BIT;
+  } else {
+    goto err;
+  }
+
+  vkCmdPipelineBarrier(command_buffer, source_stage, destination_stage, 0, 0,
+                       nullptr, 0, nullptr, 1, &barrier);
+
+  end_single_time_commands(renderer, command_buffer);
+  return true;
+err:
+  return false;
+}
+
+void copy_buffer_to_image(struct vgltf_renderer *renderer, VkBuffer buffer,
+                          VkImage image, uint32_t width, uint32_t height) {
+  VkCommandBuffer command_buffer = begin_single_time_commands(renderer);
+  VkBufferImageCopy region = {
+      .bufferOffset = 0,
+      .bufferRowLength = 0,
+      .bufferImageHeight = 0,
+      .imageSubresource = {.aspectMask = VK_IMAGE_ASPECT_COLOR_BIT,
+                           .mipLevel = 0,
+                           .baseArrayLayer = 0,
+                           .layerCount = 1},
+      .imageOffset = {0, 0, 0},
+      .imageExtent = {width, height, 1}};
+
+  vkCmdCopyBufferToImage(command_buffer, buffer, image,
+                         VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL, 1, &region);
+
+  end_single_time_commands(renderer, command_buffer);
+}
+
+static bool
+vgltf_renderer_create_depth_resources(struct vgltf_renderer *renderer) {
+  VkFormat depth_format = find_depth_format(renderer);
+  vgltf_renderer_create_image(
+      renderer, renderer->swapchain.swapchain_extent.width,
+      renderer->swapchain.swapchain_extent.height, 1, depth_format,
+      VK_IMAGE_TILING_OPTIMAL, VK_IMAGE_USAGE_DEPTH_STENCIL_ATTACHMENT_BIT,
+      VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT, &renderer->depth_image);
+  create_image_view(&renderer->device, renderer->depth_image.image,
+                    depth_format, &renderer->depth_image_view,
+                    VK_IMAGE_ASPECT_DEPTH_BIT, 1);
+
+  transition_image_layout(renderer, renderer->depth_image.image, depth_format,
+                          VK_IMAGE_LAYOUT_UNDEFINED,
+                          VK_IMAGE_LAYOUT_DEPTH_STENCIL_ATTACHMENT_OPTIMAL,
1); + return true; +} + +static bool +vgltf_renderer_create_buffer(struct vgltf_renderer *renderer, VkDeviceSize size, + VkBufferUsageFlags usage, + VkMemoryPropertyFlags properties, + struct vgltf_renderer_allocated_buffer *buffer) { + VkBufferCreateInfo buffer_info = {.sType = + VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO, + .size = size, + .usage = usage, + .sharingMode = VK_SHARING_MODE_EXCLUSIVE}; + VmaAllocationCreateInfo alloc_info = { + .usage = VMA_MEMORY_USAGE_AUTO, + .flags = VMA_ALLOCATION_CREATE_HOST_ACCESS_ALLOW_TRANSFER_INSTEAD_BIT | + VMA_ALLOCATION_CREATE_HOST_ACCESS_SEQUENTIAL_WRITE_BIT, + .preferredFlags = properties}; + + if (vmaCreateBuffer(renderer->device.allocator, &buffer_info, &alloc_info, + &buffer->buffer, &buffer->allocation, + &buffer->info) != VK_SUCCESS) { + VGLTF_LOG_ERR("Failed to create buffer"); + goto err; + } + + return true; +err: + return false; +} + +static void generate_mipmaps(struct vgltf_renderer *renderer, VkImage image, + VkFormat image_format, int32_t texture_width, + int32_t texture_height, uint32_t mip_levels) { + VkFormatProperties format_properties; + vkGetPhysicalDeviceFormatProperties(renderer->device.physical_device, + image_format, &format_properties); + if (!(format_properties.optimalTilingFeatures & + VK_FORMAT_FEATURE_SAMPLED_IMAGE_FILTER_LINEAR_BIT)) { + VGLTF_PANIC("Texture image format does not support linear blitting!"); + } + + VkCommandBuffer command_buffer = begin_single_time_commands(renderer); + VkImageMemoryBarrier barrier = { + .sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER, + .image = image, + .srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, + .dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, + .subresourceRange = {.aspectMask = VK_IMAGE_ASPECT_COLOR_BIT, + .baseArrayLayer = 0, + .layerCount = 1, + .levelCount = 1}}; + + int32_t mip_width = texture_width; + int32_t mip_height = texture_height; + + for (uint32_t i = 1; i < mip_levels; i++) { + barrier.subresourceRange.baseMipLevel = i - 1; + barrier.oldLayout = VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL; + barrier.newLayout = VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL; + barrier.srcAccessMask = VK_ACCESS_TRANSFER_WRITE_BIT; + barrier.dstAccessMask = VK_ACCESS_TRANSFER_READ_BIT; + + vkCmdPipelineBarrier(command_buffer, VK_PIPELINE_STAGE_TRANSFER_BIT, + VK_PIPELINE_STAGE_TRANSFER_BIT, 0, 0, nullptr, 0, + nullptr, 1, &barrier); + VkImageBlit blit = { + .srcOffsets = {{0, 0, 0}, {mip_width, mip_height, 1}}, + .srcSubresource = {.aspectMask = VK_IMAGE_ASPECT_COLOR_BIT, + .mipLevel = i - 1, + .baseArrayLayer = 0, + .layerCount = 1}, + .dstOffsets = {{0, 0, 0}, + {mip_width > 1 ? mip_width / 2 : 1, + mip_height > 1 ? 
mip_height / 2 : 1, 1}}, + .dstSubresource = {.aspectMask = VK_IMAGE_ASPECT_COLOR_BIT, + .mipLevel = i, + .baseArrayLayer = 0, + .layerCount = 1}, + }; + vkCmdBlitImage(command_buffer, image, VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL, + image, VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL, 1, &blit, + VK_FILTER_LINEAR); + barrier.oldLayout = VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL; + barrier.newLayout = VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL; + barrier.srcAccessMask = VK_ACCESS_TRANSFER_READ_BIT; + barrier.dstAccessMask = VK_ACCESS_SHADER_READ_BIT; + + vkCmdPipelineBarrier(command_buffer, VK_PIPELINE_STAGE_TRANSFER_BIT, + VK_PIPELINE_STAGE_FRAGMENT_SHADER_BIT, 0, 0, nullptr, + 0, nullptr, 1, &barrier); + if (mip_width > 1) + mip_width /= 2; + if (mip_height > 1) + mip_height /= 2; + } + barrier.subresourceRange.baseMipLevel = mip_levels - 1; + barrier.oldLayout = VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL; + barrier.newLayout = VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL; + barrier.srcAccessMask = VK_ACCESS_TRANSFER_WRITE_BIT; + barrier.dstAccessMask = VK_ACCESS_SHADER_READ_BIT; + vkCmdPipelineBarrier(command_buffer, VK_PIPELINE_STAGE_TRANSFER_BIT, + VK_PIPELINE_STAGE_FRAGMENT_SHADER_BIT, 0, 0, nullptr, 0, + nullptr, 1, &barrier); + + end_single_time_commands(renderer, command_buffer); +} + +static bool +vgltf_renderer_create_texture_image(struct vgltf_renderer *renderer) { + struct vgltf_image image; + if (!vgltf_image_load_from_file(&image, SV(TEXTURE_PATH))) { + VGLTF_LOG_ERR("Couldn't load image from file"); + goto err; + } + renderer->mip_level_count = + floor(log2(VGLTF_MAX(image.width, image.height))) + 1; + + VkDeviceSize image_size = image.width * image.height * 4; + struct vgltf_renderer_allocated_buffer staging_buffer = {}; + if (!vgltf_renderer_create_buffer(renderer, image_size, + VK_BUFFER_USAGE_TRANSFER_SRC_BIT, + VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT | + VK_MEMORY_PROPERTY_HOST_COHERENT_BIT, + &staging_buffer)) { + VGLTF_LOG_ERR("Couldn't create staging buffer"); + goto deinit_image; + } + + void *data; + vmaMapMemory(renderer->device.allocator, staging_buffer.allocation, &data); + memcpy(data, image.data, image_size); + vmaUnmapMemory(renderer->device.allocator, staging_buffer.allocation); + + vgltf_renderer_create_image( + renderer, image.width, image.height, renderer->mip_level_count, + VK_FORMAT_R8G8B8A8_SRGB, VK_IMAGE_TILING_OPTIMAL, + VK_IMAGE_USAGE_TRANSFER_SRC_BIT | VK_IMAGE_USAGE_TRANSFER_DST_BIT | + VK_IMAGE_USAGE_SAMPLED_BIT, + VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT, &renderer->texture_image); + + transition_image_layout(renderer, renderer->texture_image.image, + VK_FORMAT_R8G8B8A8_SRGB, VK_IMAGE_LAYOUT_UNDEFINED, + VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL, + renderer->mip_level_count); + copy_buffer_to_image(renderer, staging_buffer.buffer, + renderer->texture_image.image, image.width, + image.height); + + generate_mipmaps(renderer, renderer->texture_image.image, + VK_FORMAT_R8G8B8A8_SRGB, image.width, image.height, + renderer->mip_level_count); + + vmaDestroyBuffer(renderer->device.allocator, staging_buffer.buffer, + staging_buffer.allocation); + vgltf_image_deinit(&image); + return true; +deinit_image: + vgltf_image_deinit(&image); +err: + return false; +} + +static bool +vgltf_renderer_create_texture_image_view(struct vgltf_renderer *renderer) { + return create_image_view( + &renderer->device, renderer->texture_image.image, VK_FORMAT_R8G8B8A8_SRGB, + &renderer->texture_image_view, VK_IMAGE_ASPECT_COLOR_BIT, + renderer->mip_level_count); +} + +static bool 
+vgltf_renderer_create_texture_sampler(struct vgltf_renderer *renderer) {
+  VkPhysicalDeviceProperties properties = {};
+  vkGetPhysicalDeviceProperties(renderer->device.physical_device, &properties);
+
+  VkSamplerCreateInfo sampler_info = {
+      .sType = VK_STRUCTURE_TYPE_SAMPLER_CREATE_INFO,
+      .magFilter = VK_FILTER_LINEAR,
+      .minFilter = VK_FILTER_LINEAR,
+      .addressModeU = VK_SAMPLER_ADDRESS_MODE_REPEAT,
+      .addressModeV = VK_SAMPLER_ADDRESS_MODE_REPEAT,
+      .addressModeW = VK_SAMPLER_ADDRESS_MODE_REPEAT,
+      .anisotropyEnable = VK_TRUE,
+      .maxAnisotropy = properties.limits.maxSamplerAnisotropy,
+      .borderColor = VK_BORDER_COLOR_INT_OPAQUE_BLACK,
+      .unnormalizedCoordinates = VK_FALSE,
+      .compareEnable = VK_FALSE,
+      .compareOp = VK_COMPARE_OP_ALWAYS,
+      .mipmapMode = VK_SAMPLER_MIPMAP_MODE_LINEAR,
+      .mipLodBias = 0.f,
+      .minLod = 0.f,
+      .maxLod = renderer->mip_level_count};
+
+  if (vkCreateSampler(renderer->device.device, &sampler_info, nullptr,
+                      &renderer->texture_sampler) != VK_SUCCESS) {
+    VGLTF_LOG_ERR("Couldn't create texture sampler");
+    goto err;
+  }
+
+  return true;
+err:
+  return false;
+}
+
+static void get_file_data(void *ctx, const char *filename, const int is_mtl,
+                          const char *obj_filename, char **data, size_t *len) {
+  (void)ctx;
+  (void)is_mtl;
+
+  if (!filename) {
+    VGLTF_LOG_ERR("Null filename");
+    *data = NULL;
+    *len = 0;
+    return;
+  }
+  *data = vgltf_platform_read_file_to_string(obj_filename, len);
+}
+
+static bool load_model(struct vgltf_renderer *renderer) {
+  tinyobj_attrib_t attrib;
+  tinyobj_shape_t *shapes = nullptr;
+  size_t shape_count;
+  tinyobj_material_t *materials = nullptr;
+  size_t material_count;
+
+  if ((tinyobj_parse_obj(&attrib, &shapes, &shape_count, &materials,
+                         &material_count, MODEL_PATH, get_file_data, nullptr,
+                         TINYOBJ_FLAG_TRIANGULATE)) != TINYOBJ_SUCCESS) {
+    VGLTF_LOG_ERR("Couldn't load obj");
+    return false;
+  }
+
+  for (size_t shape_index = 0; shape_index < shape_count; shape_index++) {
+    tinyobj_shape_t *shape = &shapes[shape_index];
+    unsigned int face_offset = shape->face_offset;
+    for (size_t face_index = face_offset;
+         face_index < face_offset + shape->length; face_index++) {
+      float v[3][3];
+      float t[3][2];
+
+      tinyobj_vertex_index_t idx0 = attrib.faces[face_index * 3 + 0];
+      tinyobj_vertex_index_t idx1 = attrib.faces[face_index * 3 + 1];
+      tinyobj_vertex_index_t idx2 = attrib.faces[face_index * 3 + 2];
+
+      for (int k = 0; k < 3; k++) {
+        int f0 = idx0.v_idx;
+        int f1 = idx1.v_idx;
+        int f2 = idx2.v_idx;
+
+        v[0][k] = attrib.vertices[3 * (size_t)f0 + k];
+        v[1][k] = attrib.vertices[3 * (size_t)f1 + k];
+        v[2][k] = attrib.vertices[3 * (size_t)f2 + k];
+      }
+
+      for (int k = 0; k < 2; k++) {
+        int t0 = idx0.vt_idx;
+        int t1 = idx1.vt_idx;
+        int t2 = idx2.vt_idx;
+
+        t[0][k] = attrib.texcoords[2 * (size_t)t0 + k];
+        t[1][k] = attrib.texcoords[2 * (size_t)t1 + k];
+        t[2][k] = attrib.texcoords[2 * (size_t)t2 + k];
+      }
+
+      for (int k = 0; k < 3; k++) {
+        renderer->vertices[renderer->vertex_count++] = (struct vgltf_vertex){
+            .position = {v[k][0], v[k][1], v[k][2]},
+            .texture_coordinates = {t[k][0], 1.f - t[k][1]},
+            .color = {1.f, 1.f, 1.f}};
+        renderer->indices[renderer->index_count] =
+            (uint16_t)renderer->index_count;
+        renderer->index_count++;
+      }
+    }
+  }
+  tinyobj_attrib_free(&attrib);
+  tinyobj_shapes_free(shapes, shape_count);
+  tinyobj_materials_free(materials, material_count);
+  return true;
+}
+
+static bool
+vgltf_renderer_create_vertex_buffer(struct vgltf_renderer *renderer) {
+  VkDeviceSize buffer_size =
+      renderer->vertex_count * sizeof(struct vgltf_vertex);
+
+  struct vgltf_renderer_allocated_buffer staging_buffer =
{}; + if (!vgltf_renderer_create_buffer(renderer, buffer_size, + VK_BUFFER_USAGE_TRANSFER_SRC_BIT, + VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT | + VK_MEMORY_PROPERTY_HOST_COHERENT_BIT, + &staging_buffer)) { + VGLTF_LOG_ERR("Failed to create transfer buffer"); + goto err; + } + + void *data; + vmaMapMemory(renderer->device.allocator, staging_buffer.allocation, &data); + memcpy(data, renderer->vertices, + renderer->vertex_count * sizeof(struct vgltf_vertex)); + vmaUnmapMemory(renderer->device.allocator, staging_buffer.allocation); + + if (!vgltf_renderer_create_buffer( + renderer, buffer_size, + VK_BUFFER_USAGE_TRANSFER_DST_BIT | VK_BUFFER_USAGE_VERTEX_BUFFER_BIT, + VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT, &renderer->vertex_buffer)) { + VGLTF_LOG_ERR("Failed to create vertex buffer"); + goto destroy_staging_buffer; + } + + vgltf_renderer_copy_buffer(renderer, staging_buffer.buffer, + renderer->vertex_buffer.buffer, buffer_size); + vmaDestroyBuffer(renderer->device.allocator, staging_buffer.buffer, + staging_buffer.allocation); + return true; +destroy_staging_buffer: + vmaDestroyBuffer(renderer->device.allocator, staging_buffer.buffer, + staging_buffer.allocation); +err: + return false; +} + +static bool +vgltf_renderer_create_index_buffer(struct vgltf_renderer *renderer) { + VkDeviceSize buffer_size = renderer->index_count * sizeof(uint16_t); + struct vgltf_renderer_allocated_buffer staging_buffer = {}; + if (!vgltf_renderer_create_buffer(renderer, buffer_size, + VK_BUFFER_USAGE_TRANSFER_SRC_BIT, + VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT | + VK_MEMORY_PROPERTY_HOST_COHERENT_BIT, + &staging_buffer)) { + VGLTF_LOG_ERR("Failed to create transfer buffer"); + goto err; + } + + void *data; + vmaMapMemory(renderer->device.allocator, staging_buffer.allocation, &data); + memcpy(data, renderer->indices, renderer->index_count * sizeof(uint16_t)); + vmaUnmapMemory(renderer->device.allocator, staging_buffer.allocation); + + if (!vgltf_renderer_create_buffer( + renderer, buffer_size, + VK_BUFFER_USAGE_TRANSFER_DST_BIT | VK_BUFFER_USAGE_INDEX_BUFFER_BIT, + VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT, &renderer->index_buffer)) { + VGLTF_LOG_ERR("Failed to create index buffer"); + goto destroy_staging_buffer; + } + vgltf_renderer_copy_buffer(renderer, staging_buffer.buffer, + renderer->index_buffer.buffer, buffer_size); + vmaDestroyBuffer(renderer->device.allocator, staging_buffer.buffer, + staging_buffer.allocation); + return true; + +destroy_staging_buffer: + vmaDestroyBuffer(renderer->device.allocator, staging_buffer.buffer, + staging_buffer.allocation); +err: + return false; +} + +static bool +vgltf_renderer_create_command_buffer(struct vgltf_renderer *renderer) { + VkCommandBufferAllocateInfo allocate_info = { + .sType = VK_STRUCTURE_TYPE_COMMAND_BUFFER_ALLOCATE_INFO, + .commandPool = renderer->command_pool, + .level = VK_COMMAND_BUFFER_LEVEL_PRIMARY, + .commandBufferCount = VGLTF_RENDERER_MAX_FRAME_IN_FLIGHT_COUNT}; + + if (vkAllocateCommandBuffers(renderer->device.device, &allocate_info, + renderer->command_buffer) != VK_SUCCESS) { + VGLTF_LOG_ERR("Couldn't allocate command buffers"); + goto err; + } + + return true; +err: + return false; +} + +static bool +vgltf_renderer_create_sync_objects(struct vgltf_renderer *renderer) { + VkSemaphoreCreateInfo semaphore_info = { + .sType = VK_STRUCTURE_TYPE_SEMAPHORE_CREATE_INFO, + }; + + VkFenceCreateInfo fence_info = {.sType = VK_STRUCTURE_TYPE_FENCE_CREATE_INFO, + .flags = VK_FENCE_CREATE_SIGNALED_BIT}; + + int frame_in_flight_index = 0; + for (; frame_in_flight_index < 
VGLTF_RENDERER_MAX_FRAME_IN_FLIGHT_COUNT;
+       frame_in_flight_index++) {
+    if (vkCreateSemaphore(
+            renderer->device.device, &semaphore_info, nullptr,
+            &renderer->image_available_semaphores[frame_in_flight_index]) !=
+            VK_SUCCESS ||
+        vkCreateSemaphore(
+            renderer->device.device, &semaphore_info, nullptr,
+            &renderer->render_finished_semaphores[frame_in_flight_index]) !=
+            VK_SUCCESS ||
+        vkCreateFence(renderer->device.device, &fence_info, nullptr,
+                      &renderer->in_flight_fences[frame_in_flight_index]) !=
+            VK_SUCCESS) {
+      VGLTF_LOG_ERR("Couldn't create sync objects");
+      goto err;
+    }
+  }
+
+  return true;
+err:
+  for (int frame_in_flight_to_delete_index = 0;
+       frame_in_flight_to_delete_index < frame_in_flight_index;
+       frame_in_flight_to_delete_index++) {
+    vkDestroyFence(renderer->device.device,
+                   renderer->in_flight_fences[frame_in_flight_to_delete_index],
+                   nullptr);
+    vkDestroySemaphore(
+        renderer->device.device,
+        renderer->render_finished_semaphores[frame_in_flight_to_delete_index],
+        nullptr);
+    vkDestroySemaphore(
+        renderer->device.device,
+        renderer->image_available_semaphores[frame_in_flight_to_delete_index],
+        nullptr);
+  }
+  return false;
+}
+
+static bool vgltf_vk_swapchain_init(struct vgltf_vk_swapchain *swapchain,
+                                    struct vgltf_vk_device *device,
+                                    struct vgltf_vk_surface *surface,
+                                    struct vgltf_window_size *window_size) {
+  if (!create_swapchain(swapchain, device, surface, window_size)) {
+    VGLTF_LOG_ERR("Couldn't create swapchain");
+    goto err;
+  }
+
+  if (!create_swapchain_image_views(swapchain, device)) {
+    VGLTF_LOG_ERR("Couldn't create image views");
+    goto destroy_swapchain;
+  }
+
+  return true;
+destroy_swapchain:
+  vkDestroySwapchainKHR(device->device, swapchain->swapchain, nullptr);
+err:
+  return false;
+}
+
+static void vgltf_vk_swapchain_deinit(struct vgltf_vk_swapchain *swapchain,
+                                      struct vgltf_vk_device *device) {
+  for (uint32_t swapchain_image_view_index = 0;
+       swapchain_image_view_index < swapchain->swapchain_image_count;
+       swapchain_image_view_index++) {
+    vkDestroyImageView(
+        device->device,
+        swapchain->swapchain_image_views[swapchain_image_view_index], nullptr);
+  }
+  vkDestroySwapchainKHR(device->device, swapchain->swapchain, nullptr);
+}
+
+static void vgltf_renderer_cleanup_swapchain(struct vgltf_renderer *renderer) {
+  vkDestroyImageView(renderer->device.device, renderer->depth_image_view,
+                     nullptr);
+  vmaDestroyImage(renderer->device.allocator, renderer->depth_image.image,
+                  renderer->depth_image.allocation);
+
+  for (uint32_t framebuffer_index = 0;
+       framebuffer_index < renderer->swapchain.swapchain_image_count;
+       framebuffer_index++) {
+    vkDestroyFramebuffer(renderer->device.device,
+                         renderer->swapchain_framebuffers[framebuffer_index],
+                         nullptr);
+  }
+
+  vgltf_vk_swapchain_deinit(&renderer->swapchain, &renderer->device);
+}
+
+static bool
+vgltf_renderer_recreate_swapchain(struct vgltf_renderer *renderer) {
+  vkDeviceWaitIdle(renderer->device.device);
+  vgltf_renderer_cleanup_swapchain(renderer);
+
+  // TODO add error handling
+  create_swapchain(&renderer->swapchain, &renderer->device, &renderer->surface,
+                   &renderer->window_size);
+  create_swapchain_image_views(&renderer->swapchain, &renderer->device);
+  vgltf_renderer_create_depth_resources(renderer);
+  vgltf_renderer_create_framebuffers(renderer);
+  return true;
+}
+
+static void vgltf_renderer_triangle_pass(struct vgltf_renderer *renderer,
+                                         uint32_t swapchain_image_index) {
+  VkRenderPassBeginInfo render_pass_info = {
+      .sType = VK_STRUCTURE_TYPE_RENDER_PASS_BEGIN_INFO,
+      .renderPass = renderer->render_pass,
+      .framebuffer = renderer->swapchain_framebuffers[swapchain_image_index],
+      .renderArea = {.offset = {},
+                     .extent = renderer->swapchain.swapchain_extent},
+      .clearValueCount = 2,
+      .pClearValues =
+          (const VkClearValue[]){{.color = {.float32 = {0.f, 0.f, 0.f, 1.f}}},
+                                 {.depthStencil = {1.0f, 0}}},
+
+  };
+
+  vkCmdBeginRenderPass(renderer->command_buffer[renderer->current_frame],
+                       &render_pass_info, VK_SUBPASS_CONTENTS_INLINE);
+  vkCmdBindPipeline(renderer->command_buffer[renderer->current_frame],
+                    VK_PIPELINE_BIND_POINT_GRAPHICS,
+                    renderer->graphics_pipeline);
+  VkViewport viewport = {
+      .x = 0.f,
+      .y = 0.f,
+      .width = (float)renderer->swapchain.swapchain_extent.width,
+      .height = (float)renderer->swapchain.swapchain_extent.height,
+      .minDepth = 0.f,
+      .maxDepth = 1.f};
+  vkCmdSetViewport(renderer->command_buffer[renderer->current_frame], 0, 1,
+                   &viewport);
+  VkRect2D scissor = {.offset = {},
+                      .extent = renderer->swapchain.swapchain_extent};
+  vkCmdSetScissor(renderer->command_buffer[renderer->current_frame], 0, 1,
+                  &scissor);
+
+  VkBuffer vertex_buffers[] = {renderer->vertex_buffer.buffer};
+  VkDeviceSize offsets[] = {0};
+  vkCmdBindVertexBuffers(renderer->command_buffer[renderer->current_frame], 0,
+                         1, vertex_buffers, offsets);
+  vkCmdBindIndexBuffer(renderer->command_buffer[renderer->current_frame],
+                       renderer->index_buffer.buffer, 0, VK_INDEX_TYPE_UINT16);
+
+  vkCmdBindDescriptorSets(
+      renderer->command_buffer[renderer->current_frame],
+      VK_PIPELINE_BIND_POINT_GRAPHICS, renderer->pipeline_layout, 0, 1,
+      &renderer->descriptor_sets[renderer->current_frame], 0, nullptr);
+  vkCmdDrawIndexed(renderer->command_buffer[renderer->current_frame],
+                   renderer->index_count, 1, 0, 0, 0);
+
+  vkCmdEndRenderPass(renderer->command_buffer[renderer->current_frame]);
+}
+
+static void update_uniform_buffer(struct vgltf_renderer *renderer,
+                                  uint32_t current_frame) {
+  static long long start_time_nanoseconds = 0;
+  if (start_time_nanoseconds == 0) {
+    if (!vgltf_platform_get_current_time_nanoseconds(
+            &start_time_nanoseconds)) {
+      VGLTF_LOG_ERR("Couldn't get current time");
+    }
+  }
+
+  long long current_time_nanoseconds = 0;
+  if (!vgltf_platform_get_current_time_nanoseconds(
+          &current_time_nanoseconds)) {
+    VGLTF_LOG_ERR("Couldn't get current time");
+  }
+
+  long long elapsed_time_nanoseconds =
+      current_time_nanoseconds - start_time_nanoseconds;
+  float elapsed_time_seconds = elapsed_time_nanoseconds / 1e9f;
+  VGLTF_LOG_INFO("Elapsed time: %f", elapsed_time_seconds);
+
+  vgltf_mat4 model_matrix;
+  vgltf_mat4_rotate(model_matrix, (vgltf_mat4)VGLTF_MAT4_IDENTITY,
+                    elapsed_time_seconds * VGLTF_MATHS_DEG_TO_RAD(90.0f),
+                    (vgltf_vec3){0.f, 0.f, 1.f});
+
+  vgltf_mat4 view_matrix;
+  vgltf_mat4_look_at(view_matrix, (vgltf_vec3){2.f, 2.f, 2.f},
+                     (vgltf_vec3){0.f, 0.f, 0.f}, (vgltf_vec3){0.f, 0.f, 1.f});
+
+  vgltf_mat4 projection_matrix;
+  vgltf_mat4_perspective(projection_matrix, VGLTF_MATHS_DEG_TO_RAD(45.f),
+                         (float)renderer->swapchain.swapchain_extent.width /
+                             (float)renderer->swapchain.swapchain_extent.height,
+                         0.1f, 10.f);
+  projection_matrix[1 * 4 + 1] *= -1;
+
+  struct vgltf_renderer_uniform_buffer_object ubo = {};
+  memcpy(ubo.model, model_matrix, sizeof(vgltf_mat4));
+  memcpy(ubo.view, view_matrix, sizeof(vgltf_mat4));
+  memcpy(ubo.projection, projection_matrix, sizeof(vgltf_mat4));
+  memcpy(renderer->mapped_uniform_buffers[current_frame], &ubo, sizeof(ubo));
+}
+
+bool vgltf_renderer_render_frame(struct vgltf_renderer *renderer) {
+  vkWaitForFences(renderer->device.device, 1,
+                  &renderer->in_flight_fences[renderer->current_frame],
+                  VK_TRUE, UINT64_MAX);
+
+  uint32_t image_index;
+  VkResult acquire_swapchain_image_result = vkAcquireNextImageKHR(
+      renderer->device.device, renderer->swapchain.swapchain, UINT64_MAX,
+      renderer->image_available_semaphores[renderer->current_frame],
+      VK_NULL_HANDLE, &image_index);
+  if (acquire_swapchain_image_result == VK_ERROR_OUT_OF_DATE_KHR ||
+      acquire_swapchain_image_result == VK_SUBOPTIMAL_KHR ||
+      renderer->framebuffer_resized) {
+    renderer->framebuffer_resized = false;
+    vgltf_renderer_recreate_swapchain(renderer);
+    return true;
+  } else if (acquire_swapchain_image_result != VK_SUCCESS) {
+    VGLTF_LOG_ERR("Failed to acquire a swapchain image");
+    goto err;
+  }
+
+  vkResetFences(renderer->device.device, 1,
+                &renderer->in_flight_fences[renderer->current_frame]);
+
+  vkResetCommandBuffer(renderer->command_buffer[renderer->current_frame], 0);
+  VkCommandBufferBeginInfo begin_info = {
+      .sType = VK_STRUCTURE_TYPE_COMMAND_BUFFER_BEGIN_INFO,
+  };
+
+  if (vkBeginCommandBuffer(renderer->command_buffer[renderer->current_frame],
+                           &begin_info) != VK_SUCCESS) {
+    VGLTF_LOG_ERR("Failed to begin recording command buffer");
+    goto err;
+  }
+
+  vgltf_renderer_triangle_pass(renderer, image_index);
+
+  if (vkEndCommandBuffer(renderer->command_buffer[renderer->current_frame]) !=
+      VK_SUCCESS) {
+    VGLTF_LOG_ERR("Failed to record command buffer");
+    goto err;
+  }
+
+  update_uniform_buffer(renderer, renderer->current_frame);
+
+  VkSubmitInfo submit_info = {
+      .sType = VK_STRUCTURE_TYPE_SUBMIT_INFO,
+  };
+
+  VkSemaphore wait_semaphores[] = {
+      renderer->image_available_semaphores[renderer->current_frame]};
+  VkPipelineStageFlags wait_stages[] = {
+      VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT};
+  submit_info.waitSemaphoreCount = 1;
+  submit_info.pWaitSemaphores = wait_semaphores;
+  submit_info.pWaitDstStageMask = wait_stages;
+  submit_info.commandBufferCount = 1;
+  submit_info.pCommandBuffers =
+      &renderer->command_buffer[renderer->current_frame];
+
+  VkSemaphore signal_semaphores[] = {
+      renderer->render_finished_semaphores[renderer->current_frame]};
+  submit_info.signalSemaphoreCount = 1;
+  submit_info.pSignalSemaphores = signal_semaphores;
+  if (vkQueueSubmit(renderer->device.graphics_queue, 1, &submit_info,
+                    renderer->in_flight_fences[renderer->current_frame]) !=
+      VK_SUCCESS) {
+    VGLTF_LOG_ERR("Failed to submit draw command buffer");
+    goto err;
+  }
+
+  VkPresentInfoKHR present_info = {.sType = VK_STRUCTURE_TYPE_PRESENT_INFO_KHR,
+                                   .waitSemaphoreCount = 1,
+                                   .pWaitSemaphores = signal_semaphores};
+
+  VkSwapchainKHR swapchains[] = {renderer->swapchain.swapchain};
+  present_info.swapchainCount = 1;
+  present_info.pSwapchains = swapchains;
+  present_info.pImageIndices = &image_index;
+  VkResult result =
+      vkQueuePresentKHR(renderer->device.present_queue, &present_info);
+  if (result == VK_ERROR_OUT_OF_DATE_KHR || result == VK_SUBOPTIMAL_KHR) {
+    vgltf_renderer_recreate_swapchain(renderer);
+  } else if (result != VK_SUCCESS) {
+    VGLTF_LOG_ERR("Failed to present a swapchain image");
+    goto err;
+  }
+  renderer->current_frame =
+      (renderer->current_frame + 1) % VGLTF_RENDERER_MAX_FRAME_IN_FLIGHT_COUNT;
+  return true;
+err:
+  return false;
+}
+static bool
+vgltf_renderer_create_uniform_buffers(struct vgltf_renderer *renderer) {
+  VkDeviceSize buffer_size =
+      sizeof(struct vgltf_renderer_uniform_buffer_object);
+
+  for (int i = 0; i < VGLTF_RENDERER_MAX_FRAME_IN_FLIGHT_COUNT; i++) {
+    vgltf_renderer_create_buffer(renderer,
buffer_size, + VK_BUFFER_USAGE_UNIFORM_BUFFER_BIT, + VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT | + VK_MEMORY_PROPERTY_HOST_COHERENT_BIT, + &renderer->uniform_buffers[i]); + vmaMapMemory(renderer->device.allocator, + renderer->uniform_buffers[i].allocation, + &renderer->mapped_uniform_buffers[i]); + } + + return true; +} + +static bool +vgltf_renderer_create_descriptor_pool(struct vgltf_renderer *renderer) { + VkDescriptorPoolSize pool_sizes[] = { + (VkDescriptorPoolSize){.type = VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER, + .descriptorCount = + VGLTF_RENDERER_MAX_FRAME_IN_FLIGHT_COUNT}, + (VkDescriptorPoolSize){.type = VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER, + .descriptorCount = + VGLTF_RENDERER_MAX_FRAME_IN_FLIGHT_COUNT}}; + int pool_size_count = sizeof(pool_sizes) / sizeof(pool_sizes[0]); + + VkDescriptorPoolCreateInfo pool_info = { + .sType = VK_STRUCTURE_TYPE_DESCRIPTOR_POOL_CREATE_INFO, + .poolSizeCount = pool_size_count, + .pPoolSizes = pool_sizes, + .maxSets = VGLTF_RENDERER_MAX_FRAME_IN_FLIGHT_COUNT}; + + if (vkCreateDescriptorPool(renderer->device.device, &pool_info, nullptr, + &renderer->descriptor_pool) != VK_SUCCESS) { + VGLTF_LOG_ERR("Couldn't create uniform descriptor pool"); + goto err; + } + + return true; +err: + return false; +} +static bool +vgltf_renderer_create_descriptor_sets(struct vgltf_renderer *renderer) { + VkDescriptorSetLayout layouts[VGLTF_RENDERER_MAX_FRAME_IN_FLIGHT_COUNT] = {}; + for (int layout_index = 0; + layout_index < VGLTF_RENDERER_MAX_FRAME_IN_FLIGHT_COUNT; + layout_index++) { + layouts[layout_index] = renderer->descriptor_set_layout; + } + + VkDescriptorSetAllocateInfo alloc_info = { + .sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_ALLOCATE_INFO, + .descriptorPool = renderer->descriptor_pool, + .descriptorSetCount = VGLTF_RENDERER_MAX_FRAME_IN_FLIGHT_COUNT, + .pSetLayouts = layouts}; + + if (vkAllocateDescriptorSets(renderer->device.device, &alloc_info, + renderer->descriptor_sets) != VK_SUCCESS) { + VGLTF_LOG_ERR("Couldn't create descriptor sets"); + goto err; + } + + for (int set_index = 0; set_index < VGLTF_RENDERER_MAX_FRAME_IN_FLIGHT_COUNT; + set_index++) { + VkDescriptorBufferInfo buffer_info = { + .buffer = renderer->uniform_buffers[set_index].buffer, + .offset = 0, + .range = sizeof(struct vgltf_renderer_uniform_buffer_object)}; + + VkDescriptorImageInfo image_info = { + .imageLayout = VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL, + .imageView = renderer->texture_image_view, + .sampler = renderer->texture_sampler, + }; + + VkWriteDescriptorSet descriptor_writes[] = { + (VkWriteDescriptorSet){.sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET, + .dstSet = renderer->descriptor_sets[set_index], + .dstBinding = 0, + .dstArrayElement = 0, + .descriptorType = + VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER, + .descriptorCount = 1, + .pBufferInfo = &buffer_info}, + + (VkWriteDescriptorSet){.sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET, + .dstSet = renderer->descriptor_sets[set_index], + .dstBinding = 1, + .dstArrayElement = 0, + .descriptorType = + VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER, + .descriptorCount = 1, + .pImageInfo = &image_info}}; + int descriptor_write_count = + sizeof(descriptor_writes) / sizeof(descriptor_writes[0]); + + vkUpdateDescriptorSets(renderer->device.device, descriptor_write_count, + descriptor_writes, 0, nullptr); + } + + return true; +err: + return false; +} + +static bool vgltf_vk_device_init(struct vgltf_vk_device *device, + struct vgltf_vk_instance *instance, + struct vgltf_vk_surface *surface) { + if (!pick_physical_device(&device->physical_device, 
instance, + surface->surface)) { + VGLTF_LOG_ERR("Couldn't pick physical device"); + goto err; + } + + if (!create_logical_device(&device->device, &device->graphics_queue, + &device->present_queue, device->physical_device, + surface->surface)) { + VGLTF_LOG_ERR("Couldn't pick logical device"); + goto err; + } + + if (!create_allocator(&device->allocator, device, instance)) { + VGLTF_LOG_ERR("Couldn't create allocator"); + goto destroy_logical_device; + } + + return true; +destroy_logical_device: + vkDestroyDevice(device->device, nullptr); +err: + return false; +} + +static void vgltf_vk_device_deinit(struct vgltf_vk_device *device) { + vmaDestroyAllocator(device->allocator); + vkDestroyDevice(device->device, nullptr); +} + +bool vgltf_renderer_init(struct vgltf_renderer *renderer, + struct vgltf_platform *platform) { + if (!vgltf_vk_instance_init(&renderer->instance, platform)) { + VGLTF_LOG_ERR("instance creation failed"); + goto err; + } + vgltf_renderer_setup_debug_messenger(renderer); + if (!vgltf_vk_surface_init(&renderer->surface, &renderer->instance, + platform)) { + goto destroy_instance; + } + + if (!vgltf_vk_device_init(&renderer->device, &renderer->instance, + &renderer->surface)) { + VGLTF_LOG_ERR("Device creation failed"); + goto destroy_surface; + } + + struct vgltf_window_size window_size = {800, 600}; + if (!vgltf_platform_get_window_size(platform, &window_size)) { + VGLTF_LOG_ERR("Couldn't get window size"); + goto destroy_device; + } + renderer->window_size = window_size; + + if (!vgltf_vk_swapchain_init(&renderer->swapchain, &renderer->device, + &renderer->surface, &renderer->window_size)) { + VGLTF_LOG_ERR("Couldn't create swapchain"); + goto destroy_device; + } + + if (!vgltf_renderer_create_render_pass(renderer)) { + VGLTF_LOG_ERR("Couldn't create render pass"); + goto destroy_swapchain; + } + + if (!vgltf_renderer_create_descriptor_set_layout(renderer)) { + VGLTF_LOG_ERR("Couldn't create descriptor set layout"); + goto destroy_render_pass; + } + + if (!vgltf_renderer_create_graphics_pipeline(renderer)) { + VGLTF_LOG_ERR("Couldn't create graphics pipeline"); + goto destroy_descriptor_set_layout; + } + + if (!vgltf_renderer_create_command_pool(renderer)) { + VGLTF_LOG_ERR("Couldn't create command pool"); + goto destroy_graphics_pipeline; + } + + if (!vgltf_renderer_create_depth_resources(renderer)) { + VGLTF_LOG_ERR("Couldn't create depth resources"); + goto destroy_command_pool; + } + + if (!vgltf_renderer_create_framebuffers(renderer)) { + VGLTF_LOG_ERR("Couldn't create framebuffers"); + goto destroy_depth_resources; + } + + if (!vgltf_renderer_create_texture_image(renderer)) { + VGLTF_LOG_ERR("Couldn't create texture image"); + goto destroy_frame_buffers; + } + + if (!vgltf_renderer_create_texture_image_view(renderer)) { + VGLTF_LOG_ERR("Couldn't create texture image view"); + goto destroy_texture_image; + } + + if (!vgltf_renderer_create_texture_sampler(renderer)) { + VGLTF_LOG_ERR("Couldn't create texture sampler"); + goto destroy_texture_image_view; + } + + if (!load_model(renderer)) { + VGLTF_LOG_ERR("Couldn't load model"); + goto destroy_texture_sampler; + } + + if (!vgltf_renderer_create_vertex_buffer(renderer)) { + VGLTF_LOG_ERR("Couldn't create vertex buffer"); + goto destroy_model; + } + + if (!vgltf_renderer_create_index_buffer(renderer)) { + VGLTF_LOG_ERR("Couldn't create index buffer"); + goto destroy_vertex_buffer; + } + + if (!vgltf_renderer_create_uniform_buffers(renderer)) { + VGLTF_LOG_ERR("Couldn't create uniform buffers"); + goto 
destroy_index_buffer; + } + + if (!vgltf_renderer_create_descriptor_pool(renderer)) { + VGLTF_LOG_ERR("Couldn't create descriptor pool"); + goto destroy_uniform_buffers; + } + + if (!vgltf_renderer_create_descriptor_sets(renderer)) { + VGLTF_LOG_ERR("Couldn't create descriptor sets"); + goto destroy_descriptor_pool; + } + + if (!vgltf_renderer_create_command_buffer(renderer)) { + VGLTF_LOG_ERR("Couldn't create command buffer"); + goto destroy_descriptor_pool; + } + + if (!vgltf_renderer_create_sync_objects(renderer)) { + VGLTF_LOG_ERR("Couldn't create sync objects"); + goto destroy_descriptor_pool; + } + + return true; + +destroy_descriptor_pool: + vkDestroyDescriptorPool(renderer->device.device, renderer->descriptor_pool, + nullptr); +destroy_uniform_buffers: + for (int i = 0; i < VGLTF_RENDERER_MAX_FRAME_IN_FLIGHT_COUNT; i++) { + vmaDestroyBuffer(renderer->device.allocator, + renderer->uniform_buffers[i].buffer, + renderer->uniform_buffers[i].allocation); + } +destroy_index_buffer: + vmaDestroyBuffer(renderer->device.allocator, renderer->index_buffer.buffer, + renderer->index_buffer.allocation); +destroy_vertex_buffer: + vmaDestroyBuffer(renderer->device.allocator, renderer->vertex_buffer.buffer, + renderer->vertex_buffer.allocation); +destroy_model: + // TODO +destroy_texture_sampler: + vkDestroySampler(renderer->device.device, renderer->texture_sampler, nullptr); +destroy_texture_image_view: + vkDestroyImageView(renderer->device.device, renderer->texture_image_view, + nullptr); +destroy_texture_image: + vmaDestroyImage(renderer->device.allocator, renderer->texture_image.image, + renderer->texture_image.allocation); +destroy_depth_resources: + vkDestroyImageView(renderer->device.device, renderer->depth_image_view, + nullptr); + vmaDestroyImage(renderer->device.allocator, renderer->depth_image.image, + renderer->depth_image.allocation); +destroy_command_pool: + vkDestroyCommandPool(renderer->device.device, renderer->command_pool, + nullptr); +destroy_frame_buffers: + for (uint32_t swapchain_framebuffer_index = 0; + swapchain_framebuffer_index < renderer->swapchain.swapchain_image_count; + swapchain_framebuffer_index++) { + vkDestroyFramebuffer( + renderer->device.device, + renderer->swapchain_framebuffers[swapchain_framebuffer_index], nullptr); + } +destroy_graphics_pipeline: + vkDestroyPipeline(renderer->device.device, renderer->graphics_pipeline, + nullptr); + vkDestroyPipelineLayout(renderer->device.device, renderer->pipeline_layout, + nullptr); +destroy_descriptor_set_layout: + vkDestroyDescriptorSetLayout(renderer->device.device, + renderer->descriptor_set_layout, nullptr); +destroy_render_pass: + vkDestroyRenderPass(renderer->device.device, renderer->render_pass, nullptr); +destroy_swapchain: + vgltf_vk_swapchain_deinit(&renderer->swapchain, &renderer->device); +destroy_device: + vgltf_vk_device_deinit(&renderer->device); +destroy_surface: + vgltf_vk_surface_deinit(&renderer->surface, &renderer->instance); +destroy_instance: + if (enable_validation_layers) { + destroy_debug_utils_messenger_ext(renderer->instance.instance, + renderer->debug_messenger, nullptr); + } + vgltf_vk_instance_deinit(&renderer->instance); +err: + return false; +} +void vgltf_renderer_deinit(struct vgltf_renderer *renderer) { + vkDeviceWaitIdle(renderer->device.device); + vgltf_renderer_cleanup_swapchain(renderer); + for (int i = 0; i < VGLTF_RENDERER_MAX_FRAME_IN_FLIGHT_COUNT; i++) { + vmaUnmapMemory(renderer->device.allocator, + renderer->uniform_buffers[i].allocation); + 
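+    // With the persistent mapping released, the allocation can go back to VMA.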
+    vmaDestroyBuffer(renderer->device.allocator,
+                     renderer->uniform_buffers[i].buffer,
+                     renderer->uniform_buffers[i].allocation);
+  }
+  vmaDestroyBuffer(renderer->device.allocator, renderer->index_buffer.buffer,
+                   renderer->index_buffer.allocation);
+  vmaDestroyBuffer(renderer->device.allocator, renderer->vertex_buffer.buffer,
+                   renderer->vertex_buffer.allocation);
+  vkDestroySampler(renderer->device.device, renderer->texture_sampler,
+                   nullptr);
+  vkDestroyImageView(renderer->device.device, renderer->texture_image_view,
+                     nullptr);
+  vmaDestroyImage(renderer->device.allocator, renderer->texture_image.image,
+                  renderer->texture_image.allocation);
+  vkDestroyPipeline(renderer->device.device, renderer->graphics_pipeline,
+                    nullptr);
+  vkDestroyPipelineLayout(renderer->device.device, renderer->pipeline_layout,
+                          nullptr);
+  vkDestroyDescriptorPool(renderer->device.device, renderer->descriptor_pool,
+                          nullptr);
+  vkDestroyDescriptorSetLayout(renderer->device.device,
+                               renderer->descriptor_set_layout, nullptr);
+  vkDestroyRenderPass(renderer->device.device, renderer->render_pass, nullptr);
+  for (int i = 0; i < VGLTF_RENDERER_MAX_FRAME_IN_FLIGHT_COUNT; i++) {
+    vkDestroySemaphore(renderer->device.device,
+                       renderer->image_available_semaphores[i], nullptr);
+    vkDestroySemaphore(renderer->device.device,
+                       renderer->render_finished_semaphores[i], nullptr);
+    vkDestroyFence(renderer->device.device, renderer->in_flight_fences[i],
+                   nullptr);
+  }
+  vkDestroyCommandPool(renderer->device.device, renderer->command_pool,
+                       nullptr);
+  vgltf_vk_device_deinit(&renderer->device);
+  vgltf_vk_surface_deinit(&renderer->surface, &renderer->instance);
+  if (enable_validation_layers) {
+    destroy_debug_utils_messenger_ext(renderer->instance.instance,
+                                      renderer->debug_messenger, nullptr);
+  }
+  vgltf_vk_instance_deinit(&renderer->instance);
+}
+void vgltf_renderer_on_window_resized(struct vgltf_renderer *renderer,
+                                      struct vgltf_window_size size) {
+  if (size.width > 0 && size.height > 0 &&
+      (size.width != renderer->window_size.width ||
+       size.height != renderer->window_size.height)) {
+    renderer->window_size = size;
+    renderer->framebuffer_resized = true;
+  }
+}
diff --git a/src/renderer/renderer.h b/src/renderer/renderer.h
new file mode 100644
index 0000000..79e1f3d
--- /dev/null
+++ b/src/renderer/renderer.h
@@ -0,0 +1,126 @@
+#ifndef VGLTF_RENDERER_H
+#define VGLTF_RENDERER_H
+
+#include "../maths.h"
+#include "../platform.h"
+#include "vma_usage.h"
+#include <stdint.h>
+
+struct vgltf_vertex {
+  vgltf_vec3 position;
+  vgltf_vec3 color;
+  vgltf_vec2 texture_coordinates;
+};
+VkVertexInputBindingDescription vgltf_vertex_binding_description(void);
+
+struct vgltf_vertex_input_attribute_descriptions {
+  VkVertexInputAttributeDescription descriptions[3];
+  uint32_t count;
+};
+struct vgltf_vertex_input_attribute_descriptions
+vgltf_vertex_attribute_descriptions(void);
+
+struct vgltf_renderer_uniform_buffer_object {
+  alignas(16) vgltf_mat4 model;
+  alignas(16) vgltf_mat4 view;
+  alignas(16) vgltf_mat4 projection;
+};
+
+struct vgltf_renderer_allocated_buffer {
+  VkBuffer buffer;
+  VmaAllocation allocation;
+  VmaAllocationInfo info;
+};
+
+struct vgltf_renderer_allocated_image {
+  VkImage image;
+  VmaAllocation allocation;
+  VmaAllocationInfo info;
+};
+
+struct vgltf_vk_instance {
+  VkInstance instance;
+};
+
+struct vgltf_vk_device {
+  VkPhysicalDevice physical_device;
+  VkDevice device;
+  VkQueue graphics_queue;
+  VkQueue present_queue;
+  VmaAllocator allocator;
+};
+
+struct vgltf_vk_surface {
+  VkSurfaceKHR surface;
+};
+
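+// Fixed-capacity arrays keep the renderer free of per-frame heap allocation;
+// the real swapchain image count is validated against this cap when the
+// swapchain is created.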
+constexpr int VGLTF_RENDERER_MAX_SWAPCHAIN_IMAGE_COUNT = 32;
+struct vgltf_vk_swapchain {
+  VkSwapchainKHR swapchain;
+  VkFormat swapchain_image_format;
+  VkImage swapchain_images[VGLTF_RENDERER_MAX_SWAPCHAIN_IMAGE_COUNT];
+  VkImageView swapchain_image_views[VGLTF_RENDERER_MAX_SWAPCHAIN_IMAGE_COUNT];
+  VkExtent2D swapchain_extent;
+  uint32_t swapchain_image_count;
+};
+
+struct vgltf_vk_pipeline {
+  VkPipelineLayout layout;
+  VkPipeline pipeline;
+};
+
+constexpr int VGLTF_RENDERER_MAX_FRAME_IN_FLIGHT_COUNT = 2;
+struct vgltf_renderer {
+  struct vgltf_vk_instance instance;
+  struct vgltf_vk_device device;
+  VkDebugUtilsMessengerEXT debug_messenger;
+  struct vgltf_vk_surface surface;
+  struct vgltf_vk_swapchain swapchain;
+  struct vgltf_renderer_allocated_image depth_image;
+  VkImageView depth_image_view;
+
+  VkRenderPass render_pass;
+  VkDescriptorSetLayout descriptor_set_layout;
+
+  VkDescriptorPool descriptor_pool;
+  VkDescriptorSet descriptor_sets[VGLTF_RENDERER_MAX_FRAME_IN_FLIGHT_COUNT];
+  VkPipelineLayout pipeline_layout;
+  VkPipeline graphics_pipeline;
+
+  VkFramebuffer
+      swapchain_framebuffers[VGLTF_RENDERER_MAX_SWAPCHAIN_IMAGE_COUNT];
+
+  VkCommandPool command_pool;
+  VkCommandBuffer command_buffer[VGLTF_RENDERER_MAX_FRAME_IN_FLIGHT_COUNT];
+  VkSemaphore
+      image_available_semaphores[VGLTF_RENDERER_MAX_FRAME_IN_FLIGHT_COUNT];
+  VkSemaphore
+      render_finished_semaphores[VGLTF_RENDERER_MAX_FRAME_IN_FLIGHT_COUNT];
+  VkFence in_flight_fences[VGLTF_RENDERER_MAX_FRAME_IN_FLIGHT_COUNT];
+
+  struct vgltf_renderer_allocated_buffer
+      uniform_buffers[VGLTF_RENDERER_MAX_FRAME_IN_FLIGHT_COUNT];
+  void *mapped_uniform_buffers[VGLTF_RENDERER_MAX_FRAME_IN_FLIGHT_COUNT];
+
+  uint32_t mip_level_count;
+  struct vgltf_renderer_allocated_image texture_image;
+  VkImageView texture_image_view;
+  VkSampler texture_sampler;
+  struct vgltf_vertex vertices[100000];
+  int vertex_count;
+  uint16_t indices[100000];
+  int index_count;
+  struct vgltf_renderer_allocated_buffer vertex_buffer;
+  struct vgltf_renderer_allocated_buffer index_buffer;
+
+  struct vgltf_window_size window_size;
+  uint32_t current_frame;
+  bool framebuffer_resized;
+};
+bool vgltf_renderer_init(struct vgltf_renderer *renderer,
+                         struct vgltf_platform *platform);
+void vgltf_renderer_deinit(struct vgltf_renderer *renderer);
+bool vgltf_renderer_render_frame(struct vgltf_renderer *renderer);
+void vgltf_renderer_on_window_resized(struct vgltf_renderer *renderer,
+                                      struct vgltf_window_size size);
+
+#endif // VGLTF_RENDERER_H
diff --git a/src/renderer/vma_usage.cpp b/src/renderer/vma_usage.cpp
new file mode 100644
index 0000000..83006a1
--- /dev/null
+++ b/src/renderer/vma_usage.cpp
@@ -0,0 +1,4 @@
+#include "vma_usage.h"
+
+#define VMA_IMPLEMENTATION
+#include <vk_mem_alloc.h>
diff --git a/src/renderer/vma_usage.h b/src/renderer/vma_usage.h
new file mode 100644
index 0000000..e9b5aa4
--- /dev/null
+++ b/src/renderer/vma_usage.h
@@ -0,0 +1,6 @@
+#ifndef VGLTF_VMA_USAGE_H
+#define VGLTF_VMA_USAGE_H
+
+#include <vk_mem_alloc.h>
+
+#endif // VGLTF_VMA_USAGE_H
diff --git a/src/str.c b/src/str.c
new file mode 100644
index 0000000..9c68d43
--- /dev/null
+++ b/src/str.c
@@ -0,0 +1,181 @@
+#include "str.h"
+#include "alloc.h"
+#include "hash.h"
+#include "platform.h"
+#include <assert.h>
+#include <stdio.h>
+#include <string.h>
+
+struct vgltf_string_view vgltf_string_view_from_literal(const char *str) {
+  assert(str);
+  size_t length = strlen(str);
+  return (struct vgltf_string_view){.length = length, .data = str};
+}
+struct vgltf_string_view
+vgltf_string_view_from_string(struct vgltf_string string) {
+  return (struct vgltf_string_view){.length = string.length,
+                                    .data = string.data};
+}
+struct vgltf_string_view
+vgltf_string_view_from_string(struct vgltf_string string) {
+  return (struct vgltf_string_view){.length = string.length,
+                                    .data = string.data};
+}
+char vgltf_string_view_at(const struct vgltf_string_view *string_view,
+                          size_t index) {
+  assert(string_view);
+  assert(index < string_view->length);
+  return string_view->data[index];
+}
+bool vgltf_string_view_eq(struct vgltf_string_view view,
+                          struct vgltf_string_view other) {
+  return view.length == other.length &&
+         (strncmp(view.data, other.data, view.length) == 0);
+}
+size_t vgltf_string_view_length(const struct vgltf_string_view *string_view) {
+  assert(string_view);
+  return string_view->length;
+}
+
+uint64_t vgltf_string_view_hash(const struct vgltf_string_view view) {
+  return vgltf_hash_fnv_1a(view.data, view.length);
+}
+
+int vgltf_string_view_utf8_codepoint_at_offset(struct vgltf_string_view view,
+                                               size_t offset,
+                                               uint32_t *codepoint) {
+  assert(codepoint);
+  assert(offset < view.length);
+
+  const unsigned char *s = (unsigned char *)&view.data[offset];
+
+  int size;
+  if ((*s & 0x80) == 0) {
+    *codepoint = *s;
+    size = 1;
+  } else if ((*s & 0xE0) == 0xC0) {
+    *codepoint = *s & 0x1f;
+    size = 2;
+  } else if ((*s & 0xF0) == 0xE0) {
+    *codepoint = *s & 0x0f;
+    size = 3;
+  } else if ((*s & 0xF8) == 0xF0) {
+    *codepoint = *s & 0x07;
+    size = 4;
+  } else {
+    VGLTF_LOG_ERR("Invalid UTF-8 sequence");
+    return 0;
+  }
+
+  // Don't read continuation bytes past the end of the view.
+  if (offset + (size_t)size > view.length) {
+    VGLTF_LOG_ERR("Truncated UTF-8 sequence");
+    return 0;
+  }
+
+  for (int i = 1; i < size; i++) {
+    if ((s[i] & 0xC0) != 0x80) {
+      VGLTF_LOG_ERR("Invalid UTF-8 continuation byte");
+      return 0;
+    }
+
+    *codepoint = (*codepoint << 6) | (s[i] & 0x3F);
+  }
+
+  return size;
+}
+int vgltf_string_utf8_encode_codepoint(uint32_t codepoint,
+                                       char encoded_codepoint[4]) {
+  assert(encoded_codepoint);
+  if (codepoint > 0x10FFFF) {
+    return -1;
+  }
+
+  if (codepoint <= 0x7F) {
+    encoded_codepoint[0] = (uint8_t)codepoint;
+    return 1;
+  } else if (codepoint <= 0x7FF) {
+    encoded_codepoint[0] = 0xC0 | ((codepoint >> 6) & 0x1F);
+    encoded_codepoint[1] = 0x80 | (codepoint & 0x3F);
+    return 2;
+  } else if (codepoint <= 0xFFFF) {
+    encoded_codepoint[0] = 0xE0 | ((codepoint >> 12) & 0x0F);
+    encoded_codepoint[1] = 0x80 | ((codepoint >> 6) & 0x3F);
+    encoded_codepoint[2] = 0x80 | (codepoint & 0x3F);
+    return 3;
+  } else {
+    encoded_codepoint[0] = 0xF0 | ((codepoint >> 18) & 0x07);
+    encoded_codepoint[1] = 0x80 | ((codepoint >> 12) & 0x3F);
+    encoded_codepoint[2] = 0x80 | ((codepoint >> 6) & 0x3F);
+    encoded_codepoint[3] = 0x80 | (codepoint & 0x3F);
+    return 4;
+  }
+}
+
+struct vgltf_string
+vgltf_string_from_null_terminated(struct vgltf_allocator *allocator,
+                                  const char *str) {
+  assert(allocator);
+  assert(str);
+  struct vgltf_string string;
+  size_t length = strlen(str);
+  char *data = vgltf_allocator_allocate(allocator, length + 1);
+  if (!data) {
+    VGLTF_PANIC("Couldn't allocate string");
+  }
+  memcpy(data, str, length);
+  data[length] = '\0';
+  string.length = length;
+  string.data = data;
+  return string;
+}
+struct vgltf_string vgltf_string_clone(struct vgltf_allocator *allocator,
+                                       const struct vgltf_string string) {
+  assert(allocator);
+
+  size_t length = string.length;
+  char *data = vgltf_allocator_allocate(allocator, length + 1);
+  memcpy(data, string.data, length);
+  data[length] = '\0';
+
+  return (struct vgltf_string){.data = data, .length = length};
+}
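+// Usage sketch for the two UTF-8 helpers above: decoding a codepoint and
+// re-encoding it round-trips both the value and the byte count ("é" is
+// U+00E9, two bytes in UTF-8):
+//
+//   struct vgltf_string_view view = SV("héllo");
+//   uint32_t codepoint;
+//   int decoded_size =
+//       vgltf_string_view_utf8_codepoint_at_offset(view, 1, &codepoint);
+//   // decoded_size == 2, codepoint == 0xE9
+//   char bytes[4];
+//   int encoded_size = vgltf_string_utf8_encode_codepoint(codepoint, bytes);
+//   // encoded_size == 2, memcmp(bytes, view.data + 1, 2) == 0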
+struct vgltf_string vgltf_string_concatenate(struct vgltf_allocator *allocator,
+                                             struct vgltf_string_view head,
+                                             struct vgltf_string_view tail) {
+  assert(allocator);
+  size_t length = head.length + tail.length;
+  char *data = vgltf_allocator_allocate(allocator, length + 1);
+  memcpy(data, head.data, head.length);
+  memcpy(data + head.length, tail.data, tail.length);
+  data[length] = '\0';
+  return (struct vgltf_string){.data = data, .length = length};
+}
+struct vgltf_string vgltf_string_formatted(struct vgltf_allocator *allocator,
+                                           struct vgltf_string_view fmt, ...) {
+  va_list args;
+  va_start(args, fmt);
+  struct vgltf_string formatted_string =
+      vgltf_string_vformatted(allocator, fmt, args);
+  va_end(args);
+
+  return formatted_string;
+}
+struct vgltf_string vgltf_string_vformatted(struct vgltf_allocator *allocator,
+                                            struct vgltf_string_view fmt,
+                                            va_list args) {
+  assert(allocator);
+  // fmt is expected to point at null-terminated data, as produced by SV() or
+  // vgltf_string_view_from_literal.
+  char str[1024];
+  int written = vsnprintf(str, sizeof(str), fmt.data, args);
+  if (written < 0) {
+    VGLTF_PANIC("Couldn't format string");
+  }
+  // vsnprintf returns the untruncated length; clamp to what actually fits.
+  size_t length =
+      (size_t)written < sizeof(str) ? (size_t)written : sizeof(str) - 1;
+  char *data = vgltf_allocator_allocate(allocator, length + 1);
+  memcpy(data, str, length);
+  data[length] = '\0';
+  return (struct vgltf_string){.data = data, .length = length};
+}
+void vgltf_string_deinit(struct vgltf_allocator *allocator,
+                         struct vgltf_string *string) {
+  assert(allocator);
+  assert(string);
+  vgltf_allocator_free(allocator, string->data);
+}
+size_t vgltf_string_length(const struct vgltf_string *string) {
+  return string->length;
+}
+bool vgltf_string_eq_view(const struct vgltf_string string,
+                          const struct vgltf_string_view view) {
+  return string.length == view.length &&
+         (strncmp(string.data, view.data, string.length) == 0);
+}
+uint64_t vgltf_string_hash(const struct vgltf_string string) {
+  return vgltf_hash_fnv_1a(string.data, string.length);
+}
+bool vgltf_string_eq(struct vgltf_string string, struct vgltf_string other) {
+  return string.length == other.length &&
+         (strncmp(string.data, other.data, string.length) == 0);
+}
diff --git a/src/str.h b/src/str.h
new file mode 100644
index 0000000..c0e4e5c
--- /dev/null
+++ b/src/str.h
@@ -0,0 +1,62 @@
+#ifndef VGLTF_STR_H
+#define VGLTF_STR_H
+
+#include "alloc.h"
+#include <stdarg.h>
+#include <stdbool.h>
+#include <stddef.h>
+#include <stdint.h>
+#include <string.h> // IWYU pragma: keep
+
+#define SV(str)                                                                \
+  (struct vgltf_string_view) { .data = str, .length = strlen(str) }
+
+struct vgltf_string;
+struct vgltf_string_view {
+  const char *data;
+  size_t length;
+};
+
+struct vgltf_string_view vgltf_string_view_from_literal(const char *str);
+struct vgltf_string_view vgltf_string_view_from_string(struct vgltf_string string);
+size_t vgltf_string_view_length(const struct vgltf_string_view *string_view);
+char vgltf_string_view_at(const struct vgltf_string_view *string_view,
+                          size_t index);
+bool vgltf_string_view_eq(struct vgltf_string_view view,
+                          struct vgltf_string_view other);
+uint64_t vgltf_string_view_hash(const struct vgltf_string_view view);
+// Fetches the next utf8 codepoint in the string at the given offset.
+// Returns the size of the codepoint in bytes, 0 in case of error.
+int vgltf_string_view_utf8_codepoint_at_offset(struct vgltf_string_view view,
+                                               size_t offset,
+                                               uint32_t *codepoint);
+// encoded_codepoint has to point at (at least) a char[4]
+int vgltf_string_utf8_encode_codepoint(uint32_t codepoint,
+                                       char encoded_codepoint[4]);
+
+struct vgltf_string {
+  char *data;
+  size_t length;
+};
+struct vgltf_string
+vgltf_string_from_null_terminated(struct vgltf_allocator *allocator,
+                                  const char *str);
+struct vgltf_string vgltf_string_clone(struct vgltf_allocator *allocator,
+                                       const struct vgltf_string string);
+struct vgltf_string vgltf_string_concatenate(struct vgltf_allocator *allocator,
+                                             struct vgltf_string_view head,
+                                             struct vgltf_string_view tail);
+struct vgltf_string vgltf_string_formatted(struct vgltf_allocator *allocator,
+                                           struct vgltf_string_view fmt, ...);
+struct vgltf_string vgltf_string_vformatted(struct
vgltf_allocator *allocator, + struct vgltf_string_view fmt, + va_list args); +void vgltf_string_deinit(struct vgltf_allocator *allocator, + struct vgltf_string *string); +size_t vgltf_string_length(const struct vgltf_string *string); +bool vgltf_string_eq_view(const struct vgltf_string string, + const struct vgltf_string_view view); +uint64_t vgltf_string_hash(const struct vgltf_string string); +bool vgltf_string_eq(struct vgltf_string string, struct vgltf_string other); + +#endif // VGLTF_STR_H diff --git a/thirdpartylicenses.md b/thirdpartylicenses.md new file mode 100644 index 0000000..3d898b6 --- /dev/null +++ b/thirdpartylicenses.md @@ -0,0 +1,46 @@ +# stb_image: +Public Domain + +# vk_mem_alloc: +Copyright (c) 2017-2025 Advanced Micro Devices, Inc. All rights reserved. + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. + +# tinyobjloader_c: +The MIT License (MIT) + +Copyright (c) 2016 - 2019 Syoyo Fujita and many contributors. + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. diff --git a/vendor/stb_image.h b/vendor/stb_image.h new file mode 100644 index 0000000..9eedabe --- /dev/null +++ b/vendor/stb_image.h @@ -0,0 +1,7988 @@ +/* stb_image - v2.30 - public domain image loader - http://nothings.org/stb + no warranty implied; use at your own risk + + Do this: + #define STB_IMAGE_IMPLEMENTATION + before you include this file in *one* C or C++ file to create the implementation. + + // i.e. it should look like this: + #include ... + #include ... + #include ... 
+ #define STB_IMAGE_IMPLEMENTATION + #include "stb_image.h" + + You can #define STBI_ASSERT(x) before the #include to avoid using assert.h. + And #define STBI_MALLOC, STBI_REALLOC, and STBI_FREE to avoid using malloc,realloc,free + + + QUICK NOTES: + Primarily of interest to game developers and other people who can + avoid problematic images and only need the trivial interface + + JPEG baseline & progressive (12 bpc/arithmetic not supported, same as stock IJG lib) + PNG 1/2/4/8/16-bit-per-channel + + TGA (not sure what subset, if a subset) + BMP non-1bpp, non-RLE + PSD (composited view only, no extra channels, 8/16 bit-per-channel) + + GIF (*comp always reports as 4-channel) + HDR (radiance rgbE format) + PIC (Softimage PIC) + PNM (PPM and PGM binary only) + + Animated GIF still needs a proper API, but here's one way to do it: + http://gist.github.com/urraka/685d9a6340b26b830d49 + + - decode from memory or through FILE (define STBI_NO_STDIO to remove code) + - decode from arbitrary I/O callbacks + - SIMD acceleration on x86/x64 (SSE2) and ARM (NEON) + + Full documentation under "DOCUMENTATION" below. + + +LICENSE + + See end of file for license information. + +RECENT REVISION HISTORY: + + 2.30 (2024-05-31) avoid erroneous gcc warning + 2.29 (2023-05-xx) optimizations + 2.28 (2023-01-29) many error fixes, security errors, just tons of stuff + 2.27 (2021-07-11) document stbi_info better, 16-bit PNM support, bug fixes + 2.26 (2020-07-13) many minor fixes + 2.25 (2020-02-02) fix warnings + 2.24 (2020-02-02) fix warnings; thread-local failure_reason and flip_vertically + 2.23 (2019-08-11) fix clang static analysis warning + 2.22 (2019-03-04) gif fixes, fix warnings + 2.21 (2019-02-25) fix typo in comment + 2.20 (2019-02-07) support utf8 filenames in Windows; fix warnings and platform ifdefs + 2.19 (2018-02-11) fix warning + 2.18 (2018-01-30) fix warnings + 2.17 (2018-01-29) bugfix, 1-bit BMP, 16-bitness query, fix warnings + 2.16 (2017-07-23) all functions have 16-bit variants; optimizations; bugfixes + 2.15 (2017-03-18) fix png-1,2,4; all Imagenet JPGs; no runtime SSE detection on GCC + 2.14 (2017-03-03) remove deprecated STBI_JPEG_OLD; fixes for Imagenet JPGs + 2.13 (2016-12-04) experimental 16-bit API, only for PNG so far; fixes + 2.12 (2016-04-02) fix typo in 2.11 PSD fix that caused crashes + 2.11 (2016-04-02) 16-bit PNGS; enable SSE2 in non-gcc x64 + RGB-format JPEG; remove white matting in PSD; + allocate large structures on the stack; + correct channel count for PNG & BMP + 2.10 (2016-01-22) avoid warning introduced in 2.09 + 2.09 (2016-01-16) 16-bit TGA; comments in PNM files; STBI_REALLOC_SIZED + + See end of file for full revision history. 
+ + + ============================ Contributors ========================= + + Image formats Extensions, features + Sean Barrett (jpeg, png, bmp) Jetro Lauha (stbi_info) + Nicolas Schulz (hdr, psd) Martin "SpartanJ" Golini (stbi_info) + Jonathan Dummer (tga) James "moose2000" Brown (iPhone PNG) + Jean-Marc Lienher (gif) Ben "Disch" Wenger (io callbacks) + Tom Seddon (pic) Omar Cornut (1/2/4-bit PNG) + Thatcher Ulrich (psd) Nicolas Guillemot (vertical flip) + Ken Miller (pgm, ppm) Richard Mitton (16-bit PSD) + github:urraka (animated gif) Junggon Kim (PNM comments) + Christopher Forseth (animated gif) Daniel Gibson (16-bit TGA) + socks-the-fox (16-bit PNG) + Jeremy Sawicki (handle all ImageNet JPGs) + Optimizations & bugfixes Mikhail Morozov (1-bit BMP) + Fabian "ryg" Giesen Anael Seghezzi (is-16-bit query) + Arseny Kapoulkine Simon Breuss (16-bit PNM) + John-Mark Allen + Carmelo J Fdez-Aguera + + Bug & warning fixes + Marc LeBlanc David Woo Guillaume George Martins Mozeiko + Christpher Lloyd Jerry Jansson Joseph Thomson Blazej Dariusz Roszkowski + Phil Jordan Dave Moore Roy Eltham + Hayaki Saito Nathan Reed Won Chun + Luke Graham Johan Duparc Nick Verigakis the Horde3D community + Thomas Ruf Ronny Chevalier github:rlyeh + Janez Zemva John Bartholomew Michal Cichon github:romigrou + Jonathan Blow Ken Hamada Tero Hanninen github:svdijk + Eugene Golushkov Laurent Gomila Cort Stratton github:snagar + Aruelien Pocheville Sergio Gonzalez Thibault Reuille github:Zelex + Cass Everitt Ryamond Barbiero github:grim210 + Paul Du Bois Engin Manap Aldo Culquicondor github:sammyhw + Philipp Wiesemann Dale Weiler Oriol Ferrer Mesia github:phprus + Josh Tobin Neil Bickford Matthew Gregan github:poppolopoppo + Julian Raschke Gregory Mullen Christian Floisand github:darealshinji + Baldur Karlsson Kevin Schmidt JR Smith github:Michaelangel007 + Brad Weinberger Matvey Cherevko github:mosra + Luca Sas Alexander Veselov Zack Middleton [reserved] + Ryan C. Gordon [reserved] [reserved] + DO NOT ADD YOUR NAME HERE + + Jacko Dirks + + To add your name to the credits, pick a random blank space in the middle and fill it. + 80% of merge conflicts on stb PRs are due to people adding their name at the end + of the credits. +*/ + +#ifndef STBI_INCLUDE_STB_IMAGE_H +#define STBI_INCLUDE_STB_IMAGE_H + +// DOCUMENTATION +// +// Limitations: +// - no 12-bit-per-channel JPEG +// - no JPEGs with arithmetic coding +// - GIF always returns *comp=4 +// +// Basic usage (see HDR discussion below for HDR usage): +// int x,y,n; +// unsigned char *data = stbi_load(filename, &x, &y, &n, 0); +// // ... process data if not NULL ... +// // ... x = width, y = height, n = # 8-bit components per pixel ... +// // ... replace '0' with '1'..'4' to force that many components per pixel +// // ... but 'n' will always be the number that it would have been if you said 0 +// stbi_image_free(data); +// +// Standard parameters: +// int *x -- outputs image width in pixels +// int *y -- outputs image height in pixels +// int *channels_in_file -- outputs # of image components in image file +// int desired_channels -- if non-zero, # of image components requested in result +// +// The return value from an image loader is an 'unsigned char *' which points +// to the pixel data, or NULL on an allocation failure or if the image is +// corrupt or invalid. The pixel data consists of *y scanlines of *x pixels, +// with each pixel consisting of N interleaved 8-bit components; the first +// pixel pointed to is top-left-most in the image. 
There is no padding between +// image scanlines or between pixels, regardless of format. The number of +// components N is 'desired_channels' if desired_channels is non-zero, or +// *channels_in_file otherwise. If desired_channels is non-zero, +// *channels_in_file has the number of components that _would_ have been +// output otherwise. E.g. if you set desired_channels to 4, you will always +// get RGBA output, but you can check *channels_in_file to see if it's trivially +// opaque because e.g. there were only 3 channels in the source image. +// +// An output image with N components has the following components interleaved +// in this order in each pixel: +// +// N=#comp components +// 1 grey +// 2 grey, alpha +// 3 red, green, blue +// 4 red, green, blue, alpha +// +// If image loading fails for any reason, the return value will be NULL, +// and *x, *y, *channels_in_file will be unchanged. The function +// stbi_failure_reason() can be queried for an extremely brief, end-user +// unfriendly explanation of why the load failed. Define STBI_NO_FAILURE_STRINGS +// to avoid compiling these strings at all, and STBI_FAILURE_USERMSG to get slightly +// more user-friendly ones. +// +// Paletted PNG, BMP, GIF, and PIC images are automatically depalettized. +// +// To query the width, height and component count of an image without having to +// decode the full file, you can use the stbi_info family of functions: +// +// int x,y,n,ok; +// ok = stbi_info(filename, &x, &y, &n); +// // returns ok=1 and sets x, y, n if image is a supported format, +// // 0 otherwise. +// +// Note that stb_image pervasively uses ints in its public API for sizes, +// including sizes of memory buffers. This is now part of the API and thus +// hard to change without causing breakage. As a result, the various image +// loaders all have certain limits on image size; these differ somewhat +// by format but generally boil down to either just under 2GB or just under +// 1GB. When the decoded image would be larger than this, stb_image decoding +// will fail. +// +// Additionally, stb_image will reject image files that have any of their +// dimensions set to a larger value than the configurable STBI_MAX_DIMENSIONS, +// which defaults to 2**24 = 16777216 pixels. Due to the above memory limit, +// the only way to have an image with such dimensions load correctly +// is for it to have a rather extreme aspect ratio. Either way, the +// assumption here is that such larger images are likely to be malformed +// or malicious. If you do need to load an image with individual dimensions +// larger than that, and it still fits in the overall size limit, you can +// #define STBI_MAX_DIMENSIONS on your own to be something larger. +// +// =========================================================================== +// +// UNICODE: +// +// If compiling for Windows and you wish to use Unicode filenames, compile +// with +// #define STBI_WINDOWS_UTF8 +// and pass utf8-encoded filenames. Call stbi_convert_wchar_to_utf8 to convert +// Windows wchar_t filenames to utf8. +// +// =========================================================================== +// +// Philosophy +// +// stb libraries are designed with the following priorities: +// +// 1. easy to use +// 2. easy to maintain +// 3. good performance +// +// Sometimes I let "good performance" creep up in priority over "easy to maintain", +// and for best performance I may provide less-easy-to-use APIs that give higher +// performance, in addition to the easy-to-use ones. 
Nevertheless, it's important +// to keep in mind that from the standpoint of you, a client of this library, +// all you care about is #1 and #3, and stb libraries DO NOT emphasize #3 above all. +// +// Some secondary priorities arise directly from the first two, some of which +// provide more explicit reasons why performance can't be emphasized. +// +// - Portable ("ease of use") +// - Small source code footprint ("easy to maintain") +// - No dependencies ("ease of use") +// +// =========================================================================== +// +// I/O callbacks +// +// I/O callbacks allow you to read from arbitrary sources, like packaged +// files or some other source. Data read from callbacks are processed +// through a small internal buffer (currently 128 bytes) to try to reduce +// overhead. +// +// The three functions you must define are "read" (reads some bytes of data), +// "skip" (skips some bytes of data), "eof" (reports if the stream is at the end). +// +// =========================================================================== +// +// SIMD support +// +// The JPEG decoder will try to automatically use SIMD kernels on x86 when +// supported by the compiler. For ARM Neon support, you must explicitly +// request it. +// +// (The old do-it-yourself SIMD API is no longer supported in the current +// code.) +// +// On x86, SSE2 will automatically be used when available based on a run-time +// test; if not, the generic C versions are used as a fall-back. On ARM targets, +// the typical path is to have separate builds for NEON and non-NEON devices +// (at least this is true for iOS and Android). Therefore, the NEON support is +// toggled by a build flag: define STBI_NEON to get NEON loops. +// +// If for some reason you do not want to use any of SIMD code, or if +// you have issues compiling it, you can disable it entirely by +// defining STBI_NO_SIMD. +// +// =========================================================================== +// +// HDR image support (disable by defining STBI_NO_HDR) +// +// stb_image supports loading HDR images in general, and currently the Radiance +// .HDR file format specifically. You can still load any file through the existing +// interface; if you attempt to load an HDR file, it will be automatically remapped +// to LDR, assuming gamma 2.2 and an arbitrary scale factor defaulting to 1; +// both of these constants can be reconfigured through this interface: +// +// stbi_hdr_to_ldr_gamma(2.2f); +// stbi_hdr_to_ldr_scale(1.0f); +// +// (note, do not use _inverse_ constants; stbi_image will invert them +// appropriately). 
+// +// Additionally, there is a new, parallel interface for loading files as +// (linear) floats to preserve the full dynamic range: +// +// float *data = stbi_loadf(filename, &x, &y, &n, 0); +// +// If you load LDR images through this interface, those images will +// be promoted to floating point values, run through the inverse of +// constants corresponding to the above: +// +// stbi_ldr_to_hdr_scale(1.0f); +// stbi_ldr_to_hdr_gamma(2.2f); +// +// Finally, given a filename (or an open file or memory block--see header +// file for details) containing image data, you can query for the "most +// appropriate" interface to use (that is, whether the image is HDR or +// not), using: +// +// stbi_is_hdr(char *filename); +// +// =========================================================================== +// +// iPhone PNG support: +// +// We optionally support converting iPhone-formatted PNGs (which store +// premultiplied BGRA) back to RGB, even though they're internally encoded +// differently. To enable this conversion, call +// stbi_convert_iphone_png_to_rgb(1). +// +// Call stbi_set_unpremultiply_on_load(1) as well to force a divide per +// pixel to remove any premultiplied alpha *only* if the image file explicitly +// says there's premultiplied data (currently only happens in iPhone images, +// and only if iPhone convert-to-rgb processing is on). +// +// =========================================================================== +// +// ADDITIONAL CONFIGURATION +// +// - You can suppress implementation of any of the decoders to reduce +// your code footprint by #defining one or more of the following +// symbols before creating the implementation. +// +// STBI_NO_JPEG +// STBI_NO_PNG +// STBI_NO_BMP +// STBI_NO_PSD +// STBI_NO_TGA +// STBI_NO_GIF +// STBI_NO_HDR +// STBI_NO_PIC +// STBI_NO_PNM (.ppm and .pgm) +// +// - You can request *only* certain decoders and suppress all other ones +// (this will be more forward-compatible, as addition of new decoders +// doesn't require you to disable them explicitly): +// +// STBI_ONLY_JPEG +// STBI_ONLY_PNG +// STBI_ONLY_BMP +// STBI_ONLY_PSD +// STBI_ONLY_TGA +// STBI_ONLY_GIF +// STBI_ONLY_HDR +// STBI_ONLY_PIC +// STBI_ONLY_PNM (.ppm and .pgm) +// +// - If you use STBI_NO_PNG (or _ONLY_ without PNG), and you still +// want the zlib decoder to be available, #define STBI_SUPPORT_ZLIB +// +// - If you define STBI_MAX_DIMENSIONS, stb_image will reject images greater +// than that size (in either width or height) without further processing. +// This is to let programs in the wild set an upper bound to prevent +// denial-of-service attacks on untrusted data, as one could generate a +// valid image of gigantic dimensions and force stb_image to allocate a +// huge block of memory and spend disproportionate time decoding it. By +// default this is set to (1 << 24), which is 16777216, but that's still +// very big. 
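+//
+// As a worked example of the I/O callbacks described above (a sketch only;
+// mem_stream, mem_read, mem_skip and mem_eof are placeholder names, and in
+// real code stbi_load_from_memory already covers this case), reading from a
+// memory span through the callback interface could look like:
+//
+//    typedef struct { const unsigned char *data; int size, pos; } mem_stream;
+//    static int mem_read(void *user, char *data, int size) {
+//       mem_stream *s = (mem_stream *) user;
+//       int n = s->size - s->pos < size ? s->size - s->pos : size;
+//       memcpy(data, s->data + s->pos, n);
+//       s->pos += n;
+//       return n; // number of bytes actually read
+//    }
+//    static void mem_skip(void *user, int n) { ((mem_stream *) user)->pos += n; }
+//    static int mem_eof(void *user) {
+//       mem_stream *s = (mem_stream *) user;
+//       return s->pos >= s->size;
+//    }
+//
+//    mem_stream ms = { buffer, buffer_len, 0 };
+//    stbi_io_callbacks cb = { mem_read, mem_skip, mem_eof };
+//    unsigned char *pixels = stbi_load_from_callbacks(&cb, &ms, &x, &y, &n, 0);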
+
+#ifndef STBI_NO_STDIO
+#include <stdio.h>
+#endif // STBI_NO_STDIO
+
+#define STBI_VERSION 1
+
+enum
+{
+   STBI_default = 0, // only used for desired_channels
+
+   STBI_grey       = 1,
+   STBI_grey_alpha = 2,
+   STBI_rgb        = 3,
+   STBI_rgb_alpha  = 4
+};
+
+#include <stdlib.h>
+typedef unsigned char stbi_uc;
+typedef unsigned short stbi_us;
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#ifndef STBIDEF
+#ifdef STB_IMAGE_STATIC
+#define STBIDEF static
+#else
+#define STBIDEF extern
+#endif
+#endif
+
+//////////////////////////////////////////////////////////////////////////////
+//
+// PRIMARY API - works on images of any type
+//
+
+//
+// load image by filename, open file, or memory buffer
+//
+
+typedef struct
+{
+   int      (*read)  (void *user,char *data,int size);   // fill 'data' with 'size' bytes.  return number of bytes actually read
+   void     (*skip)  (void *user,int n);                 // skip the next 'n' bytes, or 'unget' the last -n bytes if negative
+   int      (*eof)   (void *user);                       // returns nonzero if we are at end of file/data
+} stbi_io_callbacks;
+
+////////////////////////////////////
+//
+// 8-bits-per-channel interface
+//
+
+STBIDEF stbi_uc *stbi_load_from_memory   (stbi_uc           const *buffer, int len   , int *x, int *y, int *channels_in_file, int desired_channels);
+STBIDEF stbi_uc *stbi_load_from_callbacks(stbi_io_callbacks const *clbk  , void *user, int *x, int *y, int *channels_in_file, int desired_channels);
+
+#ifndef STBI_NO_STDIO
+STBIDEF stbi_uc *stbi_load            (char const *filename, int *x, int *y, int *channels_in_file, int desired_channels);
+STBIDEF stbi_uc *stbi_load_from_file  (FILE *f, int *x, int *y, int *channels_in_file, int desired_channels);
+// for stbi_load_from_file, file pointer is left pointing immediately after image
+#endif
+
+#ifndef STBI_NO_GIF
+STBIDEF stbi_uc *stbi_load_gif_from_memory(stbi_uc const *buffer, int len, int **delays, int *x, int *y, int *z, int *comp, int req_comp);
+#endif
+
+#ifdef STBI_WINDOWS_UTF8
+STBIDEF int stbi_convert_wchar_to_utf8(char *buffer, size_t bufferlen, const wchar_t* input);
+#endif
+
+////////////////////////////////////
+//
+// 16-bits-per-channel interface
+//
+
+STBIDEF stbi_us *stbi_load_16_from_memory   (stbi_uc const *buffer, int len, int *x, int *y, int *channels_in_file, int desired_channels);
+STBIDEF stbi_us *stbi_load_16_from_callbacks(stbi_io_callbacks const *clbk, void *user, int *x, int *y, int *channels_in_file, int desired_channels);
+
+#ifndef STBI_NO_STDIO
+STBIDEF stbi_us *stbi_load_16          (char const *filename, int *x, int *y, int *channels_in_file, int desired_channels);
+STBIDEF stbi_us *stbi_load_from_file_16(FILE *f, int *x, int *y, int *channels_in_file, int desired_channels);
+#endif
+
+////////////////////////////////////
+//
+// float-per-channel interface
+//
+#ifndef STBI_NO_LINEAR
+   STBIDEF float *stbi_loadf_from_memory     (stbi_uc const *buffer, int len, int *x, int *y, int *channels_in_file, int desired_channels);
+   STBIDEF float *stbi_loadf_from_callbacks  (stbi_io_callbacks const *clbk, void *user, int *x, int *y, int *channels_in_file, int desired_channels);
+
+   #ifndef STBI_NO_STDIO
+   STBIDEF float *stbi_loadf            (char const *filename, int *x, int *y, int *channels_in_file, int desired_channels);
+   STBIDEF float *stbi_loadf_from_file  (FILE *f, int *x, int *y, int *channels_in_file, int desired_channels);
+   #endif
+#endif
+
+#ifndef STBI_NO_HDR
+   STBIDEF void   stbi_hdr_to_ldr_gamma(float gamma);
+   STBIDEF void   stbi_hdr_to_ldr_scale(float scale);
+#endif // STBI_NO_HDR
+
+#ifndef STBI_NO_LINEAR
+   STBIDEF void   stbi_ldr_to_hdr_gamma(float gamma);
+   STBIDEF void
stbi_ldr_to_hdr_scale(float scale); +#endif // STBI_NO_LINEAR + +// stbi_is_hdr is always defined, but always returns false if STBI_NO_HDR +STBIDEF int stbi_is_hdr_from_callbacks(stbi_io_callbacks const *clbk, void *user); +STBIDEF int stbi_is_hdr_from_memory(stbi_uc const *buffer, int len); +#ifndef STBI_NO_STDIO +STBIDEF int stbi_is_hdr (char const *filename); +STBIDEF int stbi_is_hdr_from_file(FILE *f); +#endif // STBI_NO_STDIO + + +// get a VERY brief reason for failure +// on most compilers (and ALL modern mainstream compilers) this is threadsafe +STBIDEF const char *stbi_failure_reason (void); + +// free the loaded image -- this is just free() +STBIDEF void stbi_image_free (void *retval_from_stbi_load); + +// get image dimensions & components without fully decoding +STBIDEF int stbi_info_from_memory(stbi_uc const *buffer, int len, int *x, int *y, int *comp); +STBIDEF int stbi_info_from_callbacks(stbi_io_callbacks const *clbk, void *user, int *x, int *y, int *comp); +STBIDEF int stbi_is_16_bit_from_memory(stbi_uc const *buffer, int len); +STBIDEF int stbi_is_16_bit_from_callbacks(stbi_io_callbacks const *clbk, void *user); + +#ifndef STBI_NO_STDIO +STBIDEF int stbi_info (char const *filename, int *x, int *y, int *comp); +STBIDEF int stbi_info_from_file (FILE *f, int *x, int *y, int *comp); +STBIDEF int stbi_is_16_bit (char const *filename); +STBIDEF int stbi_is_16_bit_from_file(FILE *f); +#endif + + + +// for image formats that explicitly notate that they have premultiplied alpha, +// we just return the colors as stored in the file. set this flag to force +// unpremultiplication. results are undefined if the unpremultiply overflow. +STBIDEF void stbi_set_unpremultiply_on_load(int flag_true_if_should_unpremultiply); + +// indicate whether we should process iphone images back to canonical format, +// or just pass them through "as-is" +STBIDEF void stbi_convert_iphone_png_to_rgb(int flag_true_if_should_convert); + +// flip the image vertically, so the first pixel in the output array is the bottom left +STBIDEF void stbi_set_flip_vertically_on_load(int flag_true_if_should_flip); + +// as above, but only applies to images loaded on the thread that calls the function +// this function is only available if your compiler supports thread-local variables; +// calling it will fail to link if your compiler doesn't +STBIDEF void stbi_set_unpremultiply_on_load_thread(int flag_true_if_should_unpremultiply); +STBIDEF void stbi_convert_iphone_png_to_rgb_thread(int flag_true_if_should_convert); +STBIDEF void stbi_set_flip_vertically_on_load_thread(int flag_true_if_should_flip); + +// ZLIB client - used by PNG, available for other purposes + +STBIDEF char *stbi_zlib_decode_malloc_guesssize(const char *buffer, int len, int initial_size, int *outlen); +STBIDEF char *stbi_zlib_decode_malloc_guesssize_headerflag(const char *buffer, int len, int initial_size, int *outlen, int parse_header); +STBIDEF char *stbi_zlib_decode_malloc(const char *buffer, int len, int *outlen); +STBIDEF int stbi_zlib_decode_buffer(char *obuffer, int olen, const char *ibuffer, int ilen); + +STBIDEF char *stbi_zlib_decode_noheader_malloc(const char *buffer, int len, int *outlen); +STBIDEF int stbi_zlib_decode_noheader_buffer(char *obuffer, int olen, const char *ibuffer, int ilen); + + +#ifdef __cplusplus +} +#endif + +// +// +//// end header file ///////////////////////////////////////////////////// +#endif // STBI_INCLUDE_STB_IMAGE_H + +#ifdef STB_IMAGE_IMPLEMENTATION + +#if defined(STBI_ONLY_JPEG) || defined(STBI_ONLY_PNG) || 
defined(STBI_ONLY_BMP) \
+  || defined(STBI_ONLY_TGA) || defined(STBI_ONLY_GIF) || defined(STBI_ONLY_PSD) \
+  || defined(STBI_ONLY_HDR) || defined(STBI_ONLY_PIC) || defined(STBI_ONLY_PNM) \
+  || defined(STBI_ONLY_ZLIB)
+   #ifndef STBI_ONLY_JPEG
+   #define STBI_NO_JPEG
+   #endif
+   #ifndef STBI_ONLY_PNG
+   #define STBI_NO_PNG
+   #endif
+   #ifndef STBI_ONLY_BMP
+   #define STBI_NO_BMP
+   #endif
+   #ifndef STBI_ONLY_PSD
+   #define STBI_NO_PSD
+   #endif
+   #ifndef STBI_ONLY_TGA
+   #define STBI_NO_TGA
+   #endif
+   #ifndef STBI_ONLY_GIF
+   #define STBI_NO_GIF
+   #endif
+   #ifndef STBI_ONLY_HDR
+   #define STBI_NO_HDR
+   #endif
+   #ifndef STBI_ONLY_PIC
+   #define STBI_NO_PIC
+   #endif
+   #ifndef STBI_ONLY_PNM
+   #define STBI_NO_PNM
+   #endif
+#endif
+
+#if defined(STBI_NO_PNG) && !defined(STBI_SUPPORT_ZLIB) && !defined(STBI_NO_ZLIB)
+#define STBI_NO_ZLIB
+#endif
+
+
+#include <stdarg.h>
+#include <stddef.h> // ptrdiff_t on osx
+#include <stdlib.h>
+#include <string.h>
+#include <limits.h>
+
+#if !defined(STBI_NO_LINEAR) || !defined(STBI_NO_HDR)
+#include <math.h>  // ldexp, pow
+#endif
+
+#ifndef STBI_NO_STDIO
+#include <stdio.h>
+#endif
+
+#ifndef STBI_ASSERT
+#include <assert.h>
+#define STBI_ASSERT(x) assert(x)
+#endif
+
+#ifdef __cplusplus
+#define STBI_EXTERN extern "C"
+#else
+#define STBI_EXTERN extern
+#endif
+
+
+#ifndef _MSC_VER
+   #ifdef __cplusplus
+   #define stbi_inline inline
+   #else
+   #define stbi_inline
+   #endif
+#else
+   #define stbi_inline __forceinline
+#endif
+
+#ifndef STBI_NO_THREAD_LOCALS
+   #if defined(__cplusplus) && __cplusplus >= 201103L
+      #define STBI_THREAD_LOCAL       thread_local
+   #elif defined(__GNUC__) && __GNUC__ < 5
+      #define STBI_THREAD_LOCAL       __thread
+   #elif defined(_MSC_VER)
+      #define STBI_THREAD_LOCAL       __declspec(thread)
+   #elif defined (__STDC_VERSION__) && __STDC_VERSION__ >= 201112L && !defined(__STDC_NO_THREADS__)
+      #define STBI_THREAD_LOCAL       _Thread_local
+   #endif
+
+   #ifndef STBI_THREAD_LOCAL
+      #if defined(__GNUC__)
+        #define STBI_THREAD_LOCAL       __thread
+      #endif
+   #endif
+#endif
+
+#if defined(_MSC_VER) || defined(__SYMBIAN32__)
+typedef unsigned short stbi__uint16;
+typedef   signed short stbi__int16;
+typedef unsigned int   stbi__uint32;
+typedef   signed int   stbi__int32;
+#else
+#include <stdint.h>
+typedef uint16_t stbi__uint16;
+typedef int16_t  stbi__int16;
+typedef uint32_t stbi__uint32;
+typedef int32_t  stbi__int32;
+#endif
+
+// should produce compiler error if size is wrong
+typedef unsigned char validate_uint32[sizeof(stbi__uint32)==4 ? 1 : -1];
+
+#ifdef _MSC_VER
+#define STBI_NOTUSED(v)  (void)(v)
+#else
+#define STBI_NOTUSED(v)  (void)sizeof(v)
+#endif
+
+#ifdef _MSC_VER
+#define STBI_HAS_LROTL
+#endif
+
+#ifdef STBI_HAS_LROTL
+   #define stbi_lrot(x,y)  _lrotl(x,y)
+#else
+   #define stbi_lrot(x,y)  (((x) << (y)) | ((x) >> (-(y) & 31)))
+#endif
+
+#if defined(STBI_MALLOC) && defined(STBI_FREE) && (defined(STBI_REALLOC) || defined(STBI_REALLOC_SIZED))
+// ok
+#elif !defined(STBI_MALLOC) && !defined(STBI_FREE) && !defined(STBI_REALLOC) && !defined(STBI_REALLOC_SIZED)
+// ok
+#else
+#error "Must define all or none of STBI_MALLOC, STBI_FREE, and STBI_REALLOC (or STBI_REALLOC_SIZED)."
+#endif
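+
+// For example, to route all of stb_image's allocations through a custom
+// tracking allocator, define all three (my_alloc/my_realloc/my_free are
+// placeholders for whatever your project provides) before creating the
+// implementation:
+//
+//    #define STBI_MALLOC(sz)           my_alloc(sz)
+//    #define STBI_REALLOC(p,newsz)     my_realloc(p,newsz)
+//    #define STBI_FREE(p)              my_free(p)
+//    #define STB_IMAGE_IMPLEMENTATION
+//    #include "stb_image.h"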
+
+#ifndef STBI_MALLOC
+#define STBI_MALLOC(sz)           malloc(sz)
+#define STBI_REALLOC(p,newsz)     realloc(p,newsz)
+#define STBI_FREE(p)              free(p)
+#endif
+
+#ifndef STBI_REALLOC_SIZED
+#define STBI_REALLOC_SIZED(p,oldsz,newsz) STBI_REALLOC(p,newsz)
+#endif
+
+// x86/x64 detection
+#if defined(__x86_64__) || defined(_M_X64)
+#define STBI__X64_TARGET
+#elif defined(__i386) || defined(_M_IX86)
+#define STBI__X86_TARGET
+#endif
+
+#if defined(__GNUC__) && defined(STBI__X86_TARGET) && !defined(__SSE2__) && !defined(STBI_NO_SIMD)
+// gcc doesn't support sse2 intrinsics unless you compile with -msse2,
+// which in turn means it gets to use SSE2 everywhere. This is unfortunate,
+// but previous attempts to provide the SSE2 functions with runtime
+// detection caused numerous issues. The way architecture extensions are
+// exposed in GCC/Clang is, sadly, not really suited for one-file libs.
+// New behavior: if compiled with -msse2, we use SSE2 without any
+// detection; if not, we don't use it at all.
+#define STBI_NO_SIMD
+#endif
+
+#if defined(__MINGW32__) && defined(STBI__X86_TARGET) && !defined(STBI_MINGW_ENABLE_SSE2) && !defined(STBI_NO_SIMD)
+// Note that __MINGW32__ doesn't actually mean 32-bit, so we have to avoid STBI__X64_TARGET
+//
+// 32-bit MinGW wants ESP to be 16-byte aligned, but this is not in the
+// Windows ABI and VC++ as well as Windows DLLs don't maintain that invariant.
+// As a result, enabling SSE2 on 32-bit MinGW is dangerous when not
+// simultaneously enabling "-mstackrealign".
+//
+// See https://github.com/nothings/stb/issues/81 for more information.
+//
+// So default to no SSE2 on 32-bit MinGW. If you've read this far and added
+// -mstackrealign to your build settings, feel free to #define STBI_MINGW_ENABLE_SSE2.
+#define STBI_NO_SIMD
+#endif
+
+#if !defined(STBI_NO_SIMD) && (defined(STBI__X86_TARGET) || defined(STBI__X64_TARGET))
+#define STBI_SSE2
+#include <emmintrin.h>
+
+#ifdef _MSC_VER
+
+#if _MSC_VER >= 1400  // not VC6
+#include <intrin.h> // __cpuid
+static int stbi__cpuid3(void)
+{
+   int info[4];
+   __cpuid(info,1);
+   return info[3];
+}
+#else
+static int stbi__cpuid3(void)
+{
+   int res;
+   __asm {
+      mov  eax,1
+      cpuid
+      mov  res,edx
+   }
+   return res;
+}
+#endif
+
+#define STBI_SIMD_ALIGN(type, name) __declspec(align(16)) type name
+
+#if !defined(STBI_NO_JPEG) && defined(STBI_SSE2)
+static int stbi__sse2_available(void)
+{
+   int info3 = stbi__cpuid3();
+   return ((info3 >> 26) & 1) != 0;
+}
+#endif
+
+#else // assume GCC-style if not VC++
+#define STBI_SIMD_ALIGN(type, name) type name __attribute__((aligned(16)))
+
+#if !defined(STBI_NO_JPEG) && defined(STBI_SSE2)
+static int stbi__sse2_available(void)
+{
+   // If we're even attempting to compile this on GCC/Clang, that means
+   // -msse2 is on, which means the compiler is allowed to use SSE2
+   // instructions at will, and so are we.
+   return 1;
+}
+#endif
+
+#endif
+#endif
+
+// ARM NEON
+#if defined(STBI_NO_SIMD) && defined(STBI_NEON)
+#undef STBI_NEON
+#endif
+
+#ifdef STBI_NEON
+#include <arm_neon.h>
+#ifdef _MSC_VER
+#define STBI_SIMD_ALIGN(type, name) __declspec(align(16)) type name
+#else
+#define STBI_SIMD_ALIGN(type, name) type name __attribute__((aligned(16)))
+#endif
+#endif
+
+#ifndef STBI_SIMD_ALIGN
+#define STBI_SIMD_ALIGN(type, name) type name
+#endif
+
+#ifndef STBI_MAX_DIMENSIONS
+#define STBI_MAX_DIMENSIONS (1 << 24)
+#endif
+
+///////////////////////////////////////////////
+//
+//  stbi__context struct and start_xxx functions
+
+// stbi__context structure is our basic context used by all images, so it
+// contains all the IO context, plus some basic image information
+typedef struct
+{
+   stbi__uint32 img_x, img_y;
+   int img_n, img_out_n;
+
+   stbi_io_callbacks io;
+   void *io_user_data;
+
+   int read_from_callbacks;
+   int buflen;
+   stbi_uc buffer_start[128];
+   int callback_already_read;
+
+   stbi_uc *img_buffer, *img_buffer_end;
+   stbi_uc *img_buffer_original, *img_buffer_original_end;
+} stbi__context;
+
+
+static void stbi__refill_buffer(stbi__context *s);
+
+// initialize a memory-decode context
+static void stbi__start_mem(stbi__context *s, stbi_uc const *buffer, int len)
+{
+   s->io.read = NULL;
+   s->read_from_callbacks = 0;
+   s->callback_already_read = 0;
+   s->img_buffer = s->img_buffer_original = (stbi_uc *) buffer;
+   s->img_buffer_end = s->img_buffer_original_end = (stbi_uc *) buffer+len;
+}
+
+// initialize a callback-based context
+static void stbi__start_callbacks(stbi__context *s, stbi_io_callbacks *c, void *user)
+{
+   s->io = *c;
+   s->io_user_data = user;
+   s->buflen = sizeof(s->buffer_start);
+   s->read_from_callbacks = 1;
+   s->callback_already_read = 0;
+   s->img_buffer = s->img_buffer_original = s->buffer_start;
+   stbi__refill_buffer(s);
+   s->img_buffer_original_end = s->img_buffer_end;
+}
+
+#ifndef STBI_NO_STDIO
+
+static int stbi__stdio_read(void *user, char *data, int size)
+{
+   return (int) fread(data,1,size,(FILE*) user);
+}
+
+static void stbi__stdio_skip(void *user, int n)
+{
+   int ch;
+   fseek((FILE*) user, n, SEEK_CUR);
+   ch = fgetc((FILE*) user);  /* have to read a byte to reset feof()'s flag */
+   if (ch != EOF) {
+      ungetc(ch, (FILE *) user);  /* push byte back onto stream if valid.
*/ + } +} + +static int stbi__stdio_eof(void *user) +{ + return feof((FILE*) user) || ferror((FILE *) user); +} + +static stbi_io_callbacks stbi__stdio_callbacks = +{ + stbi__stdio_read, + stbi__stdio_skip, + stbi__stdio_eof, +}; + +static void stbi__start_file(stbi__context *s, FILE *f) +{ + stbi__start_callbacks(s, &stbi__stdio_callbacks, (void *) f); +} + +//static void stop_file(stbi__context *s) { } + +#endif // !STBI_NO_STDIO + +static void stbi__rewind(stbi__context *s) +{ + // conceptually rewind SHOULD rewind to the beginning of the stream, + // but we just rewind to the beginning of the initial buffer, because + // we only use it after doing 'test', which only ever looks at at most 92 bytes + s->img_buffer = s->img_buffer_original; + s->img_buffer_end = s->img_buffer_original_end; +} + +enum +{ + STBI_ORDER_RGB, + STBI_ORDER_BGR +}; + +typedef struct +{ + int bits_per_channel; + int num_channels; + int channel_order; +} stbi__result_info; + +#ifndef STBI_NO_JPEG +static int stbi__jpeg_test(stbi__context *s); +static void *stbi__jpeg_load(stbi__context *s, int *x, int *y, int *comp, int req_comp, stbi__result_info *ri); +static int stbi__jpeg_info(stbi__context *s, int *x, int *y, int *comp); +#endif + +#ifndef STBI_NO_PNG +static int stbi__png_test(stbi__context *s); +static void *stbi__png_load(stbi__context *s, int *x, int *y, int *comp, int req_comp, stbi__result_info *ri); +static int stbi__png_info(stbi__context *s, int *x, int *y, int *comp); +static int stbi__png_is16(stbi__context *s); +#endif + +#ifndef STBI_NO_BMP +static int stbi__bmp_test(stbi__context *s); +static void *stbi__bmp_load(stbi__context *s, int *x, int *y, int *comp, int req_comp, stbi__result_info *ri); +static int stbi__bmp_info(stbi__context *s, int *x, int *y, int *comp); +#endif + +#ifndef STBI_NO_TGA +static int stbi__tga_test(stbi__context *s); +static void *stbi__tga_load(stbi__context *s, int *x, int *y, int *comp, int req_comp, stbi__result_info *ri); +static int stbi__tga_info(stbi__context *s, int *x, int *y, int *comp); +#endif + +#ifndef STBI_NO_PSD +static int stbi__psd_test(stbi__context *s); +static void *stbi__psd_load(stbi__context *s, int *x, int *y, int *comp, int req_comp, stbi__result_info *ri, int bpc); +static int stbi__psd_info(stbi__context *s, int *x, int *y, int *comp); +static int stbi__psd_is16(stbi__context *s); +#endif + +#ifndef STBI_NO_HDR +static int stbi__hdr_test(stbi__context *s); +static float *stbi__hdr_load(stbi__context *s, int *x, int *y, int *comp, int req_comp, stbi__result_info *ri); +static int stbi__hdr_info(stbi__context *s, int *x, int *y, int *comp); +#endif + +#ifndef STBI_NO_PIC +static int stbi__pic_test(stbi__context *s); +static void *stbi__pic_load(stbi__context *s, int *x, int *y, int *comp, int req_comp, stbi__result_info *ri); +static int stbi__pic_info(stbi__context *s, int *x, int *y, int *comp); +#endif + +#ifndef STBI_NO_GIF +static int stbi__gif_test(stbi__context *s); +static void *stbi__gif_load(stbi__context *s, int *x, int *y, int *comp, int req_comp, stbi__result_info *ri); +static void *stbi__load_gif_main(stbi__context *s, int **delays, int *x, int *y, int *z, int *comp, int req_comp); +static int stbi__gif_info(stbi__context *s, int *x, int *y, int *comp); +#endif + +#ifndef STBI_NO_PNM +static int stbi__pnm_test(stbi__context *s); +static void *stbi__pnm_load(stbi__context *s, int *x, int *y, int *comp, int req_comp, stbi__result_info *ri); +static int stbi__pnm_info(stbi__context *s, int *x, int *y, int *comp); +static int 
stbi__pnm_is16(stbi__context *s); +#endif + +static +#ifdef STBI_THREAD_LOCAL +STBI_THREAD_LOCAL +#endif +const char *stbi__g_failure_reason; + +STBIDEF const char *stbi_failure_reason(void) +{ + return stbi__g_failure_reason; +} + +#ifndef STBI_NO_FAILURE_STRINGS +static int stbi__err(const char *str) +{ + stbi__g_failure_reason = str; + return 0; +} +#endif + +static void *stbi__malloc(size_t size) +{ + return STBI_MALLOC(size); +} + +// stb_image uses ints pervasively, including for offset calculations. +// therefore the largest decoded image size we can support with the +// current code, even on 64-bit targets, is INT_MAX. this is not a +// significant limitation for the intended use case. +// +// we do, however, need to make sure our size calculations don't +// overflow. hence a few helper functions for size calculations that +// multiply integers together, making sure that they're non-negative +// and no overflow occurs. + +// return 1 if the sum is valid, 0 on overflow. +// negative terms are considered invalid. +static int stbi__addsizes_valid(int a, int b) +{ + if (b < 0) return 0; + // now 0 <= b <= INT_MAX, hence also + // 0 <= INT_MAX - b <= INTMAX. + // And "a + b <= INT_MAX" (which might overflow) is the + // same as a <= INT_MAX - b (no overflow) + return a <= INT_MAX - b; +} + +// returns 1 if the product is valid, 0 on overflow. +// negative factors are considered invalid. +static int stbi__mul2sizes_valid(int a, int b) +{ + if (a < 0 || b < 0) return 0; + if (b == 0) return 1; // mul-by-0 is always safe + // portable way to check for no overflows in a*b + return a <= INT_MAX/b; +} + +#if !defined(STBI_NO_JPEG) || !defined(STBI_NO_PNG) || !defined(STBI_NO_TGA) || !defined(STBI_NO_HDR) +// returns 1 if "a*b + add" has no negative terms/factors and doesn't overflow +static int stbi__mad2sizes_valid(int a, int b, int add) +{ + return stbi__mul2sizes_valid(a, b) && stbi__addsizes_valid(a*b, add); +} +#endif + +// returns 1 if "a*b*c + add" has no negative terms/factors and doesn't overflow +static int stbi__mad3sizes_valid(int a, int b, int c, int add) +{ + return stbi__mul2sizes_valid(a, b) && stbi__mul2sizes_valid(a*b, c) && + stbi__addsizes_valid(a*b*c, add); +} + +// returns 1 if "a*b*c*d + add" has no negative terms/factors and doesn't overflow +#if !defined(STBI_NO_LINEAR) || !defined(STBI_NO_HDR) || !defined(STBI_NO_PNM) +static int stbi__mad4sizes_valid(int a, int b, int c, int d, int add) +{ + return stbi__mul2sizes_valid(a, b) && stbi__mul2sizes_valid(a*b, c) && + stbi__mul2sizes_valid(a*b*c, d) && stbi__addsizes_valid(a*b*c*d, add); +} +#endif + +#if !defined(STBI_NO_JPEG) || !defined(STBI_NO_PNG) || !defined(STBI_NO_TGA) || !defined(STBI_NO_HDR) +// mallocs with size overflow checking +static void *stbi__malloc_mad2(int a, int b, int add) +{ + if (!stbi__mad2sizes_valid(a, b, add)) return NULL; + return stbi__malloc(a*b + add); +} +#endif + +static void *stbi__malloc_mad3(int a, int b, int c, int add) +{ + if (!stbi__mad3sizes_valid(a, b, c, add)) return NULL; + return stbi__malloc(a*b*c + add); +} + +#if !defined(STBI_NO_LINEAR) || !defined(STBI_NO_HDR) || !defined(STBI_NO_PNM) +static void *stbi__malloc_mad4(int a, int b, int c, int d, int add) +{ + if (!stbi__mad4sizes_valid(a, b, c, d, add)) return NULL; + return stbi__malloc(a*b*c*d + add); +} +#endif + +// returns 1 if the sum of two signed ints is valid (between -2^31 and 2^31-1 inclusive), 0 on overflow. 
+static int stbi__addints_valid(int a, int b) +{ + if ((a >= 0) != (b >= 0)) return 1; // a and b have different signs, so no overflow + if (a < 0 && b < 0) return a >= INT_MIN - b; // same as a + b >= INT_MIN; INT_MIN - b cannot overflow since b < 0. + return a <= INT_MAX - b; +} + +// returns 1 if the product of two ints fits in a signed short, 0 on overflow. +static int stbi__mul2shorts_valid(int a, int b) +{ + if (b == 0 || b == -1) return 1; // multiplication by 0 is always 0; check for -1 so SHRT_MIN/b doesn't overflow + if ((a >= 0) == (b >= 0)) return a <= SHRT_MAX/b; // product is positive, so similar to mul2sizes_valid + if (b < 0) return a <= SHRT_MIN / b; // same as a * b >= SHRT_MIN + return a >= SHRT_MIN / b; +} + +// stbi__err - error +// stbi__errpf - error returning pointer to float +// stbi__errpuc - error returning pointer to unsigned char + +#ifdef STBI_NO_FAILURE_STRINGS + #define stbi__err(x,y) 0 +#elif defined(STBI_FAILURE_USERMSG) + #define stbi__err(x,y) stbi__err(y) +#else + #define stbi__err(x,y) stbi__err(x) +#endif + +#define stbi__errpf(x,y) ((float *)(size_t) (stbi__err(x,y)?NULL:NULL)) +#define stbi__errpuc(x,y) ((unsigned char *)(size_t) (stbi__err(x,y)?NULL:NULL)) + +STBIDEF void stbi_image_free(void *retval_from_stbi_load) +{ + STBI_FREE(retval_from_stbi_load); +} + +#ifndef STBI_NO_LINEAR +static float *stbi__ldr_to_hdr(stbi_uc *data, int x, int y, int comp); +#endif + +#ifndef STBI_NO_HDR +static stbi_uc *stbi__hdr_to_ldr(float *data, int x, int y, int comp); +#endif + +static int stbi__vertically_flip_on_load_global = 0; + +STBIDEF void stbi_set_flip_vertically_on_load(int flag_true_if_should_flip) +{ + stbi__vertically_flip_on_load_global = flag_true_if_should_flip; +} + +#ifndef STBI_THREAD_LOCAL +#define stbi__vertically_flip_on_load stbi__vertically_flip_on_load_global +#else +static STBI_THREAD_LOCAL int stbi__vertically_flip_on_load_local, stbi__vertically_flip_on_load_set; + +STBIDEF void stbi_set_flip_vertically_on_load_thread(int flag_true_if_should_flip) +{ + stbi__vertically_flip_on_load_local = flag_true_if_should_flip; + stbi__vertically_flip_on_load_set = 1; +} + +#define stbi__vertically_flip_on_load (stbi__vertically_flip_on_load_set \ + ? 
stbi__vertically_flip_on_load_local \ + : stbi__vertically_flip_on_load_global) +#endif // STBI_THREAD_LOCAL + +static void *stbi__load_main(stbi__context *s, int *x, int *y, int *comp, int req_comp, stbi__result_info *ri, int bpc) +{ + memset(ri, 0, sizeof(*ri)); // make sure it's initialized if we add new fields + ri->bits_per_channel = 8; // default is 8 so most paths don't have to be changed + ri->channel_order = STBI_ORDER_RGB; // all current input & output are this, but this is here so we can add BGR order + ri->num_channels = 0; + + // test the formats with a very explicit header first (at least a FOURCC + // or distinctive magic number first) + #ifndef STBI_NO_PNG + if (stbi__png_test(s)) return stbi__png_load(s,x,y,comp,req_comp, ri); + #endif + #ifndef STBI_NO_BMP + if (stbi__bmp_test(s)) return stbi__bmp_load(s,x,y,comp,req_comp, ri); + #endif + #ifndef STBI_NO_GIF + if (stbi__gif_test(s)) return stbi__gif_load(s,x,y,comp,req_comp, ri); + #endif + #ifndef STBI_NO_PSD + if (stbi__psd_test(s)) return stbi__psd_load(s,x,y,comp,req_comp, ri, bpc); + #else + STBI_NOTUSED(bpc); + #endif + #ifndef STBI_NO_PIC + if (stbi__pic_test(s)) return stbi__pic_load(s,x,y,comp,req_comp, ri); + #endif + + // then the formats that can end up attempting to load with just 1 or 2 + // bytes matching expectations; these are prone to false positives, so + // try them later + #ifndef STBI_NO_JPEG + if (stbi__jpeg_test(s)) return stbi__jpeg_load(s,x,y,comp,req_comp, ri); + #endif + #ifndef STBI_NO_PNM + if (stbi__pnm_test(s)) return stbi__pnm_load(s,x,y,comp,req_comp, ri); + #endif + + #ifndef STBI_NO_HDR + if (stbi__hdr_test(s)) { + float *hdr = stbi__hdr_load(s, x,y,comp,req_comp, ri); + return stbi__hdr_to_ldr(hdr, *x, *y, req_comp ? req_comp : *comp); + } + #endif + + #ifndef STBI_NO_TGA + // test tga last because it's a crappy test! + if (stbi__tga_test(s)) + return stbi__tga_load(s,x,y,comp,req_comp, ri); + #endif + + return stbi__errpuc("unknown image type", "Image not of any known type, or corrupt"); +} + +static stbi_uc *stbi__convert_16_to_8(stbi__uint16 *orig, int w, int h, int channels) +{ + int i; + int img_len = w * h * channels; + stbi_uc *reduced; + + reduced = (stbi_uc *) stbi__malloc(img_len); + if (reduced == NULL) return stbi__errpuc("outofmem", "Out of memory"); + + for (i = 0; i < img_len; ++i) + reduced[i] = (stbi_uc)((orig[i] >> 8) & 0xFF); // top half of each byte is sufficient approx of 16->8 bit scaling + + STBI_FREE(orig); + return reduced; +} + +static stbi__uint16 *stbi__convert_8_to_16(stbi_uc *orig, int w, int h, int channels) +{ + int i; + int img_len = w * h * channels; + stbi__uint16 *enlarged; + + enlarged = (stbi__uint16 *) stbi__malloc(img_len*2); + if (enlarged == NULL) return (stbi__uint16 *) stbi__errpuc("outofmem", "Out of memory"); + + for (i = 0; i < img_len; ++i) + enlarged[i] = (stbi__uint16)((orig[i] << 8) + orig[i]); // replicate to high and low byte, maps 0->0, 255->0xffff + + STBI_FREE(orig); + return enlarged; +} + +static void stbi__vertical_flip(void *image, int w, int h, int bytes_per_pixel) +{ + int row; + size_t bytes_per_row = (size_t)w * bytes_per_pixel; + stbi_uc temp[2048]; + stbi_uc *bytes = (stbi_uc *)image; + + for (row = 0; row < (h>>1); row++) { + stbi_uc *row0 = bytes + row*bytes_per_row; + stbi_uc *row1 = bytes + (h - row - 1)*bytes_per_row; + // swap row0 with row1 + size_t bytes_left = bytes_per_row; + while (bytes_left) { + size_t bytes_copy = (bytes_left < sizeof(temp)) ? 
bytes_left : sizeof(temp); + memcpy(temp, row0, bytes_copy); + memcpy(row0, row1, bytes_copy); + memcpy(row1, temp, bytes_copy); + row0 += bytes_copy; + row1 += bytes_copy; + bytes_left -= bytes_copy; + } + } +} + +#ifndef STBI_NO_GIF +static void stbi__vertical_flip_slices(void *image, int w, int h, int z, int bytes_per_pixel) +{ + int slice; + int slice_size = w * h * bytes_per_pixel; + + stbi_uc *bytes = (stbi_uc *)image; + for (slice = 0; slice < z; ++slice) { + stbi__vertical_flip(bytes, w, h, bytes_per_pixel); + bytes += slice_size; + } +} +#endif + +static unsigned char *stbi__load_and_postprocess_8bit(stbi__context *s, int *x, int *y, int *comp, int req_comp) +{ + stbi__result_info ri; + void *result = stbi__load_main(s, x, y, comp, req_comp, &ri, 8); + + if (result == NULL) + return NULL; + + // it is the responsibility of the loaders to make sure we get either 8 or 16 bit. + STBI_ASSERT(ri.bits_per_channel == 8 || ri.bits_per_channel == 16); + + if (ri.bits_per_channel != 8) { + result = stbi__convert_16_to_8((stbi__uint16 *) result, *x, *y, req_comp == 0 ? *comp : req_comp); + ri.bits_per_channel = 8; + } + + // @TODO: move stbi__convert_format to here + + if (stbi__vertically_flip_on_load) { + int channels = req_comp ? req_comp : *comp; + stbi__vertical_flip(result, *x, *y, channels * sizeof(stbi_uc)); + } + + return (unsigned char *) result; +} + +static stbi__uint16 *stbi__load_and_postprocess_16bit(stbi__context *s, int *x, int *y, int *comp, int req_comp) +{ + stbi__result_info ri; + void *result = stbi__load_main(s, x, y, comp, req_comp, &ri, 16); + + if (result == NULL) + return NULL; + + // it is the responsibility of the loaders to make sure we get either 8 or 16 bit. + STBI_ASSERT(ri.bits_per_channel == 8 || ri.bits_per_channel == 16); + + if (ri.bits_per_channel != 16) { + result = stbi__convert_8_to_16((stbi_uc *) result, *x, *y, req_comp == 0 ? *comp : req_comp); + ri.bits_per_channel = 16; + } + + // @TODO: move stbi__convert_format16 to here + // @TODO: special case RGB-to-Y (and RGBA-to-YA) for 8-bit-to-16-bit case to keep more precision + + if (stbi__vertically_flip_on_load) { + int channels = req_comp ? req_comp : *comp; + stbi__vertical_flip(result, *x, *y, channels * sizeof(stbi__uint16)); + } + + return (stbi__uint16 *) result; +} + +#if !defined(STBI_NO_HDR) && !defined(STBI_NO_LINEAR) +static void stbi__float_postprocess(float *result, int *x, int *y, int *comp, int req_comp) +{ + if (stbi__vertically_flip_on_load && result != NULL) { + int channels = req_comp ? 
req_comp : *comp; + stbi__vertical_flip(result, *x, *y, channels * sizeof(float)); + } +} +#endif + +#ifndef STBI_NO_STDIO + +#if defined(_WIN32) && defined(STBI_WINDOWS_UTF8) +STBI_EXTERN __declspec(dllimport) int __stdcall MultiByteToWideChar(unsigned int cp, unsigned long flags, const char *str, int cbmb, wchar_t *widestr, int cchwide); +STBI_EXTERN __declspec(dllimport) int __stdcall WideCharToMultiByte(unsigned int cp, unsigned long flags, const wchar_t *widestr, int cchwide, char *str, int cbmb, const char *defchar, int *used_default); +#endif + +#if defined(_WIN32) && defined(STBI_WINDOWS_UTF8) +STBIDEF int stbi_convert_wchar_to_utf8(char *buffer, size_t bufferlen, const wchar_t* input) +{ + return WideCharToMultiByte(65001 /* UTF8 */, 0, input, -1, buffer, (int) bufferlen, NULL, NULL); +} +#endif + +static FILE *stbi__fopen(char const *filename, char const *mode) +{ + FILE *f; +#if defined(_WIN32) && defined(STBI_WINDOWS_UTF8) + wchar_t wMode[64]; + wchar_t wFilename[1024]; + if (0 == MultiByteToWideChar(65001 /* UTF8 */, 0, filename, -1, wFilename, sizeof(wFilename)/sizeof(*wFilename))) + return 0; + + if (0 == MultiByteToWideChar(65001 /* UTF8 */, 0, mode, -1, wMode, sizeof(wMode)/sizeof(*wMode))) + return 0; + +#if defined(_MSC_VER) && _MSC_VER >= 1400 + if (0 != _wfopen_s(&f, wFilename, wMode)) + f = 0; +#else + f = _wfopen(wFilename, wMode); +#endif + +#elif defined(_MSC_VER) && _MSC_VER >= 1400 + if (0 != fopen_s(&f, filename, mode)) + f=0; +#else + f = fopen(filename, mode); +#endif + return f; +} + + +STBIDEF stbi_uc *stbi_load(char const *filename, int *x, int *y, int *comp, int req_comp) +{ + FILE *f = stbi__fopen(filename, "rb"); + unsigned char *result; + if (!f) return stbi__errpuc("can't fopen", "Unable to open file"); + result = stbi_load_from_file(f,x,y,comp,req_comp); + fclose(f); + return result; +} + +STBIDEF stbi_uc *stbi_load_from_file(FILE *f, int *x, int *y, int *comp, int req_comp) +{ + unsigned char *result; + stbi__context s; + stbi__start_file(&s,f); + result = stbi__load_and_postprocess_8bit(&s,x,y,comp,req_comp); + if (result) { + // need to 'unget' all the characters in the IO buffer + fseek(f, - (int) (s.img_buffer_end - s.img_buffer), SEEK_CUR); + } + return result; +} + +STBIDEF stbi__uint16 *stbi_load_from_file_16(FILE *f, int *x, int *y, int *comp, int req_comp) +{ + stbi__uint16 *result; + stbi__context s; + stbi__start_file(&s,f); + result = stbi__load_and_postprocess_16bit(&s,x,y,comp,req_comp); + if (result) { + // need to 'unget' all the characters in the IO buffer + fseek(f, - (int) (s.img_buffer_end - s.img_buffer), SEEK_CUR); + } + return result; +} + +STBIDEF stbi_us *stbi_load_16(char const *filename, int *x, int *y, int *comp, int req_comp) +{ + FILE *f = stbi__fopen(filename, "rb"); + stbi__uint16 *result; + if (!f) return (stbi_us *) stbi__errpuc("can't fopen", "Unable to open file"); + result = stbi_load_from_file_16(f,x,y,comp,req_comp); + fclose(f); + return result; +} + + +#endif //!STBI_NO_STDIO + +STBIDEF stbi_us *stbi_load_16_from_memory(stbi_uc const *buffer, int len, int *x, int *y, int *channels_in_file, int desired_channels) +{ + stbi__context s; + stbi__start_mem(&s,buffer,len); + return stbi__load_and_postprocess_16bit(&s,x,y,channels_in_file,desired_channels); +} + +STBIDEF stbi_us *stbi_load_16_from_callbacks(stbi_io_callbacks const *clbk, void *user, int *x, int *y, int *channels_in_file, int desired_channels) +{ + stbi__context s; + stbi__start_callbacks(&s, (stbi_io_callbacks *)clbk, user); + return 
stbi__load_and_postprocess_16bit(&s,x,y,channels_in_file,desired_channels); +} + +STBIDEF stbi_uc *stbi_load_from_memory(stbi_uc const *buffer, int len, int *x, int *y, int *comp, int req_comp) +{ + stbi__context s; + stbi__start_mem(&s,buffer,len); + return stbi__load_and_postprocess_8bit(&s,x,y,comp,req_comp); +} + +STBIDEF stbi_uc *stbi_load_from_callbacks(stbi_io_callbacks const *clbk, void *user, int *x, int *y, int *comp, int req_comp) +{ + stbi__context s; + stbi__start_callbacks(&s, (stbi_io_callbacks *) clbk, user); + return stbi__load_and_postprocess_8bit(&s,x,y,comp,req_comp); +} + +#ifndef STBI_NO_GIF +STBIDEF stbi_uc *stbi_load_gif_from_memory(stbi_uc const *buffer, int len, int **delays, int *x, int *y, int *z, int *comp, int req_comp) +{ + unsigned char *result; + stbi__context s; + stbi__start_mem(&s,buffer,len); + + result = (unsigned char*) stbi__load_gif_main(&s, delays, x, y, z, comp, req_comp); + if (stbi__vertically_flip_on_load) { + stbi__vertical_flip_slices( result, *x, *y, *z, *comp ); + } + + return result; +} +#endif + +#ifndef STBI_NO_LINEAR +static float *stbi__loadf_main(stbi__context *s, int *x, int *y, int *comp, int req_comp) +{ + unsigned char *data; + #ifndef STBI_NO_HDR + if (stbi__hdr_test(s)) { + stbi__result_info ri; + float *hdr_data = stbi__hdr_load(s,x,y,comp,req_comp, &ri); + if (hdr_data) + stbi__float_postprocess(hdr_data,x,y,comp,req_comp); + return hdr_data; + } + #endif + data = stbi__load_and_postprocess_8bit(s, x, y, comp, req_comp); + if (data) + return stbi__ldr_to_hdr(data, *x, *y, req_comp ? req_comp : *comp); + return stbi__errpf("unknown image type", "Image not of any known type, or corrupt"); +} + +STBIDEF float *stbi_loadf_from_memory(stbi_uc const *buffer, int len, int *x, int *y, int *comp, int req_comp) +{ + stbi__context s; + stbi__start_mem(&s,buffer,len); + return stbi__loadf_main(&s,x,y,comp,req_comp); +} + +STBIDEF float *stbi_loadf_from_callbacks(stbi_io_callbacks const *clbk, void *user, int *x, int *y, int *comp, int req_comp) +{ + stbi__context s; + stbi__start_callbacks(&s, (stbi_io_callbacks *) clbk, user); + return stbi__loadf_main(&s,x,y,comp,req_comp); +} + +#ifndef STBI_NO_STDIO +STBIDEF float *stbi_loadf(char const *filename, int *x, int *y, int *comp, int req_comp) +{ + float *result; + FILE *f = stbi__fopen(filename, "rb"); + if (!f) return stbi__errpf("can't fopen", "Unable to open file"); + result = stbi_loadf_from_file(f,x,y,comp,req_comp); + fclose(f); + return result; +} + +STBIDEF float *stbi_loadf_from_file(FILE *f, int *x, int *y, int *comp, int req_comp) +{ + stbi__context s; + stbi__start_file(&s,f); + return stbi__loadf_main(&s,x,y,comp,req_comp); +} +#endif // !STBI_NO_STDIO + +#endif // !STBI_NO_LINEAR + +// these is-hdr-or-not is defined independent of whether STBI_NO_LINEAR is +// defined, for API simplicity; if STBI_NO_LINEAR is defined, it always +// reports false! 
+ +STBIDEF int stbi_is_hdr_from_memory(stbi_uc const *buffer, int len) +{ + #ifndef STBI_NO_HDR + stbi__context s; + stbi__start_mem(&s,buffer,len); + return stbi__hdr_test(&s); + #else + STBI_NOTUSED(buffer); + STBI_NOTUSED(len); + return 0; + #endif +} + +#ifndef STBI_NO_STDIO +STBIDEF int stbi_is_hdr (char const *filename) +{ + FILE *f = stbi__fopen(filename, "rb"); + int result=0; + if (f) { + result = stbi_is_hdr_from_file(f); + fclose(f); + } + return result; +} + +STBIDEF int stbi_is_hdr_from_file(FILE *f) +{ + #ifndef STBI_NO_HDR + long pos = ftell(f); + int res; + stbi__context s; + stbi__start_file(&s,f); + res = stbi__hdr_test(&s); + fseek(f, pos, SEEK_SET); + return res; + #else + STBI_NOTUSED(f); + return 0; + #endif +} +#endif // !STBI_NO_STDIO + +STBIDEF int stbi_is_hdr_from_callbacks(stbi_io_callbacks const *clbk, void *user) +{ + #ifndef STBI_NO_HDR + stbi__context s; + stbi__start_callbacks(&s, (stbi_io_callbacks *) clbk, user); + return stbi__hdr_test(&s); + #else + STBI_NOTUSED(clbk); + STBI_NOTUSED(user); + return 0; + #endif +} + +#ifndef STBI_NO_LINEAR +static float stbi__l2h_gamma=2.2f, stbi__l2h_scale=1.0f; + +STBIDEF void stbi_ldr_to_hdr_gamma(float gamma) { stbi__l2h_gamma = gamma; } +STBIDEF void stbi_ldr_to_hdr_scale(float scale) { stbi__l2h_scale = scale; } +#endif + +static float stbi__h2l_gamma_i=1.0f/2.2f, stbi__h2l_scale_i=1.0f; + +STBIDEF void stbi_hdr_to_ldr_gamma(float gamma) { stbi__h2l_gamma_i = 1/gamma; } +STBIDEF void stbi_hdr_to_ldr_scale(float scale) { stbi__h2l_scale_i = 1/scale; } + + +////////////////////////////////////////////////////////////////////////////// +// +// Common code used by all image loaders +// + +enum +{ + STBI__SCAN_load=0, + STBI__SCAN_type, + STBI__SCAN_header +}; + +static void stbi__refill_buffer(stbi__context *s) +{ + int n = (s->io.read)(s->io_user_data,(char*)s->buffer_start,s->buflen); + s->callback_already_read += (int) (s->img_buffer - s->img_buffer_original); + if (n == 0) { + // at end of file, treat same as if from memory, but need to handle case + // where s->img_buffer isn't pointing to safe memory, e.g. 0-byte file + s->read_from_callbacks = 0; + s->img_buffer = s->buffer_start; + s->img_buffer_end = s->buffer_start+1; + *s->img_buffer = 0; + } else { + s->img_buffer = s->buffer_start; + s->img_buffer_end = s->buffer_start + n; + } +} + +stbi_inline static stbi_uc stbi__get8(stbi__context *s) +{ + if (s->img_buffer < s->img_buffer_end) + return *s->img_buffer++; + if (s->read_from_callbacks) { + stbi__refill_buffer(s); + return *s->img_buffer++; + } + return 0; +} + +#if defined(STBI_NO_JPEG) && defined(STBI_NO_HDR) && defined(STBI_NO_PIC) && defined(STBI_NO_PNM) +// nothing +#else +stbi_inline static int stbi__at_eof(stbi__context *s) +{ + if (s->io.read) { + if (!(s->io.eof)(s->io_user_data)) return 0; + // if feof() is true, check if buffer = end + // special case: we've only got the special 0 character at the end + if (s->read_from_callbacks == 0) return 1; + } + + return s->img_buffer >= s->img_buffer_end; +} +#endif + +#if defined(STBI_NO_JPEG) && defined(STBI_NO_PNG) && defined(STBI_NO_BMP) && defined(STBI_NO_PSD) && defined(STBI_NO_TGA) && defined(STBI_NO_GIF) && defined(STBI_NO_PIC) +// nothing +#else +static void stbi__skip(stbi__context *s, int n) +{ + if (n == 0) return; // already there! 
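+   // a negative count here presumably means a corrupt size field somewhere
+   // upstream; rather than seek backwards, the branch below just treats the
+   // buffered data as fully consumed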
+ if (n < 0) { + s->img_buffer = s->img_buffer_end; + return; + } + if (s->io.read) { + int blen = (int) (s->img_buffer_end - s->img_buffer); + if (blen < n) { + s->img_buffer = s->img_buffer_end; + (s->io.skip)(s->io_user_data, n - blen); + return; + } + } + s->img_buffer += n; +} +#endif + +#if defined(STBI_NO_PNG) && defined(STBI_NO_TGA) && defined(STBI_NO_HDR) && defined(STBI_NO_PNM) +// nothing +#else +static int stbi__getn(stbi__context *s, stbi_uc *buffer, int n) +{ + if (s->io.read) { + int blen = (int) (s->img_buffer_end - s->img_buffer); + if (blen < n) { + int res, count; + + memcpy(buffer, s->img_buffer, blen); + + count = (s->io.read)(s->io_user_data, (char*) buffer + blen, n - blen); + res = (count == (n-blen)); + s->img_buffer = s->img_buffer_end; + return res; + } + } + + if (s->img_buffer+n <= s->img_buffer_end) { + memcpy(buffer, s->img_buffer, n); + s->img_buffer += n; + return 1; + } else + return 0; +} +#endif + +#if defined(STBI_NO_JPEG) && defined(STBI_NO_PNG) && defined(STBI_NO_PSD) && defined(STBI_NO_PIC) +// nothing +#else +static int stbi__get16be(stbi__context *s) +{ + int z = stbi__get8(s); + return (z << 8) + stbi__get8(s); +} +#endif + +#if defined(STBI_NO_PNG) && defined(STBI_NO_PSD) && defined(STBI_NO_PIC) +// nothing +#else +static stbi__uint32 stbi__get32be(stbi__context *s) +{ + stbi__uint32 z = stbi__get16be(s); + return (z << 16) + stbi__get16be(s); +} +#endif + +#if defined(STBI_NO_BMP) && defined(STBI_NO_TGA) && defined(STBI_NO_GIF) +// nothing +#else +static int stbi__get16le(stbi__context *s) +{ + int z = stbi__get8(s); + return z + (stbi__get8(s) << 8); +} +#endif + +#ifndef STBI_NO_BMP +static stbi__uint32 stbi__get32le(stbi__context *s) +{ + stbi__uint32 z = stbi__get16le(s); + z += (stbi__uint32)stbi__get16le(s) << 16; + return z; +} +#endif + +#define STBI__BYTECAST(x) ((stbi_uc) ((x) & 255)) // truncate int to byte without warnings + +#if defined(STBI_NO_JPEG) && defined(STBI_NO_PNG) && defined(STBI_NO_BMP) && defined(STBI_NO_PSD) && defined(STBI_NO_TGA) && defined(STBI_NO_GIF) && defined(STBI_NO_PIC) && defined(STBI_NO_PNM) +// nothing +#else +////////////////////////////////////////////////////////////////////////////// +// +// generic converter from built-in img_n to req_comp +// individual types do this automatically as much as possible (e.g. jpeg +// does all cases internally since it needs to colorspace convert anyway, +// and it never has alpha, so very few cases ). 
png can automatically +// interleave an alpha=255 channel, but falls back to this for other cases +// +// assume data buffer is malloced, so malloc a new one and free that one +// only failure mode is malloc failing + +static stbi_uc stbi__compute_y(int r, int g, int b) +{ + return (stbi_uc) (((r*77) + (g*150) + (29*b)) >> 8); +} +#endif + +#if defined(STBI_NO_PNG) && defined(STBI_NO_BMP) && defined(STBI_NO_PSD) && defined(STBI_NO_TGA) && defined(STBI_NO_GIF) && defined(STBI_NO_PIC) && defined(STBI_NO_PNM) +// nothing +#else +static unsigned char *stbi__convert_format(unsigned char *data, int img_n, int req_comp, unsigned int x, unsigned int y) +{ + int i,j; + unsigned char *good; + + if (req_comp == img_n) return data; + STBI_ASSERT(req_comp >= 1 && req_comp <= 4); + + good = (unsigned char *) stbi__malloc_mad3(req_comp, x, y, 0); + if (good == NULL) { + STBI_FREE(data); + return stbi__errpuc("outofmem", "Out of memory"); + } + + for (j=0; j < (int) y; ++j) { + unsigned char *src = data + j * x * img_n ; + unsigned char *dest = good + j * x * req_comp; + + #define STBI__COMBO(a,b) ((a)*8+(b)) + #define STBI__CASE(a,b) case STBI__COMBO(a,b): for(i=x-1; i >= 0; --i, src += a, dest += b) + // convert source image with img_n components to one with req_comp components; + // avoid switch per pixel, so use switch per scanline and massive macros + switch (STBI__COMBO(img_n, req_comp)) { + STBI__CASE(1,2) { dest[0]=src[0]; dest[1]=255; } break; + STBI__CASE(1,3) { dest[0]=dest[1]=dest[2]=src[0]; } break; + STBI__CASE(1,4) { dest[0]=dest[1]=dest[2]=src[0]; dest[3]=255; } break; + STBI__CASE(2,1) { dest[0]=src[0]; } break; + STBI__CASE(2,3) { dest[0]=dest[1]=dest[2]=src[0]; } break; + STBI__CASE(2,4) { dest[0]=dest[1]=dest[2]=src[0]; dest[3]=src[1]; } break; + STBI__CASE(3,4) { dest[0]=src[0];dest[1]=src[1];dest[2]=src[2];dest[3]=255; } break; + STBI__CASE(3,1) { dest[0]=stbi__compute_y(src[0],src[1],src[2]); } break; + STBI__CASE(3,2) { dest[0]=stbi__compute_y(src[0],src[1],src[2]); dest[1] = 255; } break; + STBI__CASE(4,1) { dest[0]=stbi__compute_y(src[0],src[1],src[2]); } break; + STBI__CASE(4,2) { dest[0]=stbi__compute_y(src[0],src[1],src[2]); dest[1] = src[3]; } break; + STBI__CASE(4,3) { dest[0]=src[0];dest[1]=src[1];dest[2]=src[2]; } break; + default: STBI_ASSERT(0); STBI_FREE(data); STBI_FREE(good); return stbi__errpuc("unsupported", "Unsupported format conversion"); + } + #undef STBI__CASE + } + + STBI_FREE(data); + return good; +} +#endif + +#if defined(STBI_NO_PNG) && defined(STBI_NO_PSD) +// nothing +#else +static stbi__uint16 stbi__compute_y_16(int r, int g, int b) +{ + return (stbi__uint16) (((r*77) + (g*150) + (29*b)) >> 8); +} +#endif + +#if defined(STBI_NO_PNG) && defined(STBI_NO_PSD) +// nothing +#else +static stbi__uint16 *stbi__convert_format16(stbi__uint16 *data, int img_n, int req_comp, unsigned int x, unsigned int y) +{ + int i,j; + stbi__uint16 *good; + + if (req_comp == img_n) return data; + STBI_ASSERT(req_comp >= 1 && req_comp <= 4); + + good = (stbi__uint16 *) stbi__malloc(req_comp * x * y * 2); + if (good == NULL) { + STBI_FREE(data); + return (stbi__uint16 *) stbi__errpuc("outofmem", "Out of memory"); + } + + for (j=0; j < (int) y; ++j) { + stbi__uint16 *src = data + j * x * img_n ; + stbi__uint16 *dest = good + j * x * req_comp; + + #define STBI__COMBO(a,b) ((a)*8+(b)) + #define STBI__CASE(a,b) case STBI__COMBO(a,b): for(i=x-1; i >= 0; --i, src += a, dest += b) + // convert source image with img_n components to one with req_comp components; + // avoid switch per pixel, 
so use switch per scanline and massive macros + switch (STBI__COMBO(img_n, req_comp)) { + STBI__CASE(1,2) { dest[0]=src[0]; dest[1]=0xffff; } break; + STBI__CASE(1,3) { dest[0]=dest[1]=dest[2]=src[0]; } break; + STBI__CASE(1,4) { dest[0]=dest[1]=dest[2]=src[0]; dest[3]=0xffff; } break; + STBI__CASE(2,1) { dest[0]=src[0]; } break; + STBI__CASE(2,3) { dest[0]=dest[1]=dest[2]=src[0]; } break; + STBI__CASE(2,4) { dest[0]=dest[1]=dest[2]=src[0]; dest[3]=src[1]; } break; + STBI__CASE(3,4) { dest[0]=src[0];dest[1]=src[1];dest[2]=src[2];dest[3]=0xffff; } break; + STBI__CASE(3,1) { dest[0]=stbi__compute_y_16(src[0],src[1],src[2]); } break; + STBI__CASE(3,2) { dest[0]=stbi__compute_y_16(src[0],src[1],src[2]); dest[1] = 0xffff; } break; + STBI__CASE(4,1) { dest[0]=stbi__compute_y_16(src[0],src[1],src[2]); } break; + STBI__CASE(4,2) { dest[0]=stbi__compute_y_16(src[0],src[1],src[2]); dest[1] = src[3]; } break; + STBI__CASE(4,3) { dest[0]=src[0];dest[1]=src[1];dest[2]=src[2]; } break; + default: STBI_ASSERT(0); STBI_FREE(data); STBI_FREE(good); return (stbi__uint16*) stbi__errpuc("unsupported", "Unsupported format conversion"); + } + #undef STBI__CASE + } + + STBI_FREE(data); + return good; +} +#endif + +#ifndef STBI_NO_LINEAR +static float *stbi__ldr_to_hdr(stbi_uc *data, int x, int y, int comp) +{ + int i,k,n; + float *output; + if (!data) return NULL; + output = (float *) stbi__malloc_mad4(x, y, comp, sizeof(float), 0); + if (output == NULL) { STBI_FREE(data); return stbi__errpf("outofmem", "Out of memory"); } + // compute number of non-alpha components + if (comp & 1) n = comp; else n = comp-1; + for (i=0; i < x*y; ++i) { + for (k=0; k < n; ++k) { + output[i*comp + k] = (float) (pow(data[i*comp+k]/255.0f, stbi__l2h_gamma) * stbi__l2h_scale); + } + } + if (n < comp) { + for (i=0; i < x*y; ++i) { + output[i*comp + n] = data[i*comp + n]/255.0f; + } + } + STBI_FREE(data); + return output; +} +#endif + +#ifndef STBI_NO_HDR +#define stbi__float2int(x) ((int) (x)) +static stbi_uc *stbi__hdr_to_ldr(float *data, int x, int y, int comp) +{ + int i,k,n; + stbi_uc *output; + if (!data) return NULL; + output = (stbi_uc *) stbi__malloc_mad3(x, y, comp, 0); + if (output == NULL) { STBI_FREE(data); return stbi__errpuc("outofmem", "Out of memory"); } + // compute number of non-alpha components + if (comp & 1) n = comp; else n = comp-1; + for (i=0; i < x*y; ++i) { + for (k=0; k < n; ++k) { + float z = (float) pow(data[i*comp+k]*stbi__h2l_scale_i, stbi__h2l_gamma_i) * 255 + 0.5f; + if (z < 0) z = 0; + if (z > 255) z = 255; + output[i*comp + k] = (stbi_uc) stbi__float2int(z); + } + if (k < comp) { + float z = data[i*comp+k] * 255 + 0.5f; + if (z < 0) z = 0; + if (z > 255) z = 255; + output[i*comp + k] = (stbi_uc) stbi__float2int(z); + } + } + STBI_FREE(data); + return output; +} +#endif + +////////////////////////////////////////////////////////////////////////////// +// +// "baseline" JPEG/JFIF decoder +// +// simple implementation +// - doesn't support delayed output of y-dimension +// - simple interface (only one output format: 8-bit interleaved RGB) +// - doesn't try to recover corrupt jpegs +// - doesn't allow partial loading, loading multiple at once +// - still fast on x86 (copying globals into locals doesn't help x86) +// - allocates lots of intermediate memory (full size of all components) +// - non-interleaved case requires this anyway +// - allows good upsampling (see next) +// high-quality +// - upsampled channels are bilinearly interpolated, even across blocks +// - quality integer IDCT derived from IJG's 
'slow' +// performance +// - fast huffman; reasonable integer IDCT +// - some SIMD kernels for common paths on targets with SSE2/NEON +// - uses a lot of intermediate memory, could cache poorly + +#ifndef STBI_NO_JPEG + +// huffman decoding acceleration +#define FAST_BITS 9 // larger handles more cases; smaller stomps less cache + +typedef struct +{ + stbi_uc fast[1 << FAST_BITS]; + // weirdly, repacking this into AoS is a 10% speed loss, instead of a win + stbi__uint16 code[256]; + stbi_uc values[256]; + stbi_uc size[257]; + unsigned int maxcode[18]; + int delta[17]; // old 'firstsymbol' - old 'firstcode' +} stbi__huffman; + +typedef struct +{ + stbi__context *s; + stbi__huffman huff_dc[4]; + stbi__huffman huff_ac[4]; + stbi__uint16 dequant[4][64]; + stbi__int16 fast_ac[4][1 << FAST_BITS]; + +// sizes for components, interleaved MCUs + int img_h_max, img_v_max; + int img_mcu_x, img_mcu_y; + int img_mcu_w, img_mcu_h; + +// definition of jpeg image component + struct + { + int id; + int h,v; + int tq; + int hd,ha; + int dc_pred; + + int x,y,w2,h2; + stbi_uc *data; + void *raw_data, *raw_coeff; + stbi_uc *linebuf; + short *coeff; // progressive only + int coeff_w, coeff_h; // number of 8x8 coefficient blocks + } img_comp[4]; + + stbi__uint32 code_buffer; // jpeg entropy-coded buffer + int code_bits; // number of valid bits + unsigned char marker; // marker seen while filling entropy buffer + int nomore; // flag if we saw a marker so must stop + + int progressive; + int spec_start; + int spec_end; + int succ_high; + int succ_low; + int eob_run; + int jfif; + int app14_color_transform; // Adobe APP14 tag + int rgb; + + int scan_n, order[4]; + int restart_interval, todo; + +// kernels + void (*idct_block_kernel)(stbi_uc *out, int out_stride, short data[64]); + void (*YCbCr_to_RGB_kernel)(stbi_uc *out, const stbi_uc *y, const stbi_uc *pcb, const stbi_uc *pcr, int count, int step); + stbi_uc *(*resample_row_hv_2_kernel)(stbi_uc *out, stbi_uc *in_near, stbi_uc *in_far, int w, int hs); +} stbi__jpeg; + +static int stbi__build_huffman(stbi__huffman *h, int *count) +{ + int i,j,k=0; + unsigned int code; + // build size list for each symbol (from JPEG spec) + for (i=0; i < 16; ++i) { + for (j=0; j < count[i]; ++j) { + h->size[k++] = (stbi_uc) (i+1); + if(k >= 257) return stbi__err("bad size list","Corrupt JPEG"); + } + } + h->size[k] = 0; + + // compute actual symbols (from jpeg spec) + code = 0; + k = 0; + for(j=1; j <= 16; ++j) { + // compute delta to add to code to compute symbol id + h->delta[j] = k - code; + if (h->size[k] == j) { + while (h->size[k] == j) + h->code[k++] = (stbi__uint16) (code++); + if (code-1 >= (1u << j)) return stbi__err("bad code lengths","Corrupt JPEG"); + } + // compute largest code + 1 for this size, preshifted as needed later + h->maxcode[j] = code << (16-j); + code <<= 1; + } + h->maxcode[j] = 0xffffffff; + + // build non-spec acceleration table; 255 is flag for not-accelerated + memset(h->fast, 255, 1 << FAST_BITS); + for (i=0; i < k; ++i) { + int s = h->size[i]; + if (s <= FAST_BITS) { + int c = h->code[i] << (FAST_BITS-s); + int m = 1 << (FAST_BITS-s); + for (j=0; j < m; ++j) { + h->fast[c+j] = (stbi_uc) i; + } + } + } + return 1; +} + +// build a table that decodes both magnitude and value of small ACs in +// one go. 
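+// Each table entry packs three fields into a signed 16-bit value:
+//    (value << 8) | (run << 4) | (huffman length + magnitude bits)
+// so the hot path can replace a full huffman decode plus receive_extend with
+// a single lookup; a zero entry means "not accelerated, take the slow path".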
+static void stbi__build_fast_ac(stbi__int16 *fast_ac, stbi__huffman *h) +{ + int i; + for (i=0; i < (1 << FAST_BITS); ++i) { + stbi_uc fast = h->fast[i]; + fast_ac[i] = 0; + if (fast < 255) { + int rs = h->values[fast]; + int run = (rs >> 4) & 15; + int magbits = rs & 15; + int len = h->size[fast]; + + if (magbits && len + magbits <= FAST_BITS) { + // magnitude code followed by receive_extend code + int k = ((i << len) & ((1 << FAST_BITS) - 1)) >> (FAST_BITS - magbits); + int m = 1 << (magbits - 1); + if (k < m) k += (~0U << magbits) + 1; + // if the result is small enough, we can fit it in fast_ac table + if (k >= -128 && k <= 127) + fast_ac[i] = (stbi__int16) ((k * 256) + (run * 16) + (len + magbits)); + } + } + } +} + +static void stbi__grow_buffer_unsafe(stbi__jpeg *j) +{ + do { + unsigned int b = j->nomore ? 0 : stbi__get8(j->s); + if (b == 0xff) { + int c = stbi__get8(j->s); + while (c == 0xff) c = stbi__get8(j->s); // consume fill bytes + if (c != 0) { + j->marker = (unsigned char) c; + j->nomore = 1; + return; + } + } + j->code_buffer |= b << (24 - j->code_bits); + j->code_bits += 8; + } while (j->code_bits <= 24); +} + +// (1 << n) - 1 +static const stbi__uint32 stbi__bmask[17]={0,1,3,7,15,31,63,127,255,511,1023,2047,4095,8191,16383,32767,65535}; + +// decode a jpeg huffman value from the bitstream +stbi_inline static int stbi__jpeg_huff_decode(stbi__jpeg *j, stbi__huffman *h) +{ + unsigned int temp; + int c,k; + + if (j->code_bits < 16) stbi__grow_buffer_unsafe(j); + + // look at the top FAST_BITS and determine what symbol ID it is, + // if the code is <= FAST_BITS + c = (j->code_buffer >> (32 - FAST_BITS)) & ((1 << FAST_BITS)-1); + k = h->fast[c]; + if (k < 255) { + int s = h->size[k]; + if (s > j->code_bits) + return -1; + j->code_buffer <<= s; + j->code_bits -= s; + return h->values[k]; + } + + // naive test is to shift the code_buffer down so k bits are + // valid, then test against maxcode. To speed this up, we've + // preshifted maxcode left so that it has (16-k) 0s at the + // end; in other words, regardless of the number of bits, it + // wants to be compared against something shifted to have 16; + // that way we don't need to shift inside the loop. + temp = j->code_buffer >> 16; + for (k=FAST_BITS+1 ; ; ++k) + if (temp < h->maxcode[k]) + break; + if (k == 17) { + // error! code not found + j->code_bits -= 16; + return -1; + } + + if (k > j->code_bits) + return -1; + + // convert the huffman code to the symbol id + c = ((j->code_buffer >> (32 - k)) & stbi__bmask[k]) + h->delta[k]; + if(c < 0 || c >= 256) // symbol id out of bounds! 
+      return -1;
+   STBI_ASSERT((((j->code_buffer) >> (32 - h->size[c])) & stbi__bmask[h->size[c]]) == h->code[c]);
+
+   // convert the id to a symbol
+   j->code_bits -= k;
+   j->code_buffer <<= k;
+   return h->values[c];
+}
+
+// bias[n] = (-1<<n) + 1
+static const int stbi__jbias[16] = {0,-1,-3,-7,-15,-31,-63,-127,-255,-511,-1023,-2047,-4095,-8191,-16383,-32767};
+
+// combined JPEG 'receive' and JPEG 'extend', since baseline
+// always extends everything it receives.
+stbi_inline static int stbi__extend_receive(stbi__jpeg *j, int n)
+{
+   unsigned int k;
+   int sgn;
+   if (j->code_bits < n) stbi__grow_buffer_unsafe(j);
+   if (j->code_bits < n) return 0; // ran out of bits from stream, return 0s instead of continuing
+
+   sgn = j->code_buffer >> 31; // sign bit always in MSB; 0 if MSB clear (positive), 1 if MSB set (negative)
+   k = stbi_lrot(j->code_buffer, n);
+   j->code_buffer = k & ~stbi__bmask[n];
+   k &= stbi__bmask[n];
+   j->code_bits -= n;
+   return k + (stbi__jbias[n] & (sgn - 1));
+}
+
+// get some unsigned bits
+stbi_inline static int stbi__jpeg_get_bits(stbi__jpeg *j, int n)
+{
+   unsigned int k;
+   if (j->code_bits < n) stbi__grow_buffer_unsafe(j);
+   if (j->code_bits < n) return 0; // ran out of bits from stream, return 0s instead of continuing
+   k = stbi_lrot(j->code_buffer, n);
+   j->code_buffer = k & ~stbi__bmask[n];
+   k &= stbi__bmask[n];
+   j->code_bits -= n;
+   return k;
+}
+
+stbi_inline static int stbi__jpeg_get_bit(stbi__jpeg *j)
+{
+   unsigned int k;
+   if (j->code_bits < 1) stbi__grow_buffer_unsafe(j);
+   if (j->code_bits < 1) return 0; // ran out of bits from stream, return 0s instead of continuing
+   k = j->code_buffer;
+   j->code_buffer <<= 1;
+   --j->code_bits;
+   return k & 0x80000000;
+}
+
+// given a value that's at position X in the zigzag stream,
+// where does it appear in the 8x8 matrix coded as row-major?
+static const stbi_uc stbi__jpeg_dezigzag[64+15] =
+{
+    0,  1,  8, 16,  9,  2,  3, 10,
+   17, 24, 32, 25, 18, 11,  4,  5,
+   12, 19, 26, 33, 40, 48, 41, 34,
+   27, 20, 13,  6,  7, 14, 21, 28,
+   35, 42, 49, 56, 57, 50, 43, 36,
+   29, 22, 15, 23, 30, 37, 44, 51,
+   58, 59, 52, 45, 38, 31, 39, 46,
+   53, 60, 61, 54, 47, 55, 62, 63,
+   // let corrupt input sample past end
+   63, 63, 63, 63, 63, 63, 63, 63,
+   63, 63, 63, 63, 63, 63, 63
+};
+
+// decode one 64-entry block--
+static int stbi__jpeg_decode_block(stbi__jpeg *j, short data[64], stbi__huffman *hdc, stbi__huffman *hac, stbi__int16 *fac, int b, stbi__uint16 *dequant)
+{
+   int diff,dc,k;
+   int t;
+
+   if (j->code_bits < 16) stbi__grow_buffer_unsafe(j);
+   t = stbi__jpeg_huff_decode(j, hdc);
+   if (t < 0 || t > 15) return stbi__err("bad huffman code","Corrupt JPEG");
+
+   // 0 all the ac values now so we can do it 32-bits at a time
+   memset(data,0,64*sizeof(data[0]));
+
+   diff = t ?
stbi__extend_receive(j, t) : 0; + if (!stbi__addints_valid(j->img_comp[b].dc_pred, diff)) return stbi__err("bad delta","Corrupt JPEG"); + dc = j->img_comp[b].dc_pred + diff; + j->img_comp[b].dc_pred = dc; + if (!stbi__mul2shorts_valid(dc, dequant[0])) return stbi__err("can't merge dc and ac", "Corrupt JPEG"); + data[0] = (short) (dc * dequant[0]); + + // decode AC components, see JPEG spec + k = 1; + do { + unsigned int zig; + int c,r,s; + if (j->code_bits < 16) stbi__grow_buffer_unsafe(j); + c = (j->code_buffer >> (32 - FAST_BITS)) & ((1 << FAST_BITS)-1); + r = fac[c]; + if (r) { // fast-AC path + k += (r >> 4) & 15; // run + s = r & 15; // combined length + if (s > j->code_bits) return stbi__err("bad huffman code", "Combined length longer than code bits available"); + j->code_buffer <<= s; + j->code_bits -= s; + // decode into unzigzag'd location + zig = stbi__jpeg_dezigzag[k++]; + data[zig] = (short) ((r >> 8) * dequant[zig]); + } else { + int rs = stbi__jpeg_huff_decode(j, hac); + if (rs < 0) return stbi__err("bad huffman code","Corrupt JPEG"); + s = rs & 15; + r = rs >> 4; + if (s == 0) { + if (rs != 0xf0) break; // end block + k += 16; + } else { + k += r; + // decode into unzigzag'd location + zig = stbi__jpeg_dezigzag[k++]; + data[zig] = (short) (stbi__extend_receive(j,s) * dequant[zig]); + } + } + } while (k < 64); + return 1; +} + +static int stbi__jpeg_decode_block_prog_dc(stbi__jpeg *j, short data[64], stbi__huffman *hdc, int b) +{ + int diff,dc; + int t; + if (j->spec_end != 0) return stbi__err("can't merge dc and ac", "Corrupt JPEG"); + + if (j->code_bits < 16) stbi__grow_buffer_unsafe(j); + + if (j->succ_high == 0) { + // first scan for DC coefficient, must be first + memset(data,0,64*sizeof(data[0])); // 0 all the ac values now + t = stbi__jpeg_huff_decode(j, hdc); + if (t < 0 || t > 15) return stbi__err("can't merge dc and ac", "Corrupt JPEG"); + diff = t ? 
stbi__extend_receive(j, t) : 0; + + if (!stbi__addints_valid(j->img_comp[b].dc_pred, diff)) return stbi__err("bad delta", "Corrupt JPEG"); + dc = j->img_comp[b].dc_pred + diff; + j->img_comp[b].dc_pred = dc; + if (!stbi__mul2shorts_valid(dc, 1 << j->succ_low)) return stbi__err("can't merge dc and ac", "Corrupt JPEG"); + data[0] = (short) (dc * (1 << j->succ_low)); + } else { + // refinement scan for DC coefficient + if (stbi__jpeg_get_bit(j)) + data[0] += (short) (1 << j->succ_low); + } + return 1; +} + +// @OPTIMIZE: store non-zigzagged during the decode passes, +// and only de-zigzag when dequantizing +static int stbi__jpeg_decode_block_prog_ac(stbi__jpeg *j, short data[64], stbi__huffman *hac, stbi__int16 *fac) +{ + int k; + if (j->spec_start == 0) return stbi__err("can't merge dc and ac", "Corrupt JPEG"); + + if (j->succ_high == 0) { + int shift = j->succ_low; + + if (j->eob_run) { + --j->eob_run; + return 1; + } + + k = j->spec_start; + do { + unsigned int zig; + int c,r,s; + if (j->code_bits < 16) stbi__grow_buffer_unsafe(j); + c = (j->code_buffer >> (32 - FAST_BITS)) & ((1 << FAST_BITS)-1); + r = fac[c]; + if (r) { // fast-AC path + k += (r >> 4) & 15; // run + s = r & 15; // combined length + if (s > j->code_bits) return stbi__err("bad huffman code", "Combined length longer than code bits available"); + j->code_buffer <<= s; + j->code_bits -= s; + zig = stbi__jpeg_dezigzag[k++]; + data[zig] = (short) ((r >> 8) * (1 << shift)); + } else { + int rs = stbi__jpeg_huff_decode(j, hac); + if (rs < 0) return stbi__err("bad huffman code","Corrupt JPEG"); + s = rs & 15; + r = rs >> 4; + if (s == 0) { + if (r < 15) { + j->eob_run = (1 << r); + if (r) + j->eob_run += stbi__jpeg_get_bits(j, r); + --j->eob_run; + break; + } + k += 16; + } else { + k += r; + zig = stbi__jpeg_dezigzag[k++]; + data[zig] = (short) (stbi__extend_receive(j,s) * (1 << shift)); + } + } + } while (k <= j->spec_end); + } else { + // refinement scan for these AC coefficients + + short bit = (short) (1 << j->succ_low); + + if (j->eob_run) { + --j->eob_run; + for (k = j->spec_start; k <= j->spec_end; ++k) { + short *p = &data[stbi__jpeg_dezigzag[k]]; + if (*p != 0) + if (stbi__jpeg_get_bit(j)) + if ((*p & bit)==0) { + if (*p > 0) + *p += bit; + else + *p -= bit; + } + } + } else { + k = j->spec_start; + do { + int r,s; + int rs = stbi__jpeg_huff_decode(j, hac); // @OPTIMIZE see if we can use the fast path here, advance-by-r is so slow, eh + if (rs < 0) return stbi__err("bad huffman code","Corrupt JPEG"); + s = rs & 15; + r = rs >> 4; + if (s == 0) { + if (r < 15) { + j->eob_run = (1 << r) - 1; + if (r) + j->eob_run += stbi__jpeg_get_bits(j, r); + r = 64; // force end of block + } else { + // r=15 s=0 should write 16 0s, so we just do + // a run of 15 0s and then write s (which is 0), + // so we don't have to do anything special here + } + } else { + if (s != 1) return stbi__err("bad huffman code", "Corrupt JPEG"); + // sign bit + if (stbi__jpeg_get_bit(j)) + s = bit; + else + s = -bit; + } + + // advance by r + while (k <= j->spec_end) { + short *p = &data[stbi__jpeg_dezigzag[k++]]; + if (*p != 0) { + if (stbi__jpeg_get_bit(j)) + if ((*p & bit)==0) { + if (*p > 0) + *p += bit; + else + *p -= bit; + } + } else { + if (r == 0) { + *p = (short) s; + break; + } + --r; + } + } + } while (k <= j->spec_end); + } + } + return 1; +} + +// take a -128..127 value and stbi__clamp it and convert to 0..255 +stbi_inline static stbi_uc stbi__clamp(int x) +{ + // trick to use a single test to catch both cases + if ((unsigned int) x > 255) { 
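+      // the unsigned cast wraps negative x around to huge values, so in-range
+      // pixels fail the single test above and skip both of the checks below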
+ if (x < 0) return 0; + if (x > 255) return 255; + } + return (stbi_uc) x; +} + +#define stbi__f2f(x) ((int) (((x) * 4096 + 0.5))) +#define stbi__fsh(x) ((x) * 4096) + +// derived from jidctint -- DCT_ISLOW +#define STBI__IDCT_1D(s0,s1,s2,s3,s4,s5,s6,s7) \ + int t0,t1,t2,t3,p1,p2,p3,p4,p5,x0,x1,x2,x3; \ + p2 = s2; \ + p3 = s6; \ + p1 = (p2+p3) * stbi__f2f(0.5411961f); \ + t2 = p1 + p3*stbi__f2f(-1.847759065f); \ + t3 = p1 + p2*stbi__f2f( 0.765366865f); \ + p2 = s0; \ + p3 = s4; \ + t0 = stbi__fsh(p2+p3); \ + t1 = stbi__fsh(p2-p3); \ + x0 = t0+t3; \ + x3 = t0-t3; \ + x1 = t1+t2; \ + x2 = t1-t2; \ + t0 = s7; \ + t1 = s5; \ + t2 = s3; \ + t3 = s1; \ + p3 = t0+t2; \ + p4 = t1+t3; \ + p1 = t0+t3; \ + p2 = t1+t2; \ + p5 = (p3+p4)*stbi__f2f( 1.175875602f); \ + t0 = t0*stbi__f2f( 0.298631336f); \ + t1 = t1*stbi__f2f( 2.053119869f); \ + t2 = t2*stbi__f2f( 3.072711026f); \ + t3 = t3*stbi__f2f( 1.501321110f); \ + p1 = p5 + p1*stbi__f2f(-0.899976223f); \ + p2 = p5 + p2*stbi__f2f(-2.562915447f); \ + p3 = p3*stbi__f2f(-1.961570560f); \ + p4 = p4*stbi__f2f(-0.390180644f); \ + t3 += p1+p4; \ + t2 += p2+p3; \ + t1 += p2+p4; \ + t0 += p1+p3; + +static void stbi__idct_block(stbi_uc *out, int out_stride, short data[64]) +{ + int i,val[64],*v=val; + stbi_uc *o; + short *d = data; + + // columns + for (i=0; i < 8; ++i,++d, ++v) { + // if all zeroes, shortcut -- this avoids dequantizing 0s and IDCTing + if (d[ 8]==0 && d[16]==0 && d[24]==0 && d[32]==0 + && d[40]==0 && d[48]==0 && d[56]==0) { + // no shortcut 0 seconds + // (1|2|3|4|5|6|7)==0 0 seconds + // all separate -0.047 seconds + // 1 && 2|3 && 4|5 && 6|7: -0.047 seconds + int dcterm = d[0]*4; + v[0] = v[8] = v[16] = v[24] = v[32] = v[40] = v[48] = v[56] = dcterm; + } else { + STBI__IDCT_1D(d[ 0],d[ 8],d[16],d[24],d[32],d[40],d[48],d[56]) + // constants scaled things up by 1<<12; let's bring them back + // down, but keep 2 extra bits of precision + x0 += 512; x1 += 512; x2 += 512; x3 += 512; + v[ 0] = (x0+t3) >> 10; + v[56] = (x0-t3) >> 10; + v[ 8] = (x1+t2) >> 10; + v[48] = (x1-t2) >> 10; + v[16] = (x2+t1) >> 10; + v[40] = (x2-t1) >> 10; + v[24] = (x3+t0) >> 10; + v[32] = (x3-t0) >> 10; + } + } + + for (i=0, v=val, o=out; i < 8; ++i,v+=8,o+=out_stride) { + // no fast case since the first 1D IDCT spread components out + STBI__IDCT_1D(v[0],v[1],v[2],v[3],v[4],v[5],v[6],v[7]) + // constants scaled things up by 1<<12, plus we had 1<<2 from first + // loop, plus horizontal and vertical each scale by sqrt(8) so together + // we've got an extra 1<<3, so 1<<17 total we need to remove. + // so we want to round that, which means adding 0.5 * 1<<17, + // aka 65536. Also, we'll end up with -128 to 127 that we want + // to encode as 0..255 by adding 128, so we'll add that before the shift + x0 += 65536 + (128<<17); + x1 += 65536 + (128<<17); + x2 += 65536 + (128<<17); + x3 += 65536 + (128<<17); + // tried computing the shifts into temps, or'ing the temps to see + // if any were out of range, but that was slower + o[0] = stbi__clamp((x0+t3) >> 17); + o[7] = stbi__clamp((x0-t3) >> 17); + o[1] = stbi__clamp((x1+t2) >> 17); + o[6] = stbi__clamp((x1-t2) >> 17); + o[2] = stbi__clamp((x2+t1) >> 17); + o[5] = stbi__clamp((x2-t1) >> 17); + o[3] = stbi__clamp((x3+t0) >> 17); + o[4] = stbi__clamp((x3-t0) >> 17); + } +} + +#ifdef STBI_SSE2 +// sse2 integer IDCT. not the fastest possible implementation but it +// produces bit-identical results to the generic C version so it's +// fully "transparent". 
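+// Same two-pass shape as stbi__idct_block above: a 1-D IDCT down the columns,
+// a 16-bit 8x8 transpose, a 1-D pass along the rows, then an 8-bit transpose
+// while packing the results back down to bytes.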
+static void stbi__idct_simd(stbi_uc *out, int out_stride, short data[64]) +{ + // This is constructed to match our regular (generic) integer IDCT exactly. + __m128i row0, row1, row2, row3, row4, row5, row6, row7; + __m128i tmp; + + // dot product constant: even elems=x, odd elems=y + #define dct_const(x,y) _mm_setr_epi16((x),(y),(x),(y),(x),(y),(x),(y)) + + // out(0) = c0[even]*x + c0[odd]*y (c0, x, y 16-bit, out 32-bit) + // out(1) = c1[even]*x + c1[odd]*y + #define dct_rot(out0,out1, x,y,c0,c1) \ + __m128i c0##lo = _mm_unpacklo_epi16((x),(y)); \ + __m128i c0##hi = _mm_unpackhi_epi16((x),(y)); \ + __m128i out0##_l = _mm_madd_epi16(c0##lo, c0); \ + __m128i out0##_h = _mm_madd_epi16(c0##hi, c0); \ + __m128i out1##_l = _mm_madd_epi16(c0##lo, c1); \ + __m128i out1##_h = _mm_madd_epi16(c0##hi, c1) + + // out = in << 12 (in 16-bit, out 32-bit) + #define dct_widen(out, in) \ + __m128i out##_l = _mm_srai_epi32(_mm_unpacklo_epi16(_mm_setzero_si128(), (in)), 4); \ + __m128i out##_h = _mm_srai_epi32(_mm_unpackhi_epi16(_mm_setzero_si128(), (in)), 4) + + // wide add + #define dct_wadd(out, a, b) \ + __m128i out##_l = _mm_add_epi32(a##_l, b##_l); \ + __m128i out##_h = _mm_add_epi32(a##_h, b##_h) + + // wide sub + #define dct_wsub(out, a, b) \ + __m128i out##_l = _mm_sub_epi32(a##_l, b##_l); \ + __m128i out##_h = _mm_sub_epi32(a##_h, b##_h) + + // butterfly a/b, add bias, then shift by "s" and pack + #define dct_bfly32o(out0, out1, a,b,bias,s) \ + { \ + __m128i abiased_l = _mm_add_epi32(a##_l, bias); \ + __m128i abiased_h = _mm_add_epi32(a##_h, bias); \ + dct_wadd(sum, abiased, b); \ + dct_wsub(dif, abiased, b); \ + out0 = _mm_packs_epi32(_mm_srai_epi32(sum_l, s), _mm_srai_epi32(sum_h, s)); \ + out1 = _mm_packs_epi32(_mm_srai_epi32(dif_l, s), _mm_srai_epi32(dif_h, s)); \ + } + + // 8-bit interleave step (for transposes) + #define dct_interleave8(a, b) \ + tmp = a; \ + a = _mm_unpacklo_epi8(a, b); \ + b = _mm_unpackhi_epi8(tmp, b) + + // 16-bit interleave step (for transposes) + #define dct_interleave16(a, b) \ + tmp = a; \ + a = _mm_unpacklo_epi16(a, b); \ + b = _mm_unpackhi_epi16(tmp, b) + + #define dct_pass(bias,shift) \ + { \ + /* even part */ \ + dct_rot(t2e,t3e, row2,row6, rot0_0,rot0_1); \ + __m128i sum04 = _mm_add_epi16(row0, row4); \ + __m128i dif04 = _mm_sub_epi16(row0, row4); \ + dct_widen(t0e, sum04); \ + dct_widen(t1e, dif04); \ + dct_wadd(x0, t0e, t3e); \ + dct_wsub(x3, t0e, t3e); \ + dct_wadd(x1, t1e, t2e); \ + dct_wsub(x2, t1e, t2e); \ + /* odd part */ \ + dct_rot(y0o,y2o, row7,row3, rot2_0,rot2_1); \ + dct_rot(y1o,y3o, row5,row1, rot3_0,rot3_1); \ + __m128i sum17 = _mm_add_epi16(row1, row7); \ + __m128i sum35 = _mm_add_epi16(row3, row5); \ + dct_rot(y4o,y5o, sum17,sum35, rot1_0,rot1_1); \ + dct_wadd(x4, y0o, y4o); \ + dct_wadd(x5, y1o, y5o); \ + dct_wadd(x6, y2o, y5o); \ + dct_wadd(x7, y3o, y4o); \ + dct_bfly32o(row0,row7, x0,x7,bias,shift); \ + dct_bfly32o(row1,row6, x1,x6,bias,shift); \ + dct_bfly32o(row2,row5, x2,x5,bias,shift); \ + dct_bfly32o(row3,row4, x3,x4,bias,shift); \ + } + + __m128i rot0_0 = dct_const(stbi__f2f(0.5411961f), stbi__f2f(0.5411961f) + stbi__f2f(-1.847759065f)); + __m128i rot0_1 = dct_const(stbi__f2f(0.5411961f) + stbi__f2f( 0.765366865f), stbi__f2f(0.5411961f)); + __m128i rot1_0 = dct_const(stbi__f2f(1.175875602f) + stbi__f2f(-0.899976223f), stbi__f2f(1.175875602f)); + __m128i rot1_1 = dct_const(stbi__f2f(1.175875602f), stbi__f2f(1.175875602f) + stbi__f2f(-2.562915447f)); + __m128i rot2_0 = dct_const(stbi__f2f(-1.961570560f) + stbi__f2f( 0.298631336f), 
stbi__f2f(-1.961570560f)); + __m128i rot2_1 = dct_const(stbi__f2f(-1.961570560f), stbi__f2f(-1.961570560f) + stbi__f2f( 3.072711026f)); + __m128i rot3_0 = dct_const(stbi__f2f(-0.390180644f) + stbi__f2f( 2.053119869f), stbi__f2f(-0.390180644f)); + __m128i rot3_1 = dct_const(stbi__f2f(-0.390180644f), stbi__f2f(-0.390180644f) + stbi__f2f( 1.501321110f)); + + // rounding biases in column/row passes, see stbi__idct_block for explanation. + __m128i bias_0 = _mm_set1_epi32(512); + __m128i bias_1 = _mm_set1_epi32(65536 + (128<<17)); + + // load + row0 = _mm_load_si128((const __m128i *) (data + 0*8)); + row1 = _mm_load_si128((const __m128i *) (data + 1*8)); + row2 = _mm_load_si128((const __m128i *) (data + 2*8)); + row3 = _mm_load_si128((const __m128i *) (data + 3*8)); + row4 = _mm_load_si128((const __m128i *) (data + 4*8)); + row5 = _mm_load_si128((const __m128i *) (data + 5*8)); + row6 = _mm_load_si128((const __m128i *) (data + 6*8)); + row7 = _mm_load_si128((const __m128i *) (data + 7*8)); + + // column pass + dct_pass(bias_0, 10); + + { + // 16bit 8x8 transpose pass 1 + dct_interleave16(row0, row4); + dct_interleave16(row1, row5); + dct_interleave16(row2, row6); + dct_interleave16(row3, row7); + + // transpose pass 2 + dct_interleave16(row0, row2); + dct_interleave16(row1, row3); + dct_interleave16(row4, row6); + dct_interleave16(row5, row7); + + // transpose pass 3 + dct_interleave16(row0, row1); + dct_interleave16(row2, row3); + dct_interleave16(row4, row5); + dct_interleave16(row6, row7); + } + + // row pass + dct_pass(bias_1, 17); + + { + // pack + __m128i p0 = _mm_packus_epi16(row0, row1); // a0a1a2a3...a7b0b1b2b3...b7 + __m128i p1 = _mm_packus_epi16(row2, row3); + __m128i p2 = _mm_packus_epi16(row4, row5); + __m128i p3 = _mm_packus_epi16(row6, row7); + + // 8bit 8x8 transpose pass 1 + dct_interleave8(p0, p2); // a0e0a1e1... + dct_interleave8(p1, p3); // c0g0c1g1... + + // transpose pass 2 + dct_interleave8(p0, p1); // a0c0e0g0... + dct_interleave8(p2, p3); // b0d0f0h0... + + // transpose pass 3 + dct_interleave8(p0, p2); // a0b0c0d0... + dct_interleave8(p1, p3); // a4b4c4d4... + + // store + _mm_storel_epi64((__m128i *) out, p0); out += out_stride; + _mm_storel_epi64((__m128i *) out, _mm_shuffle_epi32(p0, 0x4e)); out += out_stride; + _mm_storel_epi64((__m128i *) out, p2); out += out_stride; + _mm_storel_epi64((__m128i *) out, _mm_shuffle_epi32(p2, 0x4e)); out += out_stride; + _mm_storel_epi64((__m128i *) out, p1); out += out_stride; + _mm_storel_epi64((__m128i *) out, _mm_shuffle_epi32(p1, 0x4e)); out += out_stride; + _mm_storel_epi64((__m128i *) out, p3); out += out_stride; + _mm_storel_epi64((__m128i *) out, _mm_shuffle_epi32(p3, 0x4e)); + } + +#undef dct_const +#undef dct_rot +#undef dct_widen +#undef dct_wadd +#undef dct_wsub +#undef dct_bfly32o +#undef dct_interleave8 +#undef dct_interleave16 +#undef dct_pass +} + +#endif // STBI_SSE2 + +#ifdef STBI_NEON + +// NEON integer IDCT. should produce bit-identical +// results to the generic C version. 
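+// Same pass structure as the SSE2 kernel above; note the row pass needs a
+// shift by 17, which is done as a plain shift by 16 followed by a rounding
+// shift by 1 because vrshrn_n_s32 cannot shift by more than 16.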
+static void stbi__idct_simd(stbi_uc *out, int out_stride, short data[64]) +{ + int16x8_t row0, row1, row2, row3, row4, row5, row6, row7; + + int16x4_t rot0_0 = vdup_n_s16(stbi__f2f(0.5411961f)); + int16x4_t rot0_1 = vdup_n_s16(stbi__f2f(-1.847759065f)); + int16x4_t rot0_2 = vdup_n_s16(stbi__f2f( 0.765366865f)); + int16x4_t rot1_0 = vdup_n_s16(stbi__f2f( 1.175875602f)); + int16x4_t rot1_1 = vdup_n_s16(stbi__f2f(-0.899976223f)); + int16x4_t rot1_2 = vdup_n_s16(stbi__f2f(-2.562915447f)); + int16x4_t rot2_0 = vdup_n_s16(stbi__f2f(-1.961570560f)); + int16x4_t rot2_1 = vdup_n_s16(stbi__f2f(-0.390180644f)); + int16x4_t rot3_0 = vdup_n_s16(stbi__f2f( 0.298631336f)); + int16x4_t rot3_1 = vdup_n_s16(stbi__f2f( 2.053119869f)); + int16x4_t rot3_2 = vdup_n_s16(stbi__f2f( 3.072711026f)); + int16x4_t rot3_3 = vdup_n_s16(stbi__f2f( 1.501321110f)); + +#define dct_long_mul(out, inq, coeff) \ + int32x4_t out##_l = vmull_s16(vget_low_s16(inq), coeff); \ + int32x4_t out##_h = vmull_s16(vget_high_s16(inq), coeff) + +#define dct_long_mac(out, acc, inq, coeff) \ + int32x4_t out##_l = vmlal_s16(acc##_l, vget_low_s16(inq), coeff); \ + int32x4_t out##_h = vmlal_s16(acc##_h, vget_high_s16(inq), coeff) + +#define dct_widen(out, inq) \ + int32x4_t out##_l = vshll_n_s16(vget_low_s16(inq), 12); \ + int32x4_t out##_h = vshll_n_s16(vget_high_s16(inq), 12) + +// wide add +#define dct_wadd(out, a, b) \ + int32x4_t out##_l = vaddq_s32(a##_l, b##_l); \ + int32x4_t out##_h = vaddq_s32(a##_h, b##_h) + +// wide sub +#define dct_wsub(out, a, b) \ + int32x4_t out##_l = vsubq_s32(a##_l, b##_l); \ + int32x4_t out##_h = vsubq_s32(a##_h, b##_h) + +// butterfly a/b, then shift using "shiftop" by "s" and pack +#define dct_bfly32o(out0,out1, a,b,shiftop,s) \ + { \ + dct_wadd(sum, a, b); \ + dct_wsub(dif, a, b); \ + out0 = vcombine_s16(shiftop(sum_l, s), shiftop(sum_h, s)); \ + out1 = vcombine_s16(shiftop(dif_l, s), shiftop(dif_h, s)); \ + } + +#define dct_pass(shiftop, shift) \ + { \ + /* even part */ \ + int16x8_t sum26 = vaddq_s16(row2, row6); \ + dct_long_mul(p1e, sum26, rot0_0); \ + dct_long_mac(t2e, p1e, row6, rot0_1); \ + dct_long_mac(t3e, p1e, row2, rot0_2); \ + int16x8_t sum04 = vaddq_s16(row0, row4); \ + int16x8_t dif04 = vsubq_s16(row0, row4); \ + dct_widen(t0e, sum04); \ + dct_widen(t1e, dif04); \ + dct_wadd(x0, t0e, t3e); \ + dct_wsub(x3, t0e, t3e); \ + dct_wadd(x1, t1e, t2e); \ + dct_wsub(x2, t1e, t2e); \ + /* odd part */ \ + int16x8_t sum15 = vaddq_s16(row1, row5); \ + int16x8_t sum17 = vaddq_s16(row1, row7); \ + int16x8_t sum35 = vaddq_s16(row3, row5); \ + int16x8_t sum37 = vaddq_s16(row3, row7); \ + int16x8_t sumodd = vaddq_s16(sum17, sum35); \ + dct_long_mul(p5o, sumodd, rot1_0); \ + dct_long_mac(p1o, p5o, sum17, rot1_1); \ + dct_long_mac(p2o, p5o, sum35, rot1_2); \ + dct_long_mul(p3o, sum37, rot2_0); \ + dct_long_mul(p4o, sum15, rot2_1); \ + dct_wadd(sump13o, p1o, p3o); \ + dct_wadd(sump24o, p2o, p4o); \ + dct_wadd(sump23o, p2o, p3o); \ + dct_wadd(sump14o, p1o, p4o); \ + dct_long_mac(x4, sump13o, row7, rot3_0); \ + dct_long_mac(x5, sump24o, row5, rot3_1); \ + dct_long_mac(x6, sump23o, row3, rot3_2); \ + dct_long_mac(x7, sump14o, row1, rot3_3); \ + dct_bfly32o(row0,row7, x0,x7,shiftop,shift); \ + dct_bfly32o(row1,row6, x1,x6,shiftop,shift); \ + dct_bfly32o(row2,row5, x2,x5,shiftop,shift); \ + dct_bfly32o(row3,row4, x3,x4,shiftop,shift); \ + } + + // load + row0 = vld1q_s16(data + 0*8); + row1 = vld1q_s16(data + 1*8); + row2 = vld1q_s16(data + 2*8); + row3 = vld1q_s16(data + 3*8); + row4 = vld1q_s16(data + 4*8); + row5 = 
vld1q_s16(data + 5*8); + row6 = vld1q_s16(data + 6*8); + row7 = vld1q_s16(data + 7*8); + + // add DC bias + row0 = vaddq_s16(row0, vsetq_lane_s16(1024, vdupq_n_s16(0), 0)); + + // column pass + dct_pass(vrshrn_n_s32, 10); + + // 16bit 8x8 transpose + { +// these three map to a single VTRN.16, VTRN.32, and VSWP, respectively. +// whether compilers actually get this is another story, sadly. +#define dct_trn16(x, y) { int16x8x2_t t = vtrnq_s16(x, y); x = t.val[0]; y = t.val[1]; } +#define dct_trn32(x, y) { int32x4x2_t t = vtrnq_s32(vreinterpretq_s32_s16(x), vreinterpretq_s32_s16(y)); x = vreinterpretq_s16_s32(t.val[0]); y = vreinterpretq_s16_s32(t.val[1]); } +#define dct_trn64(x, y) { int16x8_t x0 = x; int16x8_t y0 = y; x = vcombine_s16(vget_low_s16(x0), vget_low_s16(y0)); y = vcombine_s16(vget_high_s16(x0), vget_high_s16(y0)); } + + // pass 1 + dct_trn16(row0, row1); // a0b0a2b2a4b4a6b6 + dct_trn16(row2, row3); + dct_trn16(row4, row5); + dct_trn16(row6, row7); + + // pass 2 + dct_trn32(row0, row2); // a0b0c0d0a4b4c4d4 + dct_trn32(row1, row3); + dct_trn32(row4, row6); + dct_trn32(row5, row7); + + // pass 3 + dct_trn64(row0, row4); // a0b0c0d0e0f0g0h0 + dct_trn64(row1, row5); + dct_trn64(row2, row6); + dct_trn64(row3, row7); + +#undef dct_trn16 +#undef dct_trn32 +#undef dct_trn64 + } + + // row pass + // vrshrn_n_s32 only supports shifts up to 16, we need + // 17. so do a non-rounding shift of 16 first then follow + // up with a rounding shift by 1. + dct_pass(vshrn_n_s32, 16); + + { + // pack and round + uint8x8_t p0 = vqrshrun_n_s16(row0, 1); + uint8x8_t p1 = vqrshrun_n_s16(row1, 1); + uint8x8_t p2 = vqrshrun_n_s16(row2, 1); + uint8x8_t p3 = vqrshrun_n_s16(row3, 1); + uint8x8_t p4 = vqrshrun_n_s16(row4, 1); + uint8x8_t p5 = vqrshrun_n_s16(row5, 1); + uint8x8_t p6 = vqrshrun_n_s16(row6, 1); + uint8x8_t p7 = vqrshrun_n_s16(row7, 1); + + // again, these can translate into one instruction, but often don't. +#define dct_trn8_8(x, y) { uint8x8x2_t t = vtrn_u8(x, y); x = t.val[0]; y = t.val[1]; } +#define dct_trn8_16(x, y) { uint16x4x2_t t = vtrn_u16(vreinterpret_u16_u8(x), vreinterpret_u16_u8(y)); x = vreinterpret_u8_u16(t.val[0]); y = vreinterpret_u8_u16(t.val[1]); } +#define dct_trn8_32(x, y) { uint32x2x2_t t = vtrn_u32(vreinterpret_u32_u8(x), vreinterpret_u32_u8(y)); x = vreinterpret_u8_u32(t.val[0]); y = vreinterpret_u8_u32(t.val[1]); } + + // sadly can't use interleaved stores here since we only write + // 8 bytes to each scan line! + + // 8x8 8-bit transpose pass 1 + dct_trn8_8(p0, p1); + dct_trn8_8(p2, p3); + dct_trn8_8(p4, p5); + dct_trn8_8(p6, p7); + + // pass 2 + dct_trn8_16(p0, p2); + dct_trn8_16(p1, p3); + dct_trn8_16(p4, p6); + dct_trn8_16(p5, p7); + + // pass 3 + dct_trn8_32(p0, p4); + dct_trn8_32(p1, p5); + dct_trn8_32(p2, p6); + dct_trn8_32(p3, p7); + + // store + vst1_u8(out, p0); out += out_stride; + vst1_u8(out, p1); out += out_stride; + vst1_u8(out, p2); out += out_stride; + vst1_u8(out, p3); out += out_stride; + vst1_u8(out, p4); out += out_stride; + vst1_u8(out, p5); out += out_stride; + vst1_u8(out, p6); out += out_stride; + vst1_u8(out, p7); + +#undef dct_trn8_8 +#undef dct_trn8_16 +#undef dct_trn8_32 + } + +#undef dct_long_mul +#undef dct_long_mac +#undef dct_widen +#undef dct_wadd +#undef dct_wsub +#undef dct_bfly32o +#undef dct_pass +} + +#endif // STBI_NEON + +#define STBI__MARKER_none 0xff +// if there's a pending marker from the entropy stream, return that +// otherwise, fetch from the stream and get a marker. 
if there's no +// marker, return 0xff, which is never a valid marker value +static stbi_uc stbi__get_marker(stbi__jpeg *j) +{ + stbi_uc x; + if (j->marker != STBI__MARKER_none) { x = j->marker; j->marker = STBI__MARKER_none; return x; } + x = stbi__get8(j->s); + if (x != 0xff) return STBI__MARKER_none; + while (x == 0xff) + x = stbi__get8(j->s); // consume repeated 0xff fill bytes + return x; +} + +// in each scan, we'll have scan_n components, and the order +// of the components is specified by order[] +#define STBI__RESTART(x) ((x) >= 0xd0 && (x) <= 0xd7) + +// after a restart interval, stbi__jpeg_reset the entropy decoder and +// the dc prediction +static void stbi__jpeg_reset(stbi__jpeg *j) +{ + j->code_bits = 0; + j->code_buffer = 0; + j->nomore = 0; + j->img_comp[0].dc_pred = j->img_comp[1].dc_pred = j->img_comp[2].dc_pred = j->img_comp[3].dc_pred = 0; + j->marker = STBI__MARKER_none; + j->todo = j->restart_interval ? j->restart_interval : 0x7fffffff; + j->eob_run = 0; + // no more than 1<<31 MCUs if no restart_interal? that's plenty safe, + // since we don't even allow 1<<30 pixels +} + +static int stbi__parse_entropy_coded_data(stbi__jpeg *z) +{ + stbi__jpeg_reset(z); + if (!z->progressive) { + if (z->scan_n == 1) { + int i,j; + STBI_SIMD_ALIGN(short, data[64]); + int n = z->order[0]; + // non-interleaved data, we just need to process one block at a time, + // in trivial scanline order + // number of blocks to do just depends on how many actual "pixels" this + // component has, independent of interleaved MCU blocking and such + int w = (z->img_comp[n].x+7) >> 3; + int h = (z->img_comp[n].y+7) >> 3; + for (j=0; j < h; ++j) { + for (i=0; i < w; ++i) { + int ha = z->img_comp[n].ha; + if (!stbi__jpeg_decode_block(z, data, z->huff_dc+z->img_comp[n].hd, z->huff_ac+ha, z->fast_ac[ha], n, z->dequant[z->img_comp[n].tq])) return 0; + z->idct_block_kernel(z->img_comp[n].data+z->img_comp[n].w2*j*8+i*8, z->img_comp[n].w2, data); + // every data block is an MCU, so countdown the restart interval + if (--z->todo <= 0) { + if (z->code_bits < 24) stbi__grow_buffer_unsafe(z); + // if it's NOT a restart, then just bail, so we get corrupt data + // rather than no data + if (!STBI__RESTART(z->marker)) return 1; + stbi__jpeg_reset(z); + } + } + } + return 1; + } else { // interleaved + int i,j,k,x,y; + STBI_SIMD_ALIGN(short, data[64]); + for (j=0; j < z->img_mcu_y; ++j) { + for (i=0; i < z->img_mcu_x; ++i) { + // scan an interleaved mcu... 
process scan_n components in order + for (k=0; k < z->scan_n; ++k) { + int n = z->order[k]; + // scan out an mcu's worth of this component; that's just determined + // by the basic H and V specified for the component + for (y=0; y < z->img_comp[n].v; ++y) { + for (x=0; x < z->img_comp[n].h; ++x) { + int x2 = (i*z->img_comp[n].h + x)*8; + int y2 = (j*z->img_comp[n].v + y)*8; + int ha = z->img_comp[n].ha; + if (!stbi__jpeg_decode_block(z, data, z->huff_dc+z->img_comp[n].hd, z->huff_ac+ha, z->fast_ac[ha], n, z->dequant[z->img_comp[n].tq])) return 0; + z->idct_block_kernel(z->img_comp[n].data+z->img_comp[n].w2*y2+x2, z->img_comp[n].w2, data); + } + } + } + // after all interleaved components, that's an interleaved MCU, + // so now count down the restart interval + if (--z->todo <= 0) { + if (z->code_bits < 24) stbi__grow_buffer_unsafe(z); + if (!STBI__RESTART(z->marker)) return 1; + stbi__jpeg_reset(z); + } + } + } + return 1; + } + } else { + if (z->scan_n == 1) { + int i,j; + int n = z->order[0]; + // non-interleaved data, we just need to process one block at a time, + // in trivial scanline order + // number of blocks to do just depends on how many actual "pixels" this + // component has, independent of interleaved MCU blocking and such + int w = (z->img_comp[n].x+7) >> 3; + int h = (z->img_comp[n].y+7) >> 3; + for (j=0; j < h; ++j) { + for (i=0; i < w; ++i) { + short *data = z->img_comp[n].coeff + 64 * (i + j * z->img_comp[n].coeff_w); + if (z->spec_start == 0) { + if (!stbi__jpeg_decode_block_prog_dc(z, data, &z->huff_dc[z->img_comp[n].hd], n)) + return 0; + } else { + int ha = z->img_comp[n].ha; + if (!stbi__jpeg_decode_block_prog_ac(z, data, &z->huff_ac[ha], z->fast_ac[ha])) + return 0; + } + // every data block is an MCU, so countdown the restart interval + if (--z->todo <= 0) { + if (z->code_bits < 24) stbi__grow_buffer_unsafe(z); + if (!STBI__RESTART(z->marker)) return 1; + stbi__jpeg_reset(z); + } + } + } + return 1; + } else { // interleaved + int i,j,k,x,y; + for (j=0; j < z->img_mcu_y; ++j) { + for (i=0; i < z->img_mcu_x; ++i) { + // scan an interleaved mcu... 
process scan_n components in order + for (k=0; k < z->scan_n; ++k) { + int n = z->order[k]; + // scan out an mcu's worth of this component; that's just determined + // by the basic H and V specified for the component + for (y=0; y < z->img_comp[n].v; ++y) { + for (x=0; x < z->img_comp[n].h; ++x) { + int x2 = (i*z->img_comp[n].h + x); + int y2 = (j*z->img_comp[n].v + y); + short *data = z->img_comp[n].coeff + 64 * (x2 + y2 * z->img_comp[n].coeff_w); + if (!stbi__jpeg_decode_block_prog_dc(z, data, &z->huff_dc[z->img_comp[n].hd], n)) + return 0; + } + } + } + // after all interleaved components, that's an interleaved MCU, + // so now count down the restart interval + if (--z->todo <= 0) { + if (z->code_bits < 24) stbi__grow_buffer_unsafe(z); + if (!STBI__RESTART(z->marker)) return 1; + stbi__jpeg_reset(z); + } + } + } + return 1; + } + } +} + +static void stbi__jpeg_dequantize(short *data, stbi__uint16 *dequant) +{ + int i; + for (i=0; i < 64; ++i) + data[i] *= dequant[i]; +} + +static void stbi__jpeg_finish(stbi__jpeg *z) +{ + if (z->progressive) { + // dequantize and idct the data + int i,j,n; + for (n=0; n < z->s->img_n; ++n) { + int w = (z->img_comp[n].x+7) >> 3; + int h = (z->img_comp[n].y+7) >> 3; + for (j=0; j < h; ++j) { + for (i=0; i < w; ++i) { + short *data = z->img_comp[n].coeff + 64 * (i + j * z->img_comp[n].coeff_w); + stbi__jpeg_dequantize(data, z->dequant[z->img_comp[n].tq]); + z->idct_block_kernel(z->img_comp[n].data+z->img_comp[n].w2*j*8+i*8, z->img_comp[n].w2, data); + } + } + } + } +} + +static int stbi__process_marker(stbi__jpeg *z, int m) +{ + int L; + switch (m) { + case STBI__MARKER_none: // no marker found + return stbi__err("expected marker","Corrupt JPEG"); + + case 0xDD: // DRI - specify restart interval + if (stbi__get16be(z->s) != 4) return stbi__err("bad DRI len","Corrupt JPEG"); + z->restart_interval = stbi__get16be(z->s); + return 1; + + case 0xDB: // DQT - define quantization table + L = stbi__get16be(z->s)-2; + while (L > 0) { + int q = stbi__get8(z->s); + int p = q >> 4, sixteen = (p != 0); + int t = q & 15,i; + if (p != 0 && p != 1) return stbi__err("bad DQT type","Corrupt JPEG"); + if (t > 3) return stbi__err("bad DQT table","Corrupt JPEG"); + + for (i=0; i < 64; ++i) + z->dequant[t][stbi__jpeg_dezigzag[i]] = (stbi__uint16)(sixteen ? stbi__get16be(z->s) : stbi__get8(z->s)); + L -= (sixteen ? 129 : 65); + } + return L==0; + + case 0xC4: // DHT - define huffman table + L = stbi__get16be(z->s)-2; + while (L > 0) { + stbi_uc *v; + int sizes[16],i,n=0; + int q = stbi__get8(z->s); + int tc = q >> 4; + int th = q & 15; + if (tc > 1 || th > 3) return stbi__err("bad DHT header","Corrupt JPEG"); + for (i=0; i < 16; ++i) { + sizes[i] = stbi__get8(z->s); + n += sizes[i]; + } + if(n > 256) return stbi__err("bad DHT header","Corrupt JPEG"); // Loop over i < n would write past end of values! 
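+         // 17 bytes consumed so far: 1 for the tc/th byte, 16 for the size counts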
+ L -= 17; + if (tc == 0) { + if (!stbi__build_huffman(z->huff_dc+th, sizes)) return 0; + v = z->huff_dc[th].values; + } else { + if (!stbi__build_huffman(z->huff_ac+th, sizes)) return 0; + v = z->huff_ac[th].values; + } + for (i=0; i < n; ++i) + v[i] = stbi__get8(z->s); + if (tc != 0) + stbi__build_fast_ac(z->fast_ac[th], z->huff_ac + th); + L -= n; + } + return L==0; + } + + // check for comment block or APP blocks + if ((m >= 0xE0 && m <= 0xEF) || m == 0xFE) { + L = stbi__get16be(z->s); + if (L < 2) { + if (m == 0xFE) + return stbi__err("bad COM len","Corrupt JPEG"); + else + return stbi__err("bad APP len","Corrupt JPEG"); + } + L -= 2; + + if (m == 0xE0 && L >= 5) { // JFIF APP0 segment + static const unsigned char tag[5] = {'J','F','I','F','\0'}; + int ok = 1; + int i; + for (i=0; i < 5; ++i) + if (stbi__get8(z->s) != tag[i]) + ok = 0; + L -= 5; + if (ok) + z->jfif = 1; + } else if (m == 0xEE && L >= 12) { // Adobe APP14 segment + static const unsigned char tag[6] = {'A','d','o','b','e','\0'}; + int ok = 1; + int i; + for (i=0; i < 6; ++i) + if (stbi__get8(z->s) != tag[i]) + ok = 0; + L -= 6; + if (ok) { + stbi__get8(z->s); // version + stbi__get16be(z->s); // flags0 + stbi__get16be(z->s); // flags1 + z->app14_color_transform = stbi__get8(z->s); // color transform + L -= 6; + } + } + + stbi__skip(z->s, L); + return 1; + } + + return stbi__err("unknown marker","Corrupt JPEG"); +} + +// after we see SOS +static int stbi__process_scan_header(stbi__jpeg *z) +{ + int i; + int Ls = stbi__get16be(z->s); + z->scan_n = stbi__get8(z->s); + if (z->scan_n < 1 || z->scan_n > 4 || z->scan_n > (int) z->s->img_n) return stbi__err("bad SOS component count","Corrupt JPEG"); + if (Ls != 6+2*z->scan_n) return stbi__err("bad SOS len","Corrupt JPEG"); + for (i=0; i < z->scan_n; ++i) { + int id = stbi__get8(z->s), which; + int q = stbi__get8(z->s); + for (which = 0; which < z->s->img_n; ++which) + if (z->img_comp[which].id == id) + break; + if (which == z->s->img_n) return 0; // no match + z->img_comp[which].hd = q >> 4; if (z->img_comp[which].hd > 3) return stbi__err("bad DC huff","Corrupt JPEG"); + z->img_comp[which].ha = q & 15; if (z->img_comp[which].ha > 3) return stbi__err("bad AC huff","Corrupt JPEG"); + z->order[i] = which; + } + + { + int aa; + z->spec_start = stbi__get8(z->s); + z->spec_end = stbi__get8(z->s); // should be 63, but might be 0 + aa = stbi__get8(z->s); + z->succ_high = (aa >> 4); + z->succ_low = (aa & 15); + if (z->progressive) { + if (z->spec_start > 63 || z->spec_end > 63 || z->spec_start > z->spec_end || z->succ_high > 13 || z->succ_low > 13) + return stbi__err("bad SOS", "Corrupt JPEG"); + } else { + if (z->spec_start != 0) return stbi__err("bad SOS","Corrupt JPEG"); + if (z->succ_high != 0 || z->succ_low != 0) return stbi__err("bad SOS","Corrupt JPEG"); + z->spec_end = 63; + } + } + + return 1; +} + +static int stbi__free_jpeg_components(stbi__jpeg *z, int ncomp, int why) +{ + int i; + for (i=0; i < ncomp; ++i) { + if (z->img_comp[i].raw_data) { + STBI_FREE(z->img_comp[i].raw_data); + z->img_comp[i].raw_data = NULL; + z->img_comp[i].data = NULL; + } + if (z->img_comp[i].raw_coeff) { + STBI_FREE(z->img_comp[i].raw_coeff); + z->img_comp[i].raw_coeff = 0; + z->img_comp[i].coeff = 0; + } + if (z->img_comp[i].linebuf) { + STBI_FREE(z->img_comp[i].linebuf); + z->img_comp[i].linebuf = NULL; + } + } + return why; +} + +static int stbi__process_frame_header(stbi__jpeg *z, int scan) +{ + stbi__context *s = z->s; + int Lf,p,i,q, h_max=1,v_max=1,c; + Lf = stbi__get16be(s); if (Lf < 11) 
return stbi__err("bad SOF len","Corrupt JPEG"); // JPEG + p = stbi__get8(s); if (p != 8) return stbi__err("only 8-bit","JPEG format not supported: 8-bit only"); // JPEG baseline + s->img_y = stbi__get16be(s); if (s->img_y == 0) return stbi__err("no header height", "JPEG format not supported: delayed height"); // Legal, but we don't handle it--but neither does IJG + s->img_x = stbi__get16be(s); if (s->img_x == 0) return stbi__err("0 width","Corrupt JPEG"); // JPEG requires + if (s->img_y > STBI_MAX_DIMENSIONS) return stbi__err("too large","Very large image (corrupt?)"); + if (s->img_x > STBI_MAX_DIMENSIONS) return stbi__err("too large","Very large image (corrupt?)"); + c = stbi__get8(s); + if (c != 3 && c != 1 && c != 4) return stbi__err("bad component count","Corrupt JPEG"); + s->img_n = c; + for (i=0; i < c; ++i) { + z->img_comp[i].data = NULL; + z->img_comp[i].linebuf = NULL; + } + + if (Lf != 8+3*s->img_n) return stbi__err("bad SOF len","Corrupt JPEG"); + + z->rgb = 0; + for (i=0; i < s->img_n; ++i) { + static const unsigned char rgb[3] = { 'R', 'G', 'B' }; + z->img_comp[i].id = stbi__get8(s); + if (s->img_n == 3 && z->img_comp[i].id == rgb[i]) + ++z->rgb; + q = stbi__get8(s); + z->img_comp[i].h = (q >> 4); if (!z->img_comp[i].h || z->img_comp[i].h > 4) return stbi__err("bad H","Corrupt JPEG"); + z->img_comp[i].v = q & 15; if (!z->img_comp[i].v || z->img_comp[i].v > 4) return stbi__err("bad V","Corrupt JPEG"); + z->img_comp[i].tq = stbi__get8(s); if (z->img_comp[i].tq > 3) return stbi__err("bad TQ","Corrupt JPEG"); + } + + if (scan != STBI__SCAN_load) return 1; + + if (!stbi__mad3sizes_valid(s->img_x, s->img_y, s->img_n, 0)) return stbi__err("too large", "Image too large to decode"); + + for (i=0; i < s->img_n; ++i) { + if (z->img_comp[i].h > h_max) h_max = z->img_comp[i].h; + if (z->img_comp[i].v > v_max) v_max = z->img_comp[i].v; + } + + // check that plane subsampling factors are integer ratios; our resamplers can't deal with fractional ratios + // and I've never seen a non-corrupted JPEG file actually use them + for (i=0; i < s->img_n; ++i) { + if (h_max % z->img_comp[i].h != 0) return stbi__err("bad H","Corrupt JPEG"); + if (v_max % z->img_comp[i].v != 0) return stbi__err("bad V","Corrupt JPEG"); + } + + // compute interleaved mcu info + z->img_h_max = h_max; + z->img_v_max = v_max; + z->img_mcu_w = h_max * 8; + z->img_mcu_h = v_max * 8; + // these sizes can't be more than 17 bits + z->img_mcu_x = (s->img_x + z->img_mcu_w-1) / z->img_mcu_w; + z->img_mcu_y = (s->img_y + z->img_mcu_h-1) / z->img_mcu_h; + + for (i=0; i < s->img_n; ++i) { + // number of effective pixels (e.g. for non-interleaved MCU) + z->img_comp[i].x = (s->img_x * z->img_comp[i].h + h_max-1) / h_max; + z->img_comp[i].y = (s->img_y * z->img_comp[i].v + v_max-1) / v_max; + // to simplify generation, we'll allocate enough memory to decode + // the bogus oversized data from using interleaved MCUs and their + // big blocks (e.g. 
a 16x16 iMCU on an image of width 33); we won't + // discard the extra data until colorspace conversion + // + // img_mcu_x, img_mcu_y: <=17 bits; comp[i].h and .v are <=4 (checked earlier) + // so these muls can't overflow with 32-bit ints (which we require) + z->img_comp[i].w2 = z->img_mcu_x * z->img_comp[i].h * 8; + z->img_comp[i].h2 = z->img_mcu_y * z->img_comp[i].v * 8; + z->img_comp[i].coeff = 0; + z->img_comp[i].raw_coeff = 0; + z->img_comp[i].linebuf = NULL; + z->img_comp[i].raw_data = stbi__malloc_mad2(z->img_comp[i].w2, z->img_comp[i].h2, 15); + if (z->img_comp[i].raw_data == NULL) + return stbi__free_jpeg_components(z, i+1, stbi__err("outofmem", "Out of memory")); + // align blocks for idct using mmx/sse + z->img_comp[i].data = (stbi_uc*) (((size_t) z->img_comp[i].raw_data + 15) & ~15); + if (z->progressive) { + // w2, h2 are multiples of 8 (see above) + z->img_comp[i].coeff_w = z->img_comp[i].w2 / 8; + z->img_comp[i].coeff_h = z->img_comp[i].h2 / 8; + z->img_comp[i].raw_coeff = stbi__malloc_mad3(z->img_comp[i].w2, z->img_comp[i].h2, sizeof(short), 15); + if (z->img_comp[i].raw_coeff == NULL) + return stbi__free_jpeg_components(z, i+1, stbi__err("outofmem", "Out of memory")); + z->img_comp[i].coeff = (short*) (((size_t) z->img_comp[i].raw_coeff + 15) & ~15); + } + } + + return 1; +} + +// use comparisons since in some cases we handle more than one case (e.g. SOF) +#define stbi__DNL(x) ((x) == 0xdc) +#define stbi__SOI(x) ((x) == 0xd8) +#define stbi__EOI(x) ((x) == 0xd9) +#define stbi__SOF(x) ((x) == 0xc0 || (x) == 0xc1 || (x) == 0xc2) +#define stbi__SOS(x) ((x) == 0xda) + +#define stbi__SOF_progressive(x) ((x) == 0xc2) + +static int stbi__decode_jpeg_header(stbi__jpeg *z, int scan) +{ + int m; + z->jfif = 0; + z->app14_color_transform = -1; // valid values are 0,1,2 + z->marker = STBI__MARKER_none; // initialize cached marker to empty + m = stbi__get_marker(z); + if (!stbi__SOI(m)) return stbi__err("no SOI","Corrupt JPEG"); + if (scan == STBI__SCAN_type) return 1; + m = stbi__get_marker(z); + while (!stbi__SOF(m)) { + if (!stbi__process_marker(z,m)) return 0; + m = stbi__get_marker(z); + while (m == STBI__MARKER_none) { + // some files have extra padding after their blocks, so ok, we'll scan + if (stbi__at_eof(z->s)) return stbi__err("no SOF", "Corrupt JPEG"); + m = stbi__get_marker(z); + } + } + z->progressive = stbi__SOF_progressive(m); + if (!stbi__process_frame_header(z, scan)) return 0; + return 1; +} + +static stbi_uc stbi__skip_jpeg_junk_at_end(stbi__jpeg *j) +{ + // some JPEGs have junk at end, skip over it but if we find what looks + // like a valid marker, resume there + while (!stbi__at_eof(j->s)) { + stbi_uc x = stbi__get8(j->s); + while (x == 0xff) { // might be a marker + if (stbi__at_eof(j->s)) return STBI__MARKER_none; + x = stbi__get8(j->s); + if (x != 0x00 && x != 0xff) { + // not a stuffed zero or lead-in to another marker, looks + // like an actual marker, return it + return x; + } + // stuffed zero has x=0 now which ends the loop, meaning we go + // back to regular scan loop. + // repeated 0xff keeps trying to read the next byte of the marker. 
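+         // e.g. FF 00 is a stuffed zero (a literal 0xFF in the entropy data)
+         // and is skipped; FF FF ... D9 collapses the fill bytes and returns
+         // the EOI marker 0xD9 to the caller.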
+ } + } + return STBI__MARKER_none; +} + +// decode image to YCbCr format +static int stbi__decode_jpeg_image(stbi__jpeg *j) +{ + int m; + for (m = 0; m < 4; m++) { + j->img_comp[m].raw_data = NULL; + j->img_comp[m].raw_coeff = NULL; + } + j->restart_interval = 0; + if (!stbi__decode_jpeg_header(j, STBI__SCAN_load)) return 0; + m = stbi__get_marker(j); + while (!stbi__EOI(m)) { + if (stbi__SOS(m)) { + if (!stbi__process_scan_header(j)) return 0; + if (!stbi__parse_entropy_coded_data(j)) return 0; + if (j->marker == STBI__MARKER_none ) { + j->marker = stbi__skip_jpeg_junk_at_end(j); + // if we reach eof without hitting a marker, stbi__get_marker() below will fail and we'll eventually return 0 + } + m = stbi__get_marker(j); + if (STBI__RESTART(m)) + m = stbi__get_marker(j); + } else if (stbi__DNL(m)) { + int Ld = stbi__get16be(j->s); + stbi__uint32 NL = stbi__get16be(j->s); + if (Ld != 4) return stbi__err("bad DNL len", "Corrupt JPEG"); + if (NL != j->s->img_y) return stbi__err("bad DNL height", "Corrupt JPEG"); + m = stbi__get_marker(j); + } else { + if (!stbi__process_marker(j, m)) return 1; + m = stbi__get_marker(j); + } + } + if (j->progressive) + stbi__jpeg_finish(j); + return 1; +} + +// static jfif-centered resampling (across block boundaries) + +typedef stbi_uc *(*resample_row_func)(stbi_uc *out, stbi_uc *in0, stbi_uc *in1, + int w, int hs); + +#define stbi__div4(x) ((stbi_uc) ((x) >> 2)) + +static stbi_uc *resample_row_1(stbi_uc *out, stbi_uc *in_near, stbi_uc *in_far, int w, int hs) +{ + STBI_NOTUSED(out); + STBI_NOTUSED(in_far); + STBI_NOTUSED(w); + STBI_NOTUSED(hs); + return in_near; +} + +static stbi_uc* stbi__resample_row_v_2(stbi_uc *out, stbi_uc *in_near, stbi_uc *in_far, int w, int hs) +{ + // need to generate two samples vertically for every one in input + int i; + STBI_NOTUSED(hs); + for (i=0; i < w; ++i) + out[i] = stbi__div4(3*in_near[i] + in_far[i] + 2); + return out; +} + +static stbi_uc* stbi__resample_row_h_2(stbi_uc *out, stbi_uc *in_near, stbi_uc *in_far, int w, int hs) +{ + // need to generate two samples horizontally for every one in input + int i; + stbi_uc *input = in_near; + + if (w == 1) { + // if only one sample, can't do any interpolation + out[0] = out[1] = input[0]; + return out; + } + + out[0] = input[0]; + out[1] = stbi__div4(input[0]*3 + input[1] + 2); + for (i=1; i < w-1; ++i) { + int n = 3*input[i]+2; + out[i*2+0] = stbi__div4(n+input[i-1]); + out[i*2+1] = stbi__div4(n+input[i+1]); + } + out[i*2+0] = stbi__div4(input[w-2]*3 + input[w-1] + 2); + out[i*2+1] = input[w-1]; + + STBI_NOTUSED(in_far); + STBI_NOTUSED(hs); + + return out; +} + +#define stbi__div16(x) ((stbi_uc) ((x) >> 4)) + +static stbi_uc *stbi__resample_row_hv_2(stbi_uc *out, stbi_uc *in_near, stbi_uc *in_far, int w, int hs) +{ + // need to generate 2x2 samples for every one in input + int i,t0,t1; + if (w == 1) { + out[0] = out[1] = stbi__div4(3*in_near[0] + in_far[0] + 2); + return out; + } + + t1 = 3*in_near[0] + in_far[0]; + out[0] = stbi__div4(t1+2); + for (i=1; i < w; ++i) { + t0 = t1; + t1 = 3*in_near[i]+in_far[i]; + out[i*2-1] = stbi__div16(3*t0 + t1 + 8); + out[i*2 ] = stbi__div16(3*t1 + t0 + 8); + } + out[w*2-1] = stbi__div4(t1+2); + + STBI_NOTUSED(hs); + + return out; +} + +#if defined(STBI_SSE2) || defined(STBI_NEON) +static stbi_uc *stbi__resample_row_hv_2_simd(stbi_uc *out, stbi_uc *in_near, stbi_uc *in_far, int w, int hs) +{ + // need to generate 2x2 samples for every one in input + int i=0,t0,t1; + + if (w == 1) { + out[0] = out[1] = stbi__div4(3*in_near[0] + in_far[0] + 
2); + return out; + } + + t1 = 3*in_near[0] + in_far[0]; + // process groups of 8 pixels for as long as we can. + // note we can't handle the last pixel in a row in this loop + // because we need to handle the filter boundary conditions. + for (; i < ((w-1) & ~7); i += 8) { +#if defined(STBI_SSE2) + // load and perform the vertical filtering pass + // this uses 3*x + y = 4*x + (y - x) + __m128i zero = _mm_setzero_si128(); + __m128i farb = _mm_loadl_epi64((__m128i *) (in_far + i)); + __m128i nearb = _mm_loadl_epi64((__m128i *) (in_near + i)); + __m128i farw = _mm_unpacklo_epi8(farb, zero); + __m128i nearw = _mm_unpacklo_epi8(nearb, zero); + __m128i diff = _mm_sub_epi16(farw, nearw); + __m128i nears = _mm_slli_epi16(nearw, 2); + __m128i curr = _mm_add_epi16(nears, diff); // current row + + // horizontal filter works the same based on shifted vers of current + // row. "prev" is current row shifted right by 1 pixel; we need to + // insert the previous pixel value (from t1). + // "next" is current row shifted left by 1 pixel, with first pixel + // of next block of 8 pixels added in. + __m128i prv0 = _mm_slli_si128(curr, 2); + __m128i nxt0 = _mm_srli_si128(curr, 2); + __m128i prev = _mm_insert_epi16(prv0, t1, 0); + __m128i next = _mm_insert_epi16(nxt0, 3*in_near[i+8] + in_far[i+8], 7); + + // horizontal filter, polyphase implementation since it's convenient: + // even pixels = 3*cur + prev = cur*4 + (prev - cur) + // odd pixels = 3*cur + next = cur*4 + (next - cur) + // note the shared term. + __m128i bias = _mm_set1_epi16(8); + __m128i curs = _mm_slli_epi16(curr, 2); + __m128i prvd = _mm_sub_epi16(prev, curr); + __m128i nxtd = _mm_sub_epi16(next, curr); + __m128i curb = _mm_add_epi16(curs, bias); + __m128i even = _mm_add_epi16(prvd, curb); + __m128i odd = _mm_add_epi16(nxtd, curb); + + // interleave even and odd pixels, then undo scaling. + __m128i int0 = _mm_unpacklo_epi16(even, odd); + __m128i int1 = _mm_unpackhi_epi16(even, odd); + __m128i de0 = _mm_srli_epi16(int0, 4); + __m128i de1 = _mm_srli_epi16(int1, 4); + + // pack and write output + __m128i outv = _mm_packus_epi16(de0, de1); + _mm_storeu_si128((__m128i *) (out + i*2), outv); +#elif defined(STBI_NEON) + // load and perform the vertical filtering pass + // this uses 3*x + y = 4*x + (y - x) + uint8x8_t farb = vld1_u8(in_far + i); + uint8x8_t nearb = vld1_u8(in_near + i); + int16x8_t diff = vreinterpretq_s16_u16(vsubl_u8(farb, nearb)); + int16x8_t nears = vreinterpretq_s16_u16(vshll_n_u8(nearb, 2)); + int16x8_t curr = vaddq_s16(nears, diff); // current row + + // horizontal filter works the same based on shifted vers of current + // row. "prev" is current row shifted right by 1 pixel; we need to + // insert the previous pixel value (from t1). + // "next" is current row shifted left by 1 pixel, with first pixel + // of next block of 8 pixels added in. + int16x8_t prv0 = vextq_s16(curr, curr, 7); + int16x8_t nxt0 = vextq_s16(curr, curr, 1); + int16x8_t prev = vsetq_lane_s16(t1, prv0, 0); + int16x8_t next = vsetq_lane_s16(3*in_near[i+8] + in_far[i+8], nxt0, 7); + + // horizontal filter, polyphase implementation since it's convenient: + // even pixels = 3*cur + prev = cur*4 + (prev - cur) + // odd pixels = 3*cur + next = cur*4 + (next - cur) + // note the shared term. 
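+      // worked through: each output pair is (3*cur + prev + 8) >> 4 and
+      // (3*cur + next + 8) >> 4, where cur/prev/next are the vertically
+      // filtered 3*near+far sums; this is exactly the scalar
+      // stbi__div16(3*t0 + t1 + 8) / stbi__div16(3*t1 + t0 + 8) pair in
+      // the tail loop below, so SIMD and scalar outputs round identically.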
+ int16x8_t curs = vshlq_n_s16(curr, 2); + int16x8_t prvd = vsubq_s16(prev, curr); + int16x8_t nxtd = vsubq_s16(next, curr); + int16x8_t even = vaddq_s16(curs, prvd); + int16x8_t odd = vaddq_s16(curs, nxtd); + + // undo scaling and round, then store with even/odd phases interleaved + uint8x8x2_t o; + o.val[0] = vqrshrun_n_s16(even, 4); + o.val[1] = vqrshrun_n_s16(odd, 4); + vst2_u8(out + i*2, o); +#endif + + // "previous" value for next iter + t1 = 3*in_near[i+7] + in_far[i+7]; + } + + t0 = t1; + t1 = 3*in_near[i] + in_far[i]; + out[i*2] = stbi__div16(3*t1 + t0 + 8); + + for (++i; i < w; ++i) { + t0 = t1; + t1 = 3*in_near[i]+in_far[i]; + out[i*2-1] = stbi__div16(3*t0 + t1 + 8); + out[i*2 ] = stbi__div16(3*t1 + t0 + 8); + } + out[w*2-1] = stbi__div4(t1+2); + + STBI_NOTUSED(hs); + + return out; +} +#endif + +static stbi_uc *stbi__resample_row_generic(stbi_uc *out, stbi_uc *in_near, stbi_uc *in_far, int w, int hs) +{ + // resample with nearest-neighbor + int i,j; + STBI_NOTUSED(in_far); + for (i=0; i < w; ++i) + for (j=0; j < hs; ++j) + out[i*hs+j] = in_near[i]; + return out; +} + +// this is a reduced-precision calculation of YCbCr-to-RGB introduced +// to make sure the code produces the same results in both SIMD and scalar +#define stbi__float2fixed(x) (((int) ((x) * 4096.0f + 0.5f)) << 8) +static void stbi__YCbCr_to_RGB_row(stbi_uc *out, const stbi_uc *y, const stbi_uc *pcb, const stbi_uc *pcr, int count, int step) +{ + int i; + for (i=0; i < count; ++i) { + int y_fixed = (y[i] << 20) + (1<<19); // rounding + int r,g,b; + int cr = pcr[i] - 128; + int cb = pcb[i] - 128; + r = y_fixed + cr* stbi__float2fixed(1.40200f); + g = y_fixed + (cr*-stbi__float2fixed(0.71414f)) + ((cb*-stbi__float2fixed(0.34414f)) & 0xffff0000); + b = y_fixed + cb* stbi__float2fixed(1.77200f); + r >>= 20; + g >>= 20; + b >>= 20; + if ((unsigned) r > 255) { if (r < 0) r = 0; else r = 255; } + if ((unsigned) g > 255) { if (g < 0) g = 0; else g = 255; } + if ((unsigned) b > 255) { if (b < 0) b = 0; else b = 255; } + out[0] = (stbi_uc)r; + out[1] = (stbi_uc)g; + out[2] = (stbi_uc)b; + out[3] = 255; + out += step; + } +} + +#if defined(STBI_SSE2) || defined(STBI_NEON) +static void stbi__YCbCr_to_RGB_simd(stbi_uc *out, stbi_uc const *y, stbi_uc const *pcb, stbi_uc const *pcr, int count, int step) +{ + int i = 0; + +#ifdef STBI_SSE2 + // step == 3 is pretty ugly on the final interleave, and i'm not convinced + // it's useful in practice (you wouldn't use it for textures, for example). + // so just accelerate step == 4 case. + if (step == 4) { + // this is a fairly straightforward implementation and not super-optimized. 
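+      // scale check (matches the scalar stbi__float2fixed path): yw unpacks
+      // to (y<<8)|128, so yws = yw>>4 = 16*y + 8; crw holds cr<<8, and
+      // _mm_mulhi_epi16(cr_const0, crw) = (5743*(cr<<8))>>16 = 5743*cr/256.
+      // After the final >>4 this is y + 1.402*cr + 0.5, i.e. the same
+      // rounding as ((y<<20) + (1<<19) + cr*stbi__float2fixed(1.402)) >> 20.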
+ __m128i signflip = _mm_set1_epi8(-0x80); + __m128i cr_const0 = _mm_set1_epi16( (short) ( 1.40200f*4096.0f+0.5f)); + __m128i cr_const1 = _mm_set1_epi16( - (short) ( 0.71414f*4096.0f+0.5f)); + __m128i cb_const0 = _mm_set1_epi16( - (short) ( 0.34414f*4096.0f+0.5f)); + __m128i cb_const1 = _mm_set1_epi16( (short) ( 1.77200f*4096.0f+0.5f)); + __m128i y_bias = _mm_set1_epi8((char) (unsigned char) 128); + __m128i xw = _mm_set1_epi16(255); // alpha channel + + for (; i+7 < count; i += 8) { + // load + __m128i y_bytes = _mm_loadl_epi64((__m128i *) (y+i)); + __m128i cr_bytes = _mm_loadl_epi64((__m128i *) (pcr+i)); + __m128i cb_bytes = _mm_loadl_epi64((__m128i *) (pcb+i)); + __m128i cr_biased = _mm_xor_si128(cr_bytes, signflip); // -128 + __m128i cb_biased = _mm_xor_si128(cb_bytes, signflip); // -128 + + // unpack to short (and left-shift cr, cb by 8) + __m128i yw = _mm_unpacklo_epi8(y_bias, y_bytes); + __m128i crw = _mm_unpacklo_epi8(_mm_setzero_si128(), cr_biased); + __m128i cbw = _mm_unpacklo_epi8(_mm_setzero_si128(), cb_biased); + + // color transform + __m128i yws = _mm_srli_epi16(yw, 4); + __m128i cr0 = _mm_mulhi_epi16(cr_const0, crw); + __m128i cb0 = _mm_mulhi_epi16(cb_const0, cbw); + __m128i cb1 = _mm_mulhi_epi16(cbw, cb_const1); + __m128i cr1 = _mm_mulhi_epi16(crw, cr_const1); + __m128i rws = _mm_add_epi16(cr0, yws); + __m128i gwt = _mm_add_epi16(cb0, yws); + __m128i bws = _mm_add_epi16(yws, cb1); + __m128i gws = _mm_add_epi16(gwt, cr1); + + // descale + __m128i rw = _mm_srai_epi16(rws, 4); + __m128i bw = _mm_srai_epi16(bws, 4); + __m128i gw = _mm_srai_epi16(gws, 4); + + // back to byte, set up for transpose + __m128i brb = _mm_packus_epi16(rw, bw); + __m128i gxb = _mm_packus_epi16(gw, xw); + + // transpose to interleave channels + __m128i t0 = _mm_unpacklo_epi8(brb, gxb); + __m128i t1 = _mm_unpackhi_epi8(brb, gxb); + __m128i o0 = _mm_unpacklo_epi16(t0, t1); + __m128i o1 = _mm_unpackhi_epi16(t0, t1); + + // store + _mm_storeu_si128((__m128i *) (out + 0), o0); + _mm_storeu_si128((__m128i *) (out + 16), o1); + out += 32; + } + } +#endif + +#ifdef STBI_NEON + // in this version, step=3 support would be easy to add. but is there demand? + if (step == 4) { + // this is a fairly straightforward implementation and not super-optimized. 
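+      // same scaling as the SSE2 path: yws = y<<4; crw = cr<<7, and
+      // vqdmulhq_s16 doubles the product, so vqdmulhq_s16(crw, cr_const0)
+      // = (2*(cr<<7)*5743)>>16 = 5743*cr/256; vqrshrun_n_s16(rws, 4)
+      // then applies the rounding >>4 and the unsigned saturating clamp.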
+ uint8x8_t signflip = vdup_n_u8(0x80); + int16x8_t cr_const0 = vdupq_n_s16( (short) ( 1.40200f*4096.0f+0.5f)); + int16x8_t cr_const1 = vdupq_n_s16( - (short) ( 0.71414f*4096.0f+0.5f)); + int16x8_t cb_const0 = vdupq_n_s16( - (short) ( 0.34414f*4096.0f+0.5f)); + int16x8_t cb_const1 = vdupq_n_s16( (short) ( 1.77200f*4096.0f+0.5f)); + + for (; i+7 < count; i += 8) { + // load + uint8x8_t y_bytes = vld1_u8(y + i); + uint8x8_t cr_bytes = vld1_u8(pcr + i); + uint8x8_t cb_bytes = vld1_u8(pcb + i); + int8x8_t cr_biased = vreinterpret_s8_u8(vsub_u8(cr_bytes, signflip)); + int8x8_t cb_biased = vreinterpret_s8_u8(vsub_u8(cb_bytes, signflip)); + + // expand to s16 + int16x8_t yws = vreinterpretq_s16_u16(vshll_n_u8(y_bytes, 4)); + int16x8_t crw = vshll_n_s8(cr_biased, 7); + int16x8_t cbw = vshll_n_s8(cb_biased, 7); + + // color transform + int16x8_t cr0 = vqdmulhq_s16(crw, cr_const0); + int16x8_t cb0 = vqdmulhq_s16(cbw, cb_const0); + int16x8_t cr1 = vqdmulhq_s16(crw, cr_const1); + int16x8_t cb1 = vqdmulhq_s16(cbw, cb_const1); + int16x8_t rws = vaddq_s16(yws, cr0); + int16x8_t gws = vaddq_s16(vaddq_s16(yws, cb0), cr1); + int16x8_t bws = vaddq_s16(yws, cb1); + + // undo scaling, round, convert to byte + uint8x8x4_t o; + o.val[0] = vqrshrun_n_s16(rws, 4); + o.val[1] = vqrshrun_n_s16(gws, 4); + o.val[2] = vqrshrun_n_s16(bws, 4); + o.val[3] = vdup_n_u8(255); + + // store, interleaving r/g/b/a + vst4_u8(out, o); + out += 8*4; + } + } +#endif + + for (; i < count; ++i) { + int y_fixed = (y[i] << 20) + (1<<19); // rounding + int r,g,b; + int cr = pcr[i] - 128; + int cb = pcb[i] - 128; + r = y_fixed + cr* stbi__float2fixed(1.40200f); + g = y_fixed + cr*-stbi__float2fixed(0.71414f) + ((cb*-stbi__float2fixed(0.34414f)) & 0xffff0000); + b = y_fixed + cb* stbi__float2fixed(1.77200f); + r >>= 20; + g >>= 20; + b >>= 20; + if ((unsigned) r > 255) { if (r < 0) r = 0; else r = 255; } + if ((unsigned) g > 255) { if (g < 0) g = 0; else g = 255; } + if ((unsigned) b > 255) { if (b < 0) b = 0; else b = 255; } + out[0] = (stbi_uc)r; + out[1] = (stbi_uc)g; + out[2] = (stbi_uc)b; + out[3] = 255; + out += step; + } +} +#endif + +// set up the kernels +static void stbi__setup_jpeg(stbi__jpeg *j) +{ + j->idct_block_kernel = stbi__idct_block; + j->YCbCr_to_RGB_kernel = stbi__YCbCr_to_RGB_row; + j->resample_row_hv_2_kernel = stbi__resample_row_hv_2; + +#ifdef STBI_SSE2 + if (stbi__sse2_available()) { + j->idct_block_kernel = stbi__idct_simd; + j->YCbCr_to_RGB_kernel = stbi__YCbCr_to_RGB_simd; + j->resample_row_hv_2_kernel = stbi__resample_row_hv_2_simd; + } +#endif + +#ifdef STBI_NEON + j->idct_block_kernel = stbi__idct_simd; + j->YCbCr_to_RGB_kernel = stbi__YCbCr_to_RGB_simd; + j->resample_row_hv_2_kernel = stbi__resample_row_hv_2_simd; +#endif +} + +// clean up the temporary component buffers +static void stbi__cleanup_jpeg(stbi__jpeg *j) +{ + stbi__free_jpeg_components(j, j->s->img_n, 0); +} + +typedef struct +{ + resample_row_func resample; + stbi_uc *line0,*line1; + int hs,vs; // expansion factor in each axis + int w_lores; // horizontal pixels pre-expansion + int ystep; // how far through vertical expansion we are + int ypos; // which pre-expansion row we're on +} stbi__resample; + +// fast 0..255 * 0..255 => 0..255 rounded multiplication +static stbi_uc stbi__blinn_8x8(stbi_uc x, stbi_uc y) +{ + unsigned int t = x*y + 128; + return (stbi_uc) ((t + (t >>8)) >> 8); +} + +static stbi_uc *load_jpeg_image(stbi__jpeg *z, int *out_x, int *out_y, int *comp, int req_comp) +{ + int n, decode_n, is_rgb; + z->s->img_n = 0; // make 
stbi__cleanup_jpeg safe + + // validate req_comp + if (req_comp < 0 || req_comp > 4) return stbi__errpuc("bad req_comp", "Internal error"); + + // load a jpeg image from whichever source, but leave in YCbCr format + if (!stbi__decode_jpeg_image(z)) { stbi__cleanup_jpeg(z); return NULL; } + + // determine actual number of components to generate + n = req_comp ? req_comp : z->s->img_n >= 3 ? 3 : 1; + + is_rgb = z->s->img_n == 3 && (z->rgb == 3 || (z->app14_color_transform == 0 && !z->jfif)); + + if (z->s->img_n == 3 && n < 3 && !is_rgb) + decode_n = 1; + else + decode_n = z->s->img_n; + + // nothing to do if no components requested; check this now to avoid + // accessing uninitialized coutput[0] later + if (decode_n <= 0) { stbi__cleanup_jpeg(z); return NULL; } + + // resample and color-convert + { + int k; + unsigned int i,j; + stbi_uc *output; + stbi_uc *coutput[4] = { NULL, NULL, NULL, NULL }; + + stbi__resample res_comp[4]; + + for (k=0; k < decode_n; ++k) { + stbi__resample *r = &res_comp[k]; + + // allocate line buffer big enough for upsampling off the edges + // with upsample factor of 4 + z->img_comp[k].linebuf = (stbi_uc *) stbi__malloc(z->s->img_x + 3); + if (!z->img_comp[k].linebuf) { stbi__cleanup_jpeg(z); return stbi__errpuc("outofmem", "Out of memory"); } + + r->hs = z->img_h_max / z->img_comp[k].h; + r->vs = z->img_v_max / z->img_comp[k].v; + r->ystep = r->vs >> 1; + r->w_lores = (z->s->img_x + r->hs-1) / r->hs; + r->ypos = 0; + r->line0 = r->line1 = z->img_comp[k].data; + + if (r->hs == 1 && r->vs == 1) r->resample = resample_row_1; + else if (r->hs == 1 && r->vs == 2) r->resample = stbi__resample_row_v_2; + else if (r->hs == 2 && r->vs == 1) r->resample = stbi__resample_row_h_2; + else if (r->hs == 2 && r->vs == 2) r->resample = z->resample_row_hv_2_kernel; + else r->resample = stbi__resample_row_generic; + } + + // can't error after this, so this is safe + output = (stbi_uc *) stbi__malloc_mad3(n, z->s->img_x, z->s->img_y, 1); + if (!output) { stbi__cleanup_jpeg(z); return stbi__errpuc("outofmem", "Out of memory"); } + + // now go ahead and resample + for (j=0; j < z->s->img_y; ++j) { + stbi_uc *out = output + n * z->s->img_x * j; + for (k=0; k < decode_n; ++k) { + stbi__resample *r = &res_comp[k]; + int y_bot = r->ystep >= (r->vs >> 1); + coutput[k] = r->resample(z->img_comp[k].linebuf, + y_bot ? r->line1 : r->line0, + y_bot ?
r->line0 : r->line1, + r->w_lores, r->hs); + if (++r->ystep >= r->vs) { + r->ystep = 0; + r->line0 = r->line1; + if (++r->ypos < z->img_comp[k].y) + r->line1 += z->img_comp[k].w2; + } + } + if (n >= 3) { + stbi_uc *y = coutput[0]; + if (z->s->img_n == 3) { + if (is_rgb) { + for (i=0; i < z->s->img_x; ++i) { + out[0] = y[i]; + out[1] = coutput[1][i]; + out[2] = coutput[2][i]; + out[3] = 255; + out += n; + } + } else { + z->YCbCr_to_RGB_kernel(out, y, coutput[1], coutput[2], z->s->img_x, n); + } + } else if (z->s->img_n == 4) { + if (z->app14_color_transform == 0) { // CMYK + for (i=0; i < z->s->img_x; ++i) { + stbi_uc m = coutput[3][i]; + out[0] = stbi__blinn_8x8(coutput[0][i], m); + out[1] = stbi__blinn_8x8(coutput[1][i], m); + out[2] = stbi__blinn_8x8(coutput[2][i], m); + out[3] = 255; + out += n; + } + } else if (z->app14_color_transform == 2) { // YCCK + z->YCbCr_to_RGB_kernel(out, y, coutput[1], coutput[2], z->s->img_x, n); + for (i=0; i < z->s->img_x; ++i) { + stbi_uc m = coutput[3][i]; + out[0] = stbi__blinn_8x8(255 - out[0], m); + out[1] = stbi__blinn_8x8(255 - out[1], m); + out[2] = stbi__blinn_8x8(255 - out[2], m); + out += n; + } + } else { // YCbCr + alpha? Ignore the fourth channel for now + z->YCbCr_to_RGB_kernel(out, y, coutput[1], coutput[2], z->s->img_x, n); + } + } else + for (i=0; i < z->s->img_x; ++i) { + out[0] = out[1] = out[2] = y[i]; + out[3] = 255; // not used if n==3 + out += n; + } + } else { + if (is_rgb) { + if (n == 1) + for (i=0; i < z->s->img_x; ++i) + *out++ = stbi__compute_y(coutput[0][i], coutput[1][i], coutput[2][i]); + else { + for (i=0; i < z->s->img_x; ++i, out += 2) { + out[0] = stbi__compute_y(coutput[0][i], coutput[1][i], coutput[2][i]); + out[1] = 255; + } + } + } else if (z->s->img_n == 4 && z->app14_color_transform == 0) { + for (i=0; i < z->s->img_x; ++i) { + stbi_uc m = coutput[3][i]; + stbi_uc r = stbi__blinn_8x8(coutput[0][i], m); + stbi_uc g = stbi__blinn_8x8(coutput[1][i], m); + stbi_uc b = stbi__blinn_8x8(coutput[2][i], m); + out[0] = stbi__compute_y(r, g, b); + out[1] = 255; + out += n; + } + } else if (z->s->img_n == 4 && z->app14_color_transform == 2) { + for (i=0; i < z->s->img_x; ++i) { + out[0] = stbi__blinn_8x8(255 - coutput[0][i], coutput[3][i]); + out[1] = 255; + out += n; + } + } else { + stbi_uc *y = coutput[0]; + if (n == 1) + for (i=0; i < z->s->img_x; ++i) out[i] = y[i]; + else + for (i=0; i < z->s->img_x; ++i) { *out++ = y[i]; *out++ = 255; } + } + } + } + stbi__cleanup_jpeg(z); + *out_x = z->s->img_x; + *out_y = z->s->img_y; + if (comp) *comp = z->s->img_n >= 3 ? 
3 : 1; // report original components, not output + return output; + } +} + +static void *stbi__jpeg_load(stbi__context *s, int *x, int *y, int *comp, int req_comp, stbi__result_info *ri) +{ + unsigned char* result; + stbi__jpeg* j = (stbi__jpeg*) stbi__malloc(sizeof(stbi__jpeg)); + if (!j) return stbi__errpuc("outofmem", "Out of memory"); + memset(j, 0, sizeof(stbi__jpeg)); + STBI_NOTUSED(ri); + j->s = s; + stbi__setup_jpeg(j); + result = load_jpeg_image(j, x,y,comp,req_comp); + STBI_FREE(j); + return result; +} + +static int stbi__jpeg_test(stbi__context *s) +{ + int r; + stbi__jpeg* j = (stbi__jpeg*)stbi__malloc(sizeof(stbi__jpeg)); + if (!j) return stbi__err("outofmem", "Out of memory"); + memset(j, 0, sizeof(stbi__jpeg)); + j->s = s; + stbi__setup_jpeg(j); + r = stbi__decode_jpeg_header(j, STBI__SCAN_type); + stbi__rewind(s); + STBI_FREE(j); + return r; +} + +static int stbi__jpeg_info_raw(stbi__jpeg *j, int *x, int *y, int *comp) +{ + if (!stbi__decode_jpeg_header(j, STBI__SCAN_header)) { + stbi__rewind( j->s ); + return 0; + } + if (x) *x = j->s->img_x; + if (y) *y = j->s->img_y; + if (comp) *comp = j->s->img_n >= 3 ? 3 : 1; + return 1; +} + +static int stbi__jpeg_info(stbi__context *s, int *x, int *y, int *comp) +{ + int result; + stbi__jpeg* j = (stbi__jpeg*) (stbi__malloc(sizeof(stbi__jpeg))); + if (!j) return stbi__err("outofmem", "Out of memory"); + memset(j, 0, sizeof(stbi__jpeg)); + j->s = s; + result = stbi__jpeg_info_raw(j, x, y, comp); + STBI_FREE(j); + return result; +} +#endif + +// public domain zlib decode v0.2 Sean Barrett 2006-11-18 +// simple implementation +// - all input must be provided in an upfront buffer +// - all output is written to a single output buffer (can malloc/realloc) +// performance +// - fast huffman + +#ifndef STBI_NO_ZLIB + +// fast-way is faster to check than jpeg huffman, but slow way is slower +#define STBI__ZFAST_BITS 9 // accelerate all cases in default tables +#define STBI__ZFAST_MASK ((1 << STBI__ZFAST_BITS) - 1) +#define STBI__ZNSYMS 288 // number of symbols in literal/length alphabet + +// zlib-style huffman encoding +// (jpeg packs from left, zlib from right, so can't share code) +typedef struct +{ + stbi__uint16 fast[1 << STBI__ZFAST_BITS]; + stbi__uint16 firstcode[16]; + int maxcode[17]; + stbi__uint16 firstsymbol[16]; + stbi_uc size[STBI__ZNSYMS]; + stbi__uint16 value[STBI__ZNSYMS]; +} stbi__zhuffman; + +stbi_inline static int stbi__bitreverse16(int n) +{ + n = ((n & 0xAAAA) >> 1) | ((n & 0x5555) << 1); + n = ((n & 0xCCCC) >> 2) | ((n & 0x3333) << 2); + n = ((n & 0xF0F0) >> 4) | ((n & 0x0F0F) << 4); + n = ((n & 0xFF00) >> 8) | ((n & 0x00FF) << 8); + return n; +} + +stbi_inline static int stbi__bit_reverse(int v, int bits) +{ + STBI_ASSERT(bits <= 16); + // to bit reverse n bits, reverse 16 and shift + // e.g.
11 bits, bit reverse and shift away 5 + return stbi__bitreverse16(v) >> (16-bits); +} + +static int stbi__zbuild_huffman(stbi__zhuffman *z, const stbi_uc *sizelist, int num) +{ + int i,k=0; + int code, next_code[16], sizes[17]; + + // DEFLATE spec for generating codes + memset(sizes, 0, sizeof(sizes)); + memset(z->fast, 0, sizeof(z->fast)); + for (i=0; i < num; ++i) + ++sizes[sizelist[i]]; + sizes[0] = 0; + for (i=1; i < 16; ++i) + if (sizes[i] > (1 << i)) + return stbi__err("bad sizes", "Corrupt PNG"); + code = 0; + for (i=1; i < 16; ++i) { + next_code[i] = code; + z->firstcode[i] = (stbi__uint16) code; + z->firstsymbol[i] = (stbi__uint16) k; + code = (code + sizes[i]); + if (sizes[i]) + if (code-1 >= (1 << i)) return stbi__err("bad codelengths","Corrupt PNG"); + z->maxcode[i] = code << (16-i); // preshift for inner loop + code <<= 1; + k += sizes[i]; + } + z->maxcode[16] = 0x10000; // sentinel + for (i=0; i < num; ++i) { + int s = sizelist[i]; + if (s) { + int c = next_code[s] - z->firstcode[s] + z->firstsymbol[s]; + stbi__uint16 fastv = (stbi__uint16) ((s << 9) | i); + z->size [c] = (stbi_uc ) s; + z->value[c] = (stbi__uint16) i; + if (s <= STBI__ZFAST_BITS) { + int j = stbi__bit_reverse(next_code[s],s); + while (j < (1 << STBI__ZFAST_BITS)) { + z->fast[j] = fastv; + j += (1 << s); + } + } + ++next_code[s]; + } + } + return 1; +} + +// zlib-from-memory implementation for PNG reading +// because PNG allows splitting the zlib stream arbitrarily, +// and it's annoying structurally to have PNG call ZLIB call PNG, +// we require PNG read all the IDATs and combine them into a single +// memory buffer + +typedef struct +{ + stbi_uc *zbuffer, *zbuffer_end; + int num_bits; + int hit_zeof_once; + stbi__uint32 code_buffer; + + char *zout; + char *zout_start; + char *zout_end; + int z_expandable; + + stbi__zhuffman z_length, z_distance; +} stbi__zbuf; + +stbi_inline static int stbi__zeof(stbi__zbuf *z) +{ + return (z->zbuffer >= z->zbuffer_end); +} + +stbi_inline static stbi_uc stbi__zget8(stbi__zbuf *z) +{ + return stbi__zeof(z) ? 0 : *z->zbuffer++; +} + +static void stbi__fill_bits(stbi__zbuf *z) +{ + do { + if (z->code_buffer >= (1U << z->num_bits)) { + z->zbuffer = z->zbuffer_end; /* treat this as EOF so we fail. */ + return; + } + z->code_buffer |= (unsigned int) stbi__zget8(z) << z->num_bits; + z->num_bits += 8; + } while (z->num_bits <= 24); +} + +stbi_inline static unsigned int stbi__zreceive(stbi__zbuf *z, int n) +{ + unsigned int k; + if (z->num_bits < n) stbi__fill_bits(z); + k = z->code_buffer & ((1 << n) - 1); + z->code_buffer >>= n; + z->num_bits -= n; + return k; +} + +static int stbi__zhuffman_decode_slowpath(stbi__zbuf *a, stbi__zhuffman *z) +{ + int b,s,k; + // not resolved by fast table, so compute it the slow way + // use jpeg approach, which requires MSbits at top + k = stbi__bit_reverse(a->code_buffer, 16); + for (s=STBI__ZFAST_BITS+1; ; ++s) + if (k < z->maxcode[s]) + break; + if (s >= 16) return -1; // invalid code! + // code size is s, so: + b = (k >> (16-s)) - z->firstcode[s] + z->firstsymbol[s]; + if (b >= STBI__ZNSYMS) return -1; // some data was corrupt somewhere! + if (z->size[b] != s) return -1; // was originally an assert, but report failure instead. 
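+   // (walk-through: zlib transmits codes LSB-first, so the buffer is
+   //  bit-reversed to MSB-first and compared against maxcode[s], which
+   //  stbi__zbuild_huffman preshifted to (firstcode+count) << (16-s);
+   //  the first s with k < maxcode[s] is the code length, and b is the
+   //  symbol's offset inside that length's contiguous code range.)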
+ a->code_buffer >>= s; + a->num_bits -= s; + return z->value[b]; +} + +stbi_inline static int stbi__zhuffman_decode(stbi__zbuf *a, stbi__zhuffman *z) +{ + int b,s; + if (a->num_bits < 16) { + if (stbi__zeof(a)) { + if (!a->hit_zeof_once) { + // This is the first time we hit eof, insert 16 extra padding bits + // to allow us to keep going; if we actually consume any of them + // though, that is invalid data. This is caught later. + a->hit_zeof_once = 1; + a->num_bits += 16; // add 16 implicit zero bits + } else { + // We already inserted our extra 16 padding bits and are again + // out, this stream is actually prematurely terminated. + return -1; + } + } else { + stbi__fill_bits(a); + } + } + b = z->fast[a->code_buffer & STBI__ZFAST_MASK]; + if (b) { + s = b >> 9; + a->code_buffer >>= s; + a->num_bits -= s; + return b & 511; + } + return stbi__zhuffman_decode_slowpath(a, z); +} + +static int stbi__zexpand(stbi__zbuf *z, char *zout, int n) // need to make room for n bytes +{ + char *q; + unsigned int cur, limit, old_limit; + z->zout = zout; + if (!z->z_expandable) return stbi__err("output buffer limit","Corrupt PNG"); + cur = (unsigned int) (z->zout - z->zout_start); + limit = old_limit = (unsigned) (z->zout_end - z->zout_start); + if (UINT_MAX - cur < (unsigned) n) return stbi__err("outofmem", "Out of memory"); + while (cur + n > limit) { + if(limit > UINT_MAX / 2) return stbi__err("outofmem", "Out of memory"); + limit *= 2; + } + q = (char *) STBI_REALLOC_SIZED(z->zout_start, old_limit, limit); + STBI_NOTUSED(old_limit); + if (q == NULL) return stbi__err("outofmem", "Out of memory"); + z->zout_start = q; + z->zout = q + cur; + z->zout_end = q + limit; + return 1; +} + +static const int stbi__zlength_base[31] = { + 3,4,5,6,7,8,9,10,11,13, + 15,17,19,23,27,31,35,43,51,59, + 67,83,99,115,131,163,195,227,258,0,0 }; + +static const int stbi__zlength_extra[31]= +{ 0,0,0,0,0,0,0,0,1,1,1,1,2,2,2,2,3,3,3,3,4,4,4,4,5,5,5,5,0,0,0 }; + +static const int stbi__zdist_base[32] = { 1,2,3,4,5,7,9,13,17,25,33,49,65,97,129,193, +257,385,513,769,1025,1537,2049,3073,4097,6145,8193,12289,16385,24577,0,0}; + +static const int stbi__zdist_extra[32] = +{ 0,0,0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7,8,8,9,9,10,10,11,11,12,12,13,13}; + +static int stbi__parse_huffman_block(stbi__zbuf *a) +{ + char *zout = a->zout; + for(;;) { + int z = stbi__zhuffman_decode(a, &a->z_length); + if (z < 256) { + if (z < 0) return stbi__err("bad huffman code","Corrupt PNG"); // error in huffman codes + if (zout >= a->zout_end) { + if (!stbi__zexpand(a, zout, 1)) return 0; + zout = a->zout; + } + *zout++ = (char) z; + } else { + stbi_uc *p; + int len,dist; + if (z == 256) { + a->zout = zout; + if (a->hit_zeof_once && a->num_bits < 16) { + // The first time we hit zeof, we inserted 16 extra zero bits into our bit + // buffer so the decoder can just do its speculative decoding. But if we + // actually consumed any of those bits (which is the case when num_bits < 16), + // the stream actually read past the end so it is malformed.
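+            // (e.g. the input ran out of bytes earlier: hit_zeof_once
+            //  padded the buffer with 16 phantom zero bits, and consuming
+            //  even one of them leaves num_bits < 16 here, meaning the
+            //  end-of-block code itself was read past the real data.)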
+ return stbi__err("unexpected end","Corrupt PNG"); + } + return 1; + } + if (z >= 286) return stbi__err("bad huffman code","Corrupt PNG"); // per DEFLATE, length codes 286 and 287 must not appear in compressed data + z -= 257; + len = stbi__zlength_base[z]; + if (stbi__zlength_extra[z]) len += stbi__zreceive(a, stbi__zlength_extra[z]); + z = stbi__zhuffman_decode(a, &a->z_distance); + if (z < 0 || z >= 30) return stbi__err("bad huffman code","Corrupt PNG"); // per DEFLATE, distance codes 30 and 31 must not appear in compressed data + dist = stbi__zdist_base[z]; + if (stbi__zdist_extra[z]) dist += stbi__zreceive(a, stbi__zdist_extra[z]); + if (zout - a->zout_start < dist) return stbi__err("bad dist","Corrupt PNG"); + if (len > a->zout_end - zout) { + if (!stbi__zexpand(a, zout, len)) return 0; + zout = a->zout; + } + p = (stbi_uc *) (zout - dist); + if (dist == 1) { // run of one byte; common in images. + stbi_uc v = *p; + if (len) { do *zout++ = v; while (--len); } + } else { + if (len) { do *zout++ = *p++; while (--len); } + } + } + } +} + +static int stbi__compute_huffman_codes(stbi__zbuf *a) +{ + static const stbi_uc length_dezigzag[19] = { 16,17,18,0,8,7,9,6,10,5,11,4,12,3,13,2,14,1,15 }; + stbi__zhuffman z_codelength; + stbi_uc lencodes[286+32+137];//padding for maximum single op + stbi_uc codelength_sizes[19]; + int i,n; + + int hlit = stbi__zreceive(a,5) + 257; + int hdist = stbi__zreceive(a,5) + 1; + int hclen = stbi__zreceive(a,4) + 4; + int ntot = hlit + hdist; + + memset(codelength_sizes, 0, sizeof(codelength_sizes)); + for (i=0; i < hclen; ++i) { + int s = stbi__zreceive(a,3); + codelength_sizes[length_dezigzag[i]] = (stbi_uc) s; + } + if (!stbi__zbuild_huffman(&z_codelength, codelength_sizes, 19)) return 0; + + n = 0; + while (n < ntot) { + int c = stbi__zhuffman_decode(a, &z_codelength); + if (c < 0 || c >= 19) return stbi__err("bad codelengths", "Corrupt PNG"); + if (c < 16) + lencodes[n++] = (stbi_uc) c; + else { + stbi_uc fill = 0; + if (c == 16) { + c = stbi__zreceive(a,2)+3; + if (n == 0) return stbi__err("bad codelengths", "Corrupt PNG"); + fill = lencodes[n-1]; + } else if (c == 17) { + c = stbi__zreceive(a,3)+3; + } else if (c == 18) { + c = stbi__zreceive(a,7)+11; + } else { + return stbi__err("bad codelengths", "Corrupt PNG"); + } + if (ntot - n < c) return stbi__err("bad codelengths", "Corrupt PNG"); + memset(lencodes+n, fill, c); + n += c; + } + } + if (n != ntot) return stbi__err("bad codelengths","Corrupt PNG"); + if (!stbi__zbuild_huffman(&a->z_length, lencodes, hlit)) return 0; + if (!stbi__zbuild_huffman(&a->z_distance, lencodes+hlit, hdist)) return 0; + return 1; +} + +static int stbi__parse_uncompressed_block(stbi__zbuf *a) +{ + stbi_uc header[4]; + int len,nlen,k; + if (a->num_bits & 7) + stbi__zreceive(a, a->num_bits & 7); // discard + // drain the bit-packed data into header + k = 0; + while (a->num_bits > 0) { + header[k++] = (stbi_uc) (a->code_buffer & 255); // suppress MSVC run-time check + a->code_buffer >>= 8; + a->num_bits -= 8; + } + if (a->num_bits < 0) return stbi__err("zlib corrupt","Corrupt PNG"); + // now fill header the normal way + while (k < 4) + header[k++] = stbi__zget8(a); + len = header[1] * 256 + header[0]; + nlen = header[3] * 256 + header[2]; + if (nlen != (len ^ 0xffff)) return stbi__err("zlib corrupt","Corrupt PNG"); + if (a->zbuffer + len > a->zbuffer_end) return stbi__err("read past buffer","Corrupt PNG"); + if (a->zout + len > a->zout_end) + if (!stbi__zexpand(a, a->zout, len)) return 0; + memcpy(a->zout, a->zbuffer, len); + 
a->zbuffer += len; + a->zout += len; + return 1; +} + +static int stbi__parse_zlib_header(stbi__zbuf *a) +{ + int cmf = stbi__zget8(a); + int cm = cmf & 15; + /* int cinfo = cmf >> 4; */ + int flg = stbi__zget8(a); + if (stbi__zeof(a)) return stbi__err("bad zlib header","Corrupt PNG"); // zlib spec + if ((cmf*256+flg) % 31 != 0) return stbi__err("bad zlib header","Corrupt PNG"); // zlib spec + if (flg & 32) return stbi__err("no preset dict","Corrupt PNG"); // preset dictionary not allowed in png + if (cm != 8) return stbi__err("bad compression","Corrupt PNG"); // DEFLATE required for png + // window = 1 << (8 + cinfo)... but who cares, we fully buffer output + return 1; +} + +static const stbi_uc stbi__zdefault_length[STBI__ZNSYMS] = +{ + 8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8, 8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8, + 8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8, 8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8, + 8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8, 8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8, + 8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8, 8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8, + 8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8, 9,9,9,9,9,9,9,9,9,9,9,9,9,9,9,9, + 9,9,9,9,9,9,9,9,9,9,9,9,9,9,9,9, 9,9,9,9,9,9,9,9,9,9,9,9,9,9,9,9, + 9,9,9,9,9,9,9,9,9,9,9,9,9,9,9,9, 9,9,9,9,9,9,9,9,9,9,9,9,9,9,9,9, + 9,9,9,9,9,9,9,9,9,9,9,9,9,9,9,9, 9,9,9,9,9,9,9,9,9,9,9,9,9,9,9,9, + 7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7, 7,7,7,7,7,7,7,7,8,8,8,8,8,8,8,8 +}; +static const stbi_uc stbi__zdefault_distance[32] = +{ + 5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5 +}; +/* +Init algorithm: +{ + int i; // use <= to match clearly with spec + for (i=0; i <= 143; ++i) stbi__zdefault_length[i] = 8; + for ( ; i <= 255; ++i) stbi__zdefault_length[i] = 9; + for ( ; i <= 279; ++i) stbi__zdefault_length[i] = 7; + for ( ; i <= 287; ++i) stbi__zdefault_length[i] = 8; + + for (i=0; i <= 31; ++i) stbi__zdefault_distance[i] = 5; +} +*/ + +static int stbi__parse_zlib(stbi__zbuf *a, int parse_header) +{ + int final, type; + if (parse_header) + if (!stbi__parse_zlib_header(a)) return 0; + a->num_bits = 0; + a->code_buffer = 0; + a->hit_zeof_once = 0; + do { + final = stbi__zreceive(a,1); + type = stbi__zreceive(a,2); + if (type == 0) { + if (!stbi__parse_uncompressed_block(a)) return 0; + } else if (type == 3) { + return 0; + } else { + if (type == 1) { + // use fixed code lengths + if (!stbi__zbuild_huffman(&a->z_length , stbi__zdefault_length , STBI__ZNSYMS)) return 0; + if (!stbi__zbuild_huffman(&a->z_distance, stbi__zdefault_distance, 32)) return 0; + } else { + if (!stbi__compute_huffman_codes(a)) return 0; + } + if (!stbi__parse_huffman_block(a)) return 0; + } + } while (!final); + return 1; +} + +static int stbi__do_zlib(stbi__zbuf *a, char *obuf, int olen, int exp, int parse_header) +{ + a->zout_start = obuf; + a->zout = obuf; + a->zout_end = obuf + olen; + a->z_expandable = exp; + + return stbi__parse_zlib(a, parse_header); +} + +STBIDEF char *stbi_zlib_decode_malloc_guesssize(const char *buffer, int len, int initial_size, int *outlen) +{ + stbi__zbuf a; + char *p = (char *) stbi__malloc(initial_size); + if (p == NULL) return NULL; + a.zbuffer = (stbi_uc *) buffer; + a.zbuffer_end = (stbi_uc *) buffer + len; + if (stbi__do_zlib(&a, p, initial_size, 1, 1)) { + if (outlen) *outlen = (int) (a.zout - a.zout_start); + return a.zout_start; + } else { + STBI_FREE(a.zout_start); + return NULL; + } +} + +STBIDEF char *stbi_zlib_decode_malloc(char const *buffer, int len, int *outlen) +{ + return stbi_zlib_decode_malloc_guesssize(buffer, len, 16384, outlen); +} + +STBIDEF char 
*stbi_zlib_decode_malloc_guesssize_headerflag(const char *buffer, int len, int initial_size, int *outlen, int parse_header) +{ + stbi__zbuf a; + char *p = (char *) stbi__malloc(initial_size); + if (p == NULL) return NULL; + a.zbuffer = (stbi_uc *) buffer; + a.zbuffer_end = (stbi_uc *) buffer + len; + if (stbi__do_zlib(&a, p, initial_size, 1, parse_header)) { + if (outlen) *outlen = (int) (a.zout - a.zout_start); + return a.zout_start; + } else { + STBI_FREE(a.zout_start); + return NULL; + } +} + +STBIDEF int stbi_zlib_decode_buffer(char *obuffer, int olen, char const *ibuffer, int ilen) +{ + stbi__zbuf a; + a.zbuffer = (stbi_uc *) ibuffer; + a.zbuffer_end = (stbi_uc *) ibuffer + ilen; + if (stbi__do_zlib(&a, obuffer, olen, 0, 1)) + return (int) (a.zout - a.zout_start); + else + return -1; +} + +STBIDEF char *stbi_zlib_decode_noheader_malloc(char const *buffer, int len, int *outlen) +{ + stbi__zbuf a; + char *p = (char *) stbi__malloc(16384); + if (p == NULL) return NULL; + a.zbuffer = (stbi_uc *) buffer; + a.zbuffer_end = (stbi_uc *) buffer+len; + if (stbi__do_zlib(&a, p, 16384, 1, 0)) { + if (outlen) *outlen = (int) (a.zout - a.zout_start); + return a.zout_start; + } else { + STBI_FREE(a.zout_start); + return NULL; + } +} + +STBIDEF int stbi_zlib_decode_noheader_buffer(char *obuffer, int olen, const char *ibuffer, int ilen) +{ + stbi__zbuf a; + a.zbuffer = (stbi_uc *) ibuffer; + a.zbuffer_end = (stbi_uc *) ibuffer + ilen; + if (stbi__do_zlib(&a, obuffer, olen, 0, 0)) + return (int) (a.zout - a.zout_start); + else + return -1; +} +#endif + +// public domain "baseline" PNG decoder v0.10 Sean Barrett 2006-11-18 +// simple implementation +// - only 8-bit samples +// - no CRC checking +// - allocates lots of intermediate memory +// - avoids problem of streaming data between subsystems +// - avoids explicit window management +// performance +// - uses stb_zlib, a PD zlib implementation with fast huffman decoding + +#ifndef STBI_NO_PNG +typedef struct +{ + stbi__uint32 length; + stbi__uint32 type; +} stbi__pngchunk; + +static stbi__pngchunk stbi__get_chunk_header(stbi__context *s) +{ + stbi__pngchunk c; + c.length = stbi__get32be(s); + c.type = stbi__get32be(s); + return c; +} + +static int stbi__check_png_header(stbi__context *s) +{ + static const stbi_uc png_sig[8] = { 137,80,78,71,13,10,26,10 }; + int i; + for (i=0; i < 8; ++i) + if (stbi__get8(s) != png_sig[i]) return stbi__err("bad png sig","Not a PNG"); + return 1; +} + +typedef struct +{ + stbi__context *s; + stbi_uc *idata, *expanded, *out; + int depth; +} stbi__png; + + +enum { + STBI__F_none=0, + STBI__F_sub=1, + STBI__F_up=2, + STBI__F_avg=3, + STBI__F_paeth=4, + // synthetic filter used for first scanline to avoid needing a dummy row of 0s + STBI__F_avg_first +}; + +static stbi_uc first_row_filter[5] = +{ + STBI__F_none, + STBI__F_sub, + STBI__F_none, + STBI__F_avg_first, + STBI__F_sub // Paeth with b=c=0 turns out to be equivalent to sub +}; + +static int stbi__paeth(int a, int b, int c) +{ + // This formulation looks very different from the reference in the PNG spec, but is + // actually equivalent and has favorable data dependencies and admits straightforward + // generation of branch-free code, which helps performance significantly. + int thresh = c*3 - (a + b); + int lo = a < b ? a : b; + int hi = a < b ? b : a; + int t0 = (hi <= thresh) ? lo : c; + int t1 = (thresh <= lo) ? 
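+   // (spot check against the spec form: a=50 b=60 c=40 gives p=a+b-c=70,
+   //  pa=20 pb=10 pc=30, so the spec picks b; here thresh=3*40-110=10,
+   //  lo=50, hi=60: hi<=thresh is false so t0=c, thresh<=lo is true so
+   //  t1=hi=60 -- the same choice with no branches.)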
hi : t0; + return t1; +} + +static const stbi_uc stbi__depth_scale_table[9] = { 0, 0xff, 0x55, 0, 0x11, 0,0,0, 0x01 }; + +// adds an extra all-255 alpha channel +// dest == src is legal +// img_n must be 1 or 3 +static void stbi__create_png_alpha_expand8(stbi_uc *dest, stbi_uc *src, stbi__uint32 x, int img_n) +{ + int i; + // must process data backwards since we allow dest==src + if (img_n == 1) { + for (i=x-1; i >= 0; --i) { + dest[i*2+1] = 255; + dest[i*2+0] = src[i]; + } + } else { + STBI_ASSERT(img_n == 3); + for (i=x-1; i >= 0; --i) { + dest[i*4+3] = 255; + dest[i*4+2] = src[i*3+2]; + dest[i*4+1] = src[i*3+1]; + dest[i*4+0] = src[i*3+0]; + } + } +} + +// create the png data from post-deflated data +static int stbi__create_png_image_raw(stbi__png *a, stbi_uc *raw, stbi__uint32 raw_len, int out_n, stbi__uint32 x, stbi__uint32 y, int depth, int color) +{ + int bytes = (depth == 16 ? 2 : 1); + stbi__context *s = a->s; + stbi__uint32 i,j,stride = x*out_n*bytes; + stbi__uint32 img_len, img_width_bytes; + stbi_uc *filter_buf; + int all_ok = 1; + int k; + int img_n = s->img_n; // copy it into a local for later + + int output_bytes = out_n*bytes; + int filter_bytes = img_n*bytes; + int width = x; + + STBI_ASSERT(out_n == s->img_n || out_n == s->img_n+1); + a->out = (stbi_uc *) stbi__malloc_mad3(x, y, output_bytes, 0); // extra bytes to write off the end into + if (!a->out) return stbi__err("outofmem", "Out of memory"); + + // note: error exits here don't need to clean up a->out individually, + // stbi__do_png always does on error. + if (!stbi__mad3sizes_valid(img_n, x, depth, 7)) return stbi__err("too large", "Corrupt PNG"); + img_width_bytes = (((img_n * x * depth) + 7) >> 3); + if (!stbi__mad2sizes_valid(img_width_bytes, y, img_width_bytes)) return stbi__err("too large", "Corrupt PNG"); + img_len = (img_width_bytes + 1) * y; + + // we used to check for exact match between raw_len and img_len on non-interlaced PNGs, + // but issue #276 reported a PNG in the wild that had extra data at the end (all zeros), + // so just check for raw_len < img_len always. + if (raw_len < img_len) return stbi__err("not enough pixels","Corrupt PNG"); + + // Allocate two scan lines worth of filter workspace buffer. 
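+   // (two rows suffice: below, cur = filter_buf + (j&1)*img_width_bytes and
+   //  prior = filter_buf + (~j&1)*img_width_bytes alternate, so row j only
+   //  ever filters against row j-1; row 0 instead remaps its filter through
+   //  first_row_filter[] so it never reads a nonexistent prior row.)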
+ filter_buf = (stbi_uc *) stbi__malloc_mad2(img_width_bytes, 2, 0); + if (!filter_buf) return stbi__err("outofmem", "Out of memory"); + + // Filtering for low-bit-depth images + if (depth < 8) { + filter_bytes = 1; + width = img_width_bytes; + } + + for (j=0; j < y; ++j) { + // cur/prior filter buffers alternate + stbi_uc *cur = filter_buf + (j & 1)*img_width_bytes; + stbi_uc *prior = filter_buf + (~j & 1)*img_width_bytes; + stbi_uc *dest = a->out + stride*j; + int nk = width * filter_bytes; + int filter = *raw++; + + // check filter type + if (filter > 4) { + all_ok = stbi__err("invalid filter","Corrupt PNG"); + break; + } + + // if first row, use special filter that doesn't sample previous row + if (j == 0) filter = first_row_filter[filter]; + + // perform actual filtering + switch (filter) { + case STBI__F_none: + memcpy(cur, raw, nk); + break; + case STBI__F_sub: + memcpy(cur, raw, filter_bytes); + for (k = filter_bytes; k < nk; ++k) + cur[k] = STBI__BYTECAST(raw[k] + cur[k-filter_bytes]); + break; + case STBI__F_up: + for (k = 0; k < nk; ++k) + cur[k] = STBI__BYTECAST(raw[k] + prior[k]); + break; + case STBI__F_avg: + for (k = 0; k < filter_bytes; ++k) + cur[k] = STBI__BYTECAST(raw[k] + (prior[k]>>1)); + for (k = filter_bytes; k < nk; ++k) + cur[k] = STBI__BYTECAST(raw[k] + ((prior[k] + cur[k-filter_bytes])>>1)); + break; + case STBI__F_paeth: + for (k = 0; k < filter_bytes; ++k) + cur[k] = STBI__BYTECAST(raw[k] + prior[k]); // prior[k] == stbi__paeth(0,prior[k],0) + for (k = filter_bytes; k < nk; ++k) + cur[k] = STBI__BYTECAST(raw[k] + stbi__paeth(cur[k-filter_bytes], prior[k], prior[k-filter_bytes])); + break; + case STBI__F_avg_first: + memcpy(cur, raw, filter_bytes); + for (k = filter_bytes; k < nk; ++k) + cur[k] = STBI__BYTECAST(raw[k] + (cur[k-filter_bytes] >> 1)); + break; + } + + raw += nk; + + // expand decoded bits in cur to dest, also adding an extra alpha channel if desired + if (depth < 8) { + stbi_uc scale = (color == 0) ? 
stbi__depth_scale_table[depth] : 1; // scale grayscale values to 0..255 range + stbi_uc *in = cur; + stbi_uc *out = dest; + stbi_uc inb = 0; + stbi__uint32 nsmp = x*img_n; + + // expand bits to bytes first + if (depth == 4) { + for (i=0; i < nsmp; ++i) { + if ((i & 1) == 0) inb = *in++; + *out++ = scale * (inb >> 4); + inb <<= 4; + } + } else if (depth == 2) { + for (i=0; i < nsmp; ++i) { + if ((i & 3) == 0) inb = *in++; + *out++ = scale * (inb >> 6); + inb <<= 2; + } + } else { + STBI_ASSERT(depth == 1); + for (i=0; i < nsmp; ++i) { + if ((i & 7) == 0) inb = *in++; + *out++ = scale * (inb >> 7); + inb <<= 1; + } + } + + // insert alpha=255 values if desired + if (img_n != out_n) + stbi__create_png_alpha_expand8(dest, dest, x, img_n); + } else if (depth == 8) { + if (img_n == out_n) + memcpy(dest, cur, x*img_n); + else + stbi__create_png_alpha_expand8(dest, cur, x, img_n); + } else if (depth == 16) { + // convert the image data from big-endian to platform-native + stbi__uint16 *dest16 = (stbi__uint16*)dest; + stbi__uint32 nsmp = x*img_n; + + if (img_n == out_n) { + for (i = 0; i < nsmp; ++i, ++dest16, cur += 2) + *dest16 = (cur[0] << 8) | cur[1]; + } else { + STBI_ASSERT(img_n+1 == out_n); + if (img_n == 1) { + for (i = 0; i < x; ++i, dest16 += 2, cur += 2) { + dest16[0] = (cur[0] << 8) | cur[1]; + dest16[1] = 0xffff; + } + } else { + STBI_ASSERT(img_n == 3); + for (i = 0; i < x; ++i, dest16 += 4, cur += 6) { + dest16[0] = (cur[0] << 8) | cur[1]; + dest16[1] = (cur[2] << 8) | cur[3]; + dest16[2] = (cur[4] << 8) | cur[5]; + dest16[3] = 0xffff; + } + } + } + } + } + + STBI_FREE(filter_buf); + if (!all_ok) return 0; + + return 1; +} + +static int stbi__create_png_image(stbi__png *a, stbi_uc *image_data, stbi__uint32 image_data_len, int out_n, int depth, int color, int interlaced) +{ + int bytes = (depth == 16 ? 2 : 1); + int out_bytes = out_n * bytes; + stbi_uc *final; + int p; + if (!interlaced) + return stbi__create_png_image_raw(a, image_data, image_data_len, out_n, a->s->img_x, a->s->img_y, depth, color); + + // de-interlacing + final = (stbi_uc *) stbi__malloc_mad3(a->s->img_x, a->s->img_y, out_bytes, 0); + if (!final) return stbi__err("outofmem", "Out of memory"); + for (p=0; p < 7; ++p) { + int xorig[] = { 0,4,0,2,0,1,0 }; + int yorig[] = { 0,0,4,0,2,0,1 }; + int xspc[] = { 8,8,4,4,2,2,1 }; + int yspc[] = { 8,8,8,4,4,2,2 }; + int i,j,x,y; + // pass1_x[4] = 0, pass1_x[5] = 1, pass1_x[12] = 1 + x = (a->s->img_x - xorig[p] + xspc[p]-1) / xspc[p]; + y = (a->s->img_y - yorig[p] + yspc[p]-1) / yspc[p]; + if (x && y) { + stbi__uint32 img_len = ((((a->s->img_n * x * depth) + 7) >> 3) + 1) * y; + if (!stbi__create_png_image_raw(a, image_data, image_data_len, out_n, x, y, depth, color)) { + STBI_FREE(final); + return 0; + } + for (j=0; j < y; ++j) { + for (i=0; i < x; ++i) { + int out_y = j*yspc[p]+yorig[p]; + int out_x = i*xspc[p]+xorig[p]; + memcpy(final + out_y*a->s->img_x*out_bytes + out_x*out_bytes, + a->out + (j*x+i)*out_bytes, out_bytes); + } + } + STBI_FREE(a->out); + image_data += img_len; + image_data_len -= img_len; + } + } + a->out = final; + + return 1; +} + +static int stbi__compute_transparency(stbi__png *z, stbi_uc tc[3], int out_n) +{ + stbi__context *s = z->s; + stbi__uint32 i, pixel_count = s->img_x * s->img_y; + stbi_uc *p = z->out; + + // compute color-based transparency, assuming we've + // already got 255 as the alpha value in the output + STBI_ASSERT(out_n == 2 || out_n == 4); + + if (out_n == 2) { + for (i=0; i < pixel_count; ++i) { + p[1] = (p[0] == tc[0] ? 
0 : 255); + p += 2; + } + } else { + for (i=0; i < pixel_count; ++i) { + if (p[0] == tc[0] && p[1] == tc[1] && p[2] == tc[2]) + p[3] = 0; + p += 4; + } + } + return 1; +} + +static int stbi__compute_transparency16(stbi__png *z, stbi__uint16 tc[3], int out_n) +{ + stbi__context *s = z->s; + stbi__uint32 i, pixel_count = s->img_x * s->img_y; + stbi__uint16 *p = (stbi__uint16*) z->out; + + // compute color-based transparency, assuming we've + // already got 65535 as the alpha value in the output + STBI_ASSERT(out_n == 2 || out_n == 4); + + if (out_n == 2) { + for (i = 0; i < pixel_count; ++i) { + p[1] = (p[0] == tc[0] ? 0 : 65535); + p += 2; + } + } else { + for (i = 0; i < pixel_count; ++i) { + if (p[0] == tc[0] && p[1] == tc[1] && p[2] == tc[2]) + p[3] = 0; + p += 4; + } + } + return 1; +} + +static int stbi__expand_png_palette(stbi__png *a, stbi_uc *palette, int len, int pal_img_n) +{ + stbi__uint32 i, pixel_count = a->s->img_x * a->s->img_y; + stbi_uc *p, *temp_out, *orig = a->out; + + p = (stbi_uc *) stbi__malloc_mad2(pixel_count, pal_img_n, 0); + if (p == NULL) return stbi__err("outofmem", "Out of memory"); + + // between here and free(out) below, exiting would leak + temp_out = p; + + if (pal_img_n == 3) { + for (i=0; i < pixel_count; ++i) { + int n = orig[i]*4; + p[0] = palette[n ]; + p[1] = palette[n+1]; + p[2] = palette[n+2]; + p += 3; + } + } else { + for (i=0; i < pixel_count; ++i) { + int n = orig[i]*4; + p[0] = palette[n ]; + p[1] = palette[n+1]; + p[2] = palette[n+2]; + p[3] = palette[n+3]; + p += 4; + } + } + STBI_FREE(a->out); + a->out = temp_out; + + STBI_NOTUSED(len); + + return 1; +} + +static int stbi__unpremultiply_on_load_global = 0; +static int stbi__de_iphone_flag_global = 0; + +STBIDEF void stbi_set_unpremultiply_on_load(int flag_true_if_should_unpremultiply) +{ + stbi__unpremultiply_on_load_global = flag_true_if_should_unpremultiply; +} + +STBIDEF void stbi_convert_iphone_png_to_rgb(int flag_true_if_should_convert) +{ + stbi__de_iphone_flag_global = flag_true_if_should_convert; +} + +#ifndef STBI_THREAD_LOCAL +#define stbi__unpremultiply_on_load stbi__unpremultiply_on_load_global +#define stbi__de_iphone_flag stbi__de_iphone_flag_global +#else +static STBI_THREAD_LOCAL int stbi__unpremultiply_on_load_local, stbi__unpremultiply_on_load_set; +static STBI_THREAD_LOCAL int stbi__de_iphone_flag_local, stbi__de_iphone_flag_set; + +STBIDEF void stbi_set_unpremultiply_on_load_thread(int flag_true_if_should_unpremultiply) +{ + stbi__unpremultiply_on_load_local = flag_true_if_should_unpremultiply; + stbi__unpremultiply_on_load_set = 1; +} + +STBIDEF void stbi_convert_iphone_png_to_rgb_thread(int flag_true_if_should_convert) +{ + stbi__de_iphone_flag_local = flag_true_if_should_convert; + stbi__de_iphone_flag_set = 1; +} + +#define stbi__unpremultiply_on_load (stbi__unpremultiply_on_load_set \ + ? stbi__unpremultiply_on_load_local \ + : stbi__unpremultiply_on_load_global) +#define stbi__de_iphone_flag (stbi__de_iphone_flag_set \ + ?
stbi__de_iphone_flag_local \ + : stbi__de_iphone_flag_global) +#endif // STBI_THREAD_LOCAL + +static void stbi__de_iphone(stbi__png *z) +{ + stbi__context *s = z->s; + stbi__uint32 i, pixel_count = s->img_x * s->img_y; + stbi_uc *p = z->out; + + if (s->img_out_n == 3) { // convert bgr to rgb + for (i=0; i < pixel_count; ++i) { + stbi_uc t = p[0]; + p[0] = p[2]; + p[2] = t; + p += 3; + } + } else { + STBI_ASSERT(s->img_out_n == 4); + if (stbi__unpremultiply_on_load) { + // convert bgr to rgb and unpremultiply + for (i=0; i < pixel_count; ++i) { + stbi_uc a = p[3]; + stbi_uc t = p[0]; + if (a) { + stbi_uc half = a / 2; + p[0] = (p[2] * 255 + half) / a; + p[1] = (p[1] * 255 + half) / a; + p[2] = ( t * 255 + half) / a; + } else { + p[0] = p[2]; + p[2] = t; + } + p += 4; + } + } else { + // convert bgr to rgb + for (i=0; i < pixel_count; ++i) { + stbi_uc t = p[0]; + p[0] = p[2]; + p[2] = t; + p += 4; + } + } + } +} + +#define STBI__PNG_TYPE(a,b,c,d) (((unsigned) (a) << 24) + ((unsigned) (b) << 16) + ((unsigned) (c) << 8) + (unsigned) (d)) + +static int stbi__parse_png_file(stbi__png *z, int scan, int req_comp) +{ + stbi_uc palette[1024], pal_img_n=0; + stbi_uc has_trans=0, tc[3]={0}; + stbi__uint16 tc16[3]; + stbi__uint32 ioff=0, idata_limit=0, i, pal_len=0; + int first=1,k,interlace=0, color=0, is_iphone=0; + stbi__context *s = z->s; + + z->expanded = NULL; + z->idata = NULL; + z->out = NULL; + + if (!stbi__check_png_header(s)) return 0; + + if (scan == STBI__SCAN_type) return 1; + + for (;;) { + stbi__pngchunk c = stbi__get_chunk_header(s); + switch (c.type) { + case STBI__PNG_TYPE('C','g','B','I'): + is_iphone = 1; + stbi__skip(s, c.length); + break; + case STBI__PNG_TYPE('I','H','D','R'): { + int comp,filter; + if (!first) return stbi__err("multiple IHDR","Corrupt PNG"); + first = 0; + if (c.length != 13) return stbi__err("bad IHDR len","Corrupt PNG"); + s->img_x = stbi__get32be(s); + s->img_y = stbi__get32be(s); + if (s->img_y > STBI_MAX_DIMENSIONS) return stbi__err("too large","Very large image (corrupt?)"); + if (s->img_x > STBI_MAX_DIMENSIONS) return stbi__err("too large","Very large image (corrupt?)"); + z->depth = stbi__get8(s); if (z->depth != 1 && z->depth != 2 && z->depth != 4 && z->depth != 8 && z->depth != 16) return stbi__err("1/2/4/8/16-bit only","PNG not supported: 1/2/4/8/16-bit only"); + color = stbi__get8(s); if (color > 6) return stbi__err("bad ctype","Corrupt PNG"); + if (color == 3 && z->depth == 16) return stbi__err("bad ctype","Corrupt PNG"); + if (color == 3) pal_img_n = 3; else if (color & 1) return stbi__err("bad ctype","Corrupt PNG"); + comp = stbi__get8(s); if (comp) return stbi__err("bad comp method","Corrupt PNG"); + filter= stbi__get8(s); if (filter) return stbi__err("bad filter method","Corrupt PNG"); + interlace = stbi__get8(s); if (interlace>1) return stbi__err("bad interlace method","Corrupt PNG"); + if (!s->img_x || !s->img_y) return stbi__err("0-pixel image","Corrupt PNG"); + if (!pal_img_n) { + s->img_n = (color & 2 ? 3 : 1) + (color & 4 ? 1 : 0); + if ((1 << 30) / s->img_x / s->img_n < s->img_y) return stbi__err("too large", "Image too large to decode"); + } else { + // if paletted, then pal_n is our final components, and + // img_n is # components to decompress/filter. 
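+         // (a paletted PNG is filtered as one byte of palette index per
+         //  pixel, hence img_n = 1 here; expansion to 3 or 4 channels
+         //  happens at the end in stbi__expand_png_palette, which is why
+         //  the size check below divides by 4, the worst-case output.)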
+ s->img_n = 1; + if ((1 << 30) / s->img_x / 4 < s->img_y) return stbi__err("too large","Corrupt PNG"); + } + // even with SCAN_header, have to scan to see if we have a tRNS + break; + } + + case STBI__PNG_TYPE('P','L','T','E'): { + if (first) return stbi__err("first not IHDR", "Corrupt PNG"); + if (c.length > 256*3) return stbi__err("invalid PLTE","Corrupt PNG"); + pal_len = c.length / 3; + if (pal_len * 3 != c.length) return stbi__err("invalid PLTE","Corrupt PNG"); + for (i=0; i < pal_len; ++i) { + palette[i*4+0] = stbi__get8(s); + palette[i*4+1] = stbi__get8(s); + palette[i*4+2] = stbi__get8(s); + palette[i*4+3] = 255; + } + break; + } + + case STBI__PNG_TYPE('t','R','N','S'): { + if (first) return stbi__err("first not IHDR", "Corrupt PNG"); + if (z->idata) return stbi__err("tRNS after IDAT","Corrupt PNG"); + if (pal_img_n) { + if (scan == STBI__SCAN_header) { s->img_n = 4; return 1; } + if (pal_len == 0) return stbi__err("tRNS before PLTE","Corrupt PNG"); + if (c.length > pal_len) return stbi__err("bad tRNS len","Corrupt PNG"); + pal_img_n = 4; + for (i=0; i < c.length; ++i) + palette[i*4+3] = stbi__get8(s); + } else { + if (!(s->img_n & 1)) return stbi__err("tRNS with alpha","Corrupt PNG"); + if (c.length != (stbi__uint32) s->img_n*2) return stbi__err("bad tRNS len","Corrupt PNG"); + has_trans = 1; + // non-paletted with tRNS = constant alpha. if header-scanning, we can stop now. + if (scan == STBI__SCAN_header) { ++s->img_n; return 1; } + if (z->depth == 16) { + for (k = 0; k < s->img_n && k < 3; ++k) // extra loop test to suppress false GCC warning + tc16[k] = (stbi__uint16)stbi__get16be(s); // copy the values as-is + } else { + for (k = 0; k < s->img_n && k < 3; ++k) + tc[k] = (stbi_uc)(stbi__get16be(s) & 255) * stbi__depth_scale_table[z->depth]; // non 8-bit images will be larger + } + } + break; + } + + case STBI__PNG_TYPE('I','D','A','T'): { + if (first) return stbi__err("first not IHDR", "Corrupt PNG"); + if (pal_img_n && !pal_len) return stbi__err("no PLTE","Corrupt PNG"); + if (scan == STBI__SCAN_header) { + // header scan definitely stops at first IDAT + if (pal_img_n) + s->img_n = pal_img_n; + return 1; + } + if (c.length > (1u << 30)) return stbi__err("IDAT size limit", "IDAT section larger than 2^30 bytes"); + if ((int)(ioff + c.length) < (int)ioff) return 0; + if (ioff + c.length > idata_limit) { + stbi__uint32 idata_limit_old = idata_limit; + stbi_uc *p; + if (idata_limit == 0) idata_limit = c.length > 4096 ? 
c.length : 4096; + while (ioff + c.length > idata_limit) + idata_limit *= 2; + STBI_NOTUSED(idata_limit_old); + p = (stbi_uc *) STBI_REALLOC_SIZED(z->idata, idata_limit_old, idata_limit); if (p == NULL) return stbi__err("outofmem", "Out of memory"); + z->idata = p; + } + if (!stbi__getn(s, z->idata+ioff,c.length)) return stbi__err("outofdata","Corrupt PNG"); + ioff += c.length; + break; + } + + case STBI__PNG_TYPE('I','E','N','D'): { + stbi__uint32 raw_len, bpl; + if (first) return stbi__err("first not IHDR", "Corrupt PNG"); + if (scan != STBI__SCAN_load) return 1; + if (z->idata == NULL) return stbi__err("no IDAT","Corrupt PNG"); + // initial guess for decoded data size to avoid unnecessary reallocs + bpl = (s->img_x * z->depth + 7) / 8; // bytes per line, per component + raw_len = bpl * s->img_y * s->img_n /* pixels */ + s->img_y /* filter mode per row */; + z->expanded = (stbi_uc *) stbi_zlib_decode_malloc_guesssize_headerflag((char *) z->idata, ioff, raw_len, (int *) &raw_len, !is_iphone); + if (z->expanded == NULL) return 0; // zlib should set error + STBI_FREE(z->idata); z->idata = NULL; + if ((req_comp == s->img_n+1 && req_comp != 3 && !pal_img_n) || has_trans) + s->img_out_n = s->img_n+1; + else + s->img_out_n = s->img_n; + if (!stbi__create_png_image(z, z->expanded, raw_len, s->img_out_n, z->depth, color, interlace)) return 0; + if (has_trans) { + if (z->depth == 16) { + if (!stbi__compute_transparency16(z, tc16, s->img_out_n)) return 0; + } else { + if (!stbi__compute_transparency(z, tc, s->img_out_n)) return 0; + } + } + if (is_iphone && stbi__de_iphone_flag && s->img_out_n > 2) + stbi__de_iphone(z); + if (pal_img_n) { + // pal_img_n == 3 or 4 + s->img_n = pal_img_n; // record the actual colors we had + s->img_out_n = pal_img_n; + if (req_comp >= 3) s->img_out_n = req_comp; + if (!stbi__expand_png_palette(z, palette, pal_len, s->img_out_n)) + return 0; + } else if (has_trans) { + // non-paletted image with tRNS -> source image has (constant) alpha + ++s->img_n; + } + STBI_FREE(z->expanded); z->expanded = NULL; + // end of PNG chunk, read and skip CRC + stbi__get32be(s); + return 1; + } + + default: + // if critical, fail + if (first) return stbi__err("first not IHDR", "Corrupt PNG"); + if ((c.type & (1 << 29)) == 0) { + #ifndef STBI_NO_FAILURE_STRINGS + // not threadsafe + static char invalid_chunk[] = "XXXX PNG chunk not known"; + invalid_chunk[0] = STBI__BYTECAST(c.type >> 24); + invalid_chunk[1] = STBI__BYTECAST(c.type >> 16); + invalid_chunk[2] = STBI__BYTECAST(c.type >> 8); + invalid_chunk[3] = STBI__BYTECAST(c.type >> 0); + #endif + return stbi__err(invalid_chunk, "PNG not supported: unknown PNG chunk type"); + } + stbi__skip(s, c.length); + break; + } + // end of PNG chunk, read and skip CRC + stbi__get32be(s); + } +} + +static void *stbi__do_png(stbi__png *p, int *x, int *y, int *n, int req_comp, stbi__result_info *ri) +{ + void *result=NULL; + if (req_comp < 0 || req_comp > 4) return stbi__errpuc("bad req_comp", "Internal error"); + if (stbi__parse_png_file(p, STBI__SCAN_load, req_comp)) { + if (p->depth <= 8) + ri->bits_per_channel = 8; + else if (p->depth == 16) + ri->bits_per_channel = 16; + else + return stbi__errpuc("bad bits_per_channel", "PNG not supported: unsupported color depth"); + result = p->out; + p->out = NULL; + if (req_comp && req_comp != p->s->img_out_n) { + if (ri->bits_per_channel == 8) + result = stbi__convert_format((unsigned char *) result, p->s->img_out_n, req_comp, p->s->img_x, p->s->img_y); + else + result = stbi__convert_format16((stbi__uint16 
*) result, p->s->img_out_n, req_comp, p->s->img_x, p->s->img_y); + p->s->img_out_n = req_comp; + if (result == NULL) return result; + } + *x = p->s->img_x; + *y = p->s->img_y; + if (n) *n = p->s->img_n; + } + STBI_FREE(p->out); p->out = NULL; + STBI_FREE(p->expanded); p->expanded = NULL; + STBI_FREE(p->idata); p->idata = NULL; + + return result; +} + +static void *stbi__png_load(stbi__context *s, int *x, int *y, int *comp, int req_comp, stbi__result_info *ri) +{ + stbi__png p; + p.s = s; + return stbi__do_png(&p, x,y,comp,req_comp, ri); +} + +static int stbi__png_test(stbi__context *s) +{ + int r; + r = stbi__check_png_header(s); + stbi__rewind(s); + return r; +} + +static int stbi__png_info_raw(stbi__png *p, int *x, int *y, int *comp) +{ + if (!stbi__parse_png_file(p, STBI__SCAN_header, 0)) { + stbi__rewind( p->s ); + return 0; + } + if (x) *x = p->s->img_x; + if (y) *y = p->s->img_y; + if (comp) *comp = p->s->img_n; + return 1; +} + +static int stbi__png_info(stbi__context *s, int *x, int *y, int *comp) +{ + stbi__png p; + p.s = s; + return stbi__png_info_raw(&p, x, y, comp); +} + +static int stbi__png_is16(stbi__context *s) +{ + stbi__png p; + p.s = s; + if (!stbi__png_info_raw(&p, NULL, NULL, NULL)) + return 0; + if (p.depth != 16) { + stbi__rewind(p.s); + return 0; + } + return 1; +} +#endif + +// Microsoft/Windows BMP image + +#ifndef STBI_NO_BMP +static int stbi__bmp_test_raw(stbi__context *s) +{ + int r; + int sz; + if (stbi__get8(s) != 'B') return 0; + if (stbi__get8(s) != 'M') return 0; + stbi__get32le(s); // discard filesize + stbi__get16le(s); // discard reserved + stbi__get16le(s); // discard reserved + stbi__get32le(s); // discard data offset + sz = stbi__get32le(s); + r = (sz == 12 || sz == 40 || sz == 56 || sz == 108 || sz == 124); + return r; +} + +static int stbi__bmp_test(stbi__context *s) +{ + int r = stbi__bmp_test_raw(s); + stbi__rewind(s); + return r; +} + + +// returns 0..31 for the highest set bit +static int stbi__high_bit(unsigned int z) +{ + int n=0; + if (z == 0) return -1; + if (z >= 0x10000) { n += 16; z >>= 16; } + if (z >= 0x00100) { n += 8; z >>= 8; } + if (z >= 0x00010) { n += 4; z >>= 4; } + if (z >= 0x00004) { n += 2; z >>= 2; } + if (z >= 0x00002) { n += 1;/* >>= 1;*/ } + return n; +} + +static int stbi__bitcount(unsigned int a) +{ + a = (a & 0x55555555) + ((a >> 1) & 0x55555555); // max 2 + a = (a & 0x33333333) + ((a >> 2) & 0x33333333); // max 4 + a = (a + (a >> 4)) & 0x0f0f0f0f; // max 8 per 4, now 8 bits + a = (a + (a >> 8)); // max 16 per 8 bits + a = (a + (a >> 16)); // max 32 per 8 bits + return a & 0xff; +} + +// extract an arbitrarily-aligned N-bit value (N=bits) +// from v, and then make it 8-bits long and fractionally +// extend it to full full range. 
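+// e.g. with bits=5: mul_table[5]=0x21 and shift_table[5]=2, so a raw
+// 5-bit value of 22 (0b10110) becomes (22*0x21)>>2 = 181 (0b10110101),
+// i.e. the five source bits replicated out to eight, mapping 0 to 0 and
+// 31 to 255.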
+static int stbi__shiftsigned(unsigned int v, int shift, int bits) +{ + static unsigned int mul_table[9] = { + 0, + 0xff/*0b11111111*/, 0x55/*0b01010101*/, 0x49/*0b01001001*/, 0x11/*0b00010001*/, + 0x21/*0b00100001*/, 0x41/*0b01000001*/, 0x81/*0b10000001*/, 0x01/*0b00000001*/, + }; + static unsigned int shift_table[9] = { + 0, 0,0,1,0,2,4,6,0, + }; + if (shift < 0) + v <<= -shift; + else + v >>= shift; + STBI_ASSERT(v < 256); + v >>= (8-bits); + STBI_ASSERT(bits >= 0 && bits <= 8); + return (int) ((unsigned) v * mul_table[bits]) >> shift_table[bits]; +} + +typedef struct +{ + int bpp, offset, hsz; + unsigned int mr,mg,mb,ma, all_a; + int extra_read; +} stbi__bmp_data; + +static int stbi__bmp_set_mask_defaults(stbi__bmp_data *info, int compress) +{ + // BI_BITFIELDS specifies masks explicitly, don't override + if (compress == 3) + return 1; + + if (compress == 0) { + if (info->bpp == 16) { + info->mr = 31u << 10; + info->mg = 31u << 5; + info->mb = 31u << 0; + } else if (info->bpp == 32) { + info->mr = 0xffu << 16; + info->mg = 0xffu << 8; + info->mb = 0xffu << 0; + info->ma = 0xffu << 24; + info->all_a = 0; // if all_a is 0 at end, then we loaded alpha channel but it was all 0 + } else { + // otherwise, use defaults, which is all-0 + info->mr = info->mg = info->mb = info->ma = 0; + } + return 1; + } + return 0; // error +} + +static void *stbi__bmp_parse_header(stbi__context *s, stbi__bmp_data *info) +{ + int hsz; + if (stbi__get8(s) != 'B' || stbi__get8(s) != 'M') return stbi__errpuc("not BMP", "Corrupt BMP"); + stbi__get32le(s); // discard filesize + stbi__get16le(s); // discard reserved + stbi__get16le(s); // discard reserved + info->offset = stbi__get32le(s); + info->hsz = hsz = stbi__get32le(s); + info->mr = info->mg = info->mb = info->ma = 0; + info->extra_read = 14; + + if (info->offset < 0) return stbi__errpuc("bad BMP", "bad BMP"); + + if (hsz != 12 && hsz != 40 && hsz != 56 && hsz != 108 && hsz != 124) return stbi__errpuc("unknown BMP", "BMP type not supported: unknown"); + if (hsz == 12) { + s->img_x = stbi__get16le(s); + s->img_y = stbi__get16le(s); + } else { + s->img_x = stbi__get32le(s); + s->img_y = stbi__get32le(s); + } + if (stbi__get16le(s) != 1) return stbi__errpuc("bad BMP", "bad BMP"); + info->bpp = stbi__get16le(s); + if (hsz != 12) { + int compress = stbi__get32le(s); + if (compress == 1 || compress == 2) return stbi__errpuc("BMP RLE", "BMP type not supported: RLE"); + if (compress >= 4) return stbi__errpuc("BMP JPEG/PNG", "BMP type not supported: unsupported compression"); // this includes PNG/JPEG modes + if (compress == 3 && info->bpp != 16 && info->bpp != 32) return stbi__errpuc("bad BMP", "bad BMP"); // bitfields requires 16 or 32 bits/pixel + stbi__get32le(s); // discard sizeof + stbi__get32le(s); // discard hres + stbi__get32le(s); // discard vres + stbi__get32le(s); // discard colorsused + stbi__get32le(s); // discard max important + if (hsz == 40 || hsz == 56) { + if (hsz == 56) { + stbi__get32le(s); + stbi__get32le(s); + stbi__get32le(s); + stbi__get32le(s); + } + if (info->bpp == 16 || info->bpp == 32) { + if (compress == 0) { + stbi__bmp_set_mask_defaults(info, compress); + } else if (compress == 3) { + info->mr = stbi__get32le(s); + info->mg = stbi__get32le(s); + info->mb = stbi__get32le(s); + info->extra_read += 12; + // not documented, but generated by photoshop and handled by mspaint + if (info->mr == info->mg && info->mg == info->mb) { + // ?!?!? 
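+               // (presumably rejected because identical masks cannot
+               // select three distinct channels, so the header makes no
+               // sense as BI_BITFIELDS.)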
+ return stbi__errpuc("bad BMP", "bad BMP"); + } + } else + return stbi__errpuc("bad BMP", "bad BMP"); + } + } else { + // V4/V5 header + int i; + if (hsz != 108 && hsz != 124) + return stbi__errpuc("bad BMP", "bad BMP"); + info->mr = stbi__get32le(s); + info->mg = stbi__get32le(s); + info->mb = stbi__get32le(s); + info->ma = stbi__get32le(s); + if (compress != 3) // override mr/mg/mb unless in BI_BITFIELDS mode, as per docs + stbi__bmp_set_mask_defaults(info, compress); + stbi__get32le(s); // discard color space + for (i=0; i < 12; ++i) + stbi__get32le(s); // discard color space parameters + if (hsz == 124) { + stbi__get32le(s); // discard rendering intent + stbi__get32le(s); // discard offset of profile data + stbi__get32le(s); // discard size of profile data + stbi__get32le(s); // discard reserved + } + } + } + return (void *) 1; +} + + +static void *stbi__bmp_load(stbi__context *s, int *x, int *y, int *comp, int req_comp, stbi__result_info *ri) +{ + stbi_uc *out; + unsigned int mr=0,mg=0,mb=0,ma=0, all_a; + stbi_uc pal[256][4]; + int psize=0,i,j,width; + int flip_vertically, pad, target; + stbi__bmp_data info; + STBI_NOTUSED(ri); + + info.all_a = 255; + if (stbi__bmp_parse_header(s, &info) == NULL) + return NULL; // error code already set + + flip_vertically = ((int) s->img_y) > 0; + s->img_y = abs((int) s->img_y); + + if (s->img_y > STBI_MAX_DIMENSIONS) return stbi__errpuc("too large","Very large image (corrupt?)"); + if (s->img_x > STBI_MAX_DIMENSIONS) return stbi__errpuc("too large","Very large image (corrupt?)"); + + mr = info.mr; + mg = info.mg; + mb = info.mb; + ma = info.ma; + all_a = info.all_a; + + if (info.hsz == 12) { + if (info.bpp < 24) + psize = (info.offset - info.extra_read - 24) / 3; + } else { + if (info.bpp < 16) + psize = (info.offset - info.extra_read - info.hsz) >> 2; + } + if (psize == 0) { + // accept some number of extra bytes after the header, but if the offset points either to before + // the header ends or implies a large amount of extra data, reject the file as malformed + int bytes_read_so_far = s->callback_already_read + (int)(s->img_buffer - s->img_buffer_original); + int header_limit = 1024; // max we actually read is below 256 bytes currently. + int extra_data_limit = 256*4; // what ordinarily goes here is a palette; 256 entries*4 bytes is its max size. + if (bytes_read_so_far <= 0 || bytes_read_so_far > header_limit) { + return stbi__errpuc("bad header", "Corrupt BMP"); + } + // we established that bytes_read_so_far is positive and sensible. + // the first half of this test rejects offsets that are either too small positives, or + // negative, and guarantees that info.offset >= bytes_read_so_far > 0. this in turn + // ensures the number computed in the second half of the test can't overflow. + if (info.offset < bytes_read_so_far || info.offset - bytes_read_so_far > extra_data_limit) { + return stbi__errpuc("bad offset", "Corrupt BMP"); + } else { + stbi__skip(s, info.offset - bytes_read_so_far); + } + } + + if (info.bpp == 24 && ma == 0xff000000) + s->img_n = 3; + else + s->img_n = ma ? 
4 : 3; + if (req_comp && req_comp >= 3) // we can directly decode 3 or 4 + target = req_comp; + else + target = s->img_n; // if they want monochrome, we'll post-convert + + // sanity-check size + if (!stbi__mad3sizes_valid(target, s->img_x, s->img_y, 0)) + return stbi__errpuc("too large", "Corrupt BMP"); + + out = (stbi_uc *) stbi__malloc_mad3(target, s->img_x, s->img_y, 0); + if (!out) return stbi__errpuc("outofmem", "Out of memory"); + if (info.bpp < 16) { + int z=0; + if (psize == 0 || psize > 256) { STBI_FREE(out); return stbi__errpuc("invalid", "Corrupt BMP"); } + for (i=0; i < psize; ++i) { + pal[i][2] = stbi__get8(s); + pal[i][1] = stbi__get8(s); + pal[i][0] = stbi__get8(s); + if (info.hsz != 12) stbi__get8(s); + pal[i][3] = 255; + } + stbi__skip(s, info.offset - info.extra_read - info.hsz - psize * (info.hsz == 12 ? 3 : 4)); + if (info.bpp == 1) width = (s->img_x + 7) >> 3; + else if (info.bpp == 4) width = (s->img_x + 1) >> 1; + else if (info.bpp == 8) width = s->img_x; + else { STBI_FREE(out); return stbi__errpuc("bad bpp", "Corrupt BMP"); } + pad = (-width)&3; + if (info.bpp == 1) { + for (j=0; j < (int) s->img_y; ++j) { + int bit_offset = 7, v = stbi__get8(s); + for (i=0; i < (int) s->img_x; ++i) { + int color = (v>>bit_offset)&0x1; + out[z++] = pal[color][0]; + out[z++] = pal[color][1]; + out[z++] = pal[color][2]; + if (target == 4) out[z++] = 255; + if (i+1 == (int) s->img_x) break; + if((--bit_offset) < 0) { + bit_offset = 7; + v = stbi__get8(s); + } + } + stbi__skip(s, pad); + } + } else { + for (j=0; j < (int) s->img_y; ++j) { + for (i=0; i < (int) s->img_x; i += 2) { + int v=stbi__get8(s),v2=0; + if (info.bpp == 4) { + v2 = v & 15; + v >>= 4; + } + out[z++] = pal[v][0]; + out[z++] = pal[v][1]; + out[z++] = pal[v][2]; + if (target == 4) out[z++] = 255; + if (i+1 == (int) s->img_x) break; + v = (info.bpp == 8) ? stbi__get8(s) : v2; + out[z++] = pal[v][0]; + out[z++] = pal[v][1]; + out[z++] = pal[v][2]; + if (target == 4) out[z++] = 255; + } + stbi__skip(s, pad); + } + } + } else { + int rshift=0,gshift=0,bshift=0,ashift=0,rcount=0,gcount=0,bcount=0,acount=0; + int z = 0; + int easy=0; + stbi__skip(s, info.offset - info.extra_read - info.hsz); + if (info.bpp == 24) width = 3 * s->img_x; + else if (info.bpp == 16) width = 2*s->img_x; + else /* bpp = 32 and pad = 0 */ width=0; + pad = (-width) & 3; + if (info.bpp == 24) { + easy = 1; + } else if (info.bpp == 32) { + if (mb == 0xff && mg == 0xff00 && mr == 0x00ff0000 && ma == 0xff000000) + easy = 2; + } + if (!easy) { + if (!mr || !mg || !mb) { STBI_FREE(out); return stbi__errpuc("bad masks", "Corrupt BMP"); } + // right shift amt to put high bit in position #7 + rshift = stbi__high_bit(mr)-7; rcount = stbi__bitcount(mr); + gshift = stbi__high_bit(mg)-7; gcount = stbi__bitcount(mg); + bshift = stbi__high_bit(mb)-7; bcount = stbi__bitcount(mb); + ashift = stbi__high_bit(ma)-7; acount = stbi__bitcount(ma); + if (rcount > 8 || gcount > 8 || bcount > 8 || acount > 8) { STBI_FREE(out); return stbi__errpuc("bad masks", "Corrupt BMP"); } + } + for (j=0; j < (int) s->img_y; ++j) { + if (easy) { + for (i=0; i < (int) s->img_x; ++i) { + unsigned char a; + out[z+2] = stbi__get8(s); + out[z+1] = stbi__get8(s); + out[z+0] = stbi__get8(s); + z += 3; + a = (easy == 2 ? stbi__get8(s) : 255); + all_a |= a; + if (target == 4) out[z++] = a; + } + } else { + int bpp = info.bpp; + for (i=0; i < (int) s->img_x; ++i) { + stbi__uint32 v = (bpp == 16 ? 
(stbi__uint32) stbi__get16le(s) : stbi__get32le(s)); + unsigned int a; + out[z++] = STBI__BYTECAST(stbi__shiftsigned(v & mr, rshift, rcount)); + out[z++] = STBI__BYTECAST(stbi__shiftsigned(v & mg, gshift, gcount)); + out[z++] = STBI__BYTECAST(stbi__shiftsigned(v & mb, bshift, bcount)); + a = (ma ? stbi__shiftsigned(v & ma, ashift, acount) : 255); + all_a |= a; + if (target == 4) out[z++] = STBI__BYTECAST(a); + } + } + stbi__skip(s, pad); + } + } + + // if alpha channel is all 0s, replace with all 255s + if (target == 4 && all_a == 0) + for (i=4*s->img_x*s->img_y-1; i >= 0; i -= 4) + out[i] = 255; + + if (flip_vertically) { + stbi_uc t; + for (j=0; j < (int) s->img_y>>1; ++j) { + stbi_uc *p1 = out + j *s->img_x*target; + stbi_uc *p2 = out + (s->img_y-1-j)*s->img_x*target; + for (i=0; i < (int) s->img_x*target; ++i) { + t = p1[i]; p1[i] = p2[i]; p2[i] = t; + } + } + } + + if (req_comp && req_comp != target) { + out = stbi__convert_format(out, target, req_comp, s->img_x, s->img_y); + if (out == NULL) return out; // stbi__convert_format frees input on failure + } + + *x = s->img_x; + *y = s->img_y; + if (comp) *comp = s->img_n; + return out; +} +#endif + +// Targa Truevision - TGA +// by Jonathan Dummer +#ifndef STBI_NO_TGA +// returns STBI_rgb or whatever, 0 on error +static int stbi__tga_get_comp(int bits_per_pixel, int is_grey, int* is_rgb16) +{ + // only RGB or RGBA (incl. 16bit) or grey allowed + if (is_rgb16) *is_rgb16 = 0; + switch(bits_per_pixel) { + case 8: return STBI_grey; + case 16: if(is_grey) return STBI_grey_alpha; + // fallthrough + case 15: if(is_rgb16) *is_rgb16 = 1; + return STBI_rgb; + case 24: // fallthrough + case 32: return bits_per_pixel/8; + default: return 0; + } +} + +static int stbi__tga_info(stbi__context *s, int *x, int *y, int *comp) +{ + int tga_w, tga_h, tga_comp, tga_image_type, tga_bits_per_pixel, tga_colormap_bpp; + int sz, tga_colormap_type; + stbi__get8(s); // discard Offset + tga_colormap_type = stbi__get8(s); // colormap type + if( tga_colormap_type > 1 ) { + stbi__rewind(s); + return 0; // only RGB or indexed allowed + } + tga_image_type = stbi__get8(s); // image type + if ( tga_colormap_type == 1 ) { // colormapped (paletted) image + if (tga_image_type != 1 && tga_image_type != 9) { + stbi__rewind(s); + return 0; + } + stbi__skip(s,4); // skip index of first colormap entry and number of entries + sz = stbi__get8(s); // check bits per palette color entry + if ( (sz != 8) && (sz != 15) && (sz != 16) && (sz != 24) && (sz != 32) ) { + stbi__rewind(s); + return 0; + } + stbi__skip(s,4); // skip image x and y origin + tga_colormap_bpp = sz; + } else { // "normal" image w/o colormap - only RGB or grey allowed, +/- RLE + if ( (tga_image_type != 2) && (tga_image_type != 3) && (tga_image_type != 10) && (tga_image_type != 11) ) { + stbi__rewind(s); + return 0; // only RGB or grey allowed, +/- RLE + } + stbi__skip(s,9); // skip colormap specification and image x/y origin + tga_colormap_bpp = 0; + } + tga_w = stbi__get16le(s); + if( tga_w < 1 ) { + stbi__rewind(s); + return 0; // test width + } + tga_h = stbi__get16le(s); + if( tga_h < 1 ) { + stbi__rewind(s); + return 0; // test height + } + tga_bits_per_pixel = stbi__get8(s); // bits per pixel + stbi__get8(s); // ignore alpha bits + if (tga_colormap_bpp != 0) { + if((tga_bits_per_pixel != 8) && (tga_bits_per_pixel != 16)) { + // when using a colormap, tga_bits_per_pixel is the size of the indexes + // I don't think anything but 8 or 16bit indexes makes sense + stbi__rewind(s); + return 0; + } + tga_comp = 
stbi__tga_get_comp(tga_colormap_bpp, 0, NULL); + } else { + tga_comp = stbi__tga_get_comp(tga_bits_per_pixel, (tga_image_type == 3) || (tga_image_type == 11), NULL); + } + if(!tga_comp) { + stbi__rewind(s); + return 0; + } + if (x) *x = tga_w; + if (y) *y = tga_h; + if (comp) *comp = tga_comp; + return 1; // seems to have passed everything +} + +static int stbi__tga_test(stbi__context *s) +{ + int res = 0; + int sz, tga_color_type; + stbi__get8(s); // discard Offset + tga_color_type = stbi__get8(s); // color type + if ( tga_color_type > 1 ) goto errorEnd; // only RGB or indexed allowed + sz = stbi__get8(s); // image type + if ( tga_color_type == 1 ) { // colormapped (paletted) image + if (sz != 1 && sz != 9) goto errorEnd; // colortype 1 demands image type 1 or 9 + stbi__skip(s,4); // skip index of first colormap entry and number of entries + sz = stbi__get8(s); // check bits per palette color entry + if ( (sz != 8) && (sz != 15) && (sz != 16) && (sz != 24) && (sz != 32) ) goto errorEnd; + stbi__skip(s,4); // skip image x and y origin + } else { // "normal" image w/o colormap + if ( (sz != 2) && (sz != 3) && (sz != 10) && (sz != 11) ) goto errorEnd; // only RGB or grey allowed, +/- RLE + stbi__skip(s,9); // skip colormap specification and image x/y origin + } + if ( stbi__get16le(s) < 1 ) goto errorEnd; // test width + if ( stbi__get16le(s) < 1 ) goto errorEnd; // test height + sz = stbi__get8(s); // bits per pixel + if ( (tga_color_type == 1) && (sz != 8) && (sz != 16) ) goto errorEnd; // for colormapped images, bpp is size of an index + if ( (sz != 8) && (sz != 15) && (sz != 16) && (sz != 24) && (sz != 32) ) goto errorEnd; + + res = 1; // if we got this far, everything's good and we can return 1 instead of 0 + +errorEnd: + stbi__rewind(s); + return res; +} + +// read 16bit value and convert to 24bit RGB +static void stbi__tga_read_rgb16(stbi__context *s, stbi_uc* out) +{ + stbi__uint16 px = (stbi__uint16)stbi__get16le(s); + stbi__uint16 fiveBitMask = 31; + // we have 3 channels with 5bits each + int r = (px >> 10) & fiveBitMask; + int g = (px >> 5) & fiveBitMask; + int b = px & fiveBitMask; + // Note that this saves the data in RGB(A) order, so it doesn't need to be swapped later + out[0] = (stbi_uc)((r * 255)/31); + out[1] = (stbi_uc)((g * 255)/31); + out[2] = (stbi_uc)((b * 255)/31); + + // some people claim that the most significant bit might be used for alpha + // (possibly if an alpha-bit is set in the "image descriptor byte") + // but that only made 16bit test images completely translucent.. + // so let's treat all 15 and 16bit TGAs as RGB with no alpha. +} + +static void *stbi__tga_load(stbi__context *s, int *x, int *y, int *comp, int req_comp, stbi__result_info *ri) +{ + // read in the TGA header stuff + int tga_offset = stbi__get8(s); + int tga_indexed = stbi__get8(s); + int tga_image_type = stbi__get8(s); + int tga_is_RLE = 0; + int tga_palette_start = stbi__get16le(s); + int tga_palette_len = stbi__get16le(s); + int tga_palette_bits = stbi__get8(s); + int tga_x_origin = stbi__get16le(s); + int tga_y_origin = stbi__get16le(s); + int tga_width = stbi__get16le(s); + int tga_height = stbi__get16le(s); + int tga_bits_per_pixel = stbi__get8(s); + int tga_comp, tga_rgb16=0; + int tga_inverted = stbi__get8(s); + // int tga_alpha_bits = tga_inverted & 15; // the 4 lowest bits - unused (useless?) 
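+   // at this point the whole 18-byte TGA header has been consumed: three
+   // single-byte fields, the 5-byte colormap spec, and the 10-byte image
+   // spec whose final descriptor byte was read into tga_inverted.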
+ // image data + unsigned char *tga_data; + unsigned char *tga_palette = NULL; + int i, j; + unsigned char raw_data[4] = {0}; + int RLE_count = 0; + int RLE_repeating = 0; + int read_next_pixel = 1; + STBI_NOTUSED(ri); + STBI_NOTUSED(tga_x_origin); // @TODO + STBI_NOTUSED(tga_y_origin); // @TODO + + if (tga_height > STBI_MAX_DIMENSIONS) return stbi__errpuc("too large","Very large image (corrupt?)"); + if (tga_width > STBI_MAX_DIMENSIONS) return stbi__errpuc("too large","Very large image (corrupt?)"); + + // do a tiny bit of precessing + if ( tga_image_type >= 8 ) + { + tga_image_type -= 8; + tga_is_RLE = 1; + } + tga_inverted = 1 - ((tga_inverted >> 5) & 1); + + // If I'm paletted, then I'll use the number of bits from the palette + if ( tga_indexed ) tga_comp = stbi__tga_get_comp(tga_palette_bits, 0, &tga_rgb16); + else tga_comp = stbi__tga_get_comp(tga_bits_per_pixel, (tga_image_type == 3), &tga_rgb16); + + if(!tga_comp) // shouldn't really happen, stbi__tga_test() should have ensured basic consistency + return stbi__errpuc("bad format", "Can't find out TGA pixelformat"); + + // tga info + *x = tga_width; + *y = tga_height; + if (comp) *comp = tga_comp; + + if (!stbi__mad3sizes_valid(tga_width, tga_height, tga_comp, 0)) + return stbi__errpuc("too large", "Corrupt TGA"); + + tga_data = (unsigned char*)stbi__malloc_mad3(tga_width, tga_height, tga_comp, 0); + if (!tga_data) return stbi__errpuc("outofmem", "Out of memory"); + + // skip to the data's starting position (offset usually = 0) + stbi__skip(s, tga_offset ); + + if ( !tga_indexed && !tga_is_RLE && !tga_rgb16 ) { + for (i=0; i < tga_height; ++i) { + int row = tga_inverted ? tga_height -i - 1 : i; + stbi_uc *tga_row = tga_data + row*tga_width*tga_comp; + stbi__getn(s, tga_row, tga_width * tga_comp); + } + } else { + // do I need to load a palette? + if ( tga_indexed) + { + if (tga_palette_len == 0) { /* you have to have at least one entry! */ + STBI_FREE(tga_data); + return stbi__errpuc("bad palette", "Corrupt TGA"); + } + + // any data to skip? (offset usually = 0) + stbi__skip(s, tga_palette_start ); + // load the palette + tga_palette = (unsigned char*)stbi__malloc_mad2(tga_palette_len, tga_comp, 0); + if (!tga_palette) { + STBI_FREE(tga_data); + return stbi__errpuc("outofmem", "Out of memory"); + } + if (tga_rgb16) { + stbi_uc *pal_entry = tga_palette; + STBI_ASSERT(tga_comp == STBI_rgb); + for (i=0; i < tga_palette_len; ++i) { + stbi__tga_read_rgb16(s, pal_entry); + pal_entry += tga_comp; + } + } else if (!stbi__getn(s, tga_palette, tga_palette_len * tga_comp)) { + STBI_FREE(tga_data); + STBI_FREE(tga_palette); + return stbi__errpuc("bad palette", "Corrupt TGA"); + } + } + // load the data + for (i=0; i < tga_width * tga_height; ++i) + { + // if I'm in RLE mode, do I need to get a RLE stbi__pngchunk? + if ( tga_is_RLE ) + { + if ( RLE_count == 0 ) + { + // yep, get the next byte as a RLE command + int RLE_cmd = stbi__get8(s); + RLE_count = 1 + (RLE_cmd & 127); + RLE_repeating = RLE_cmd >> 7; + read_next_pixel = 1; + } else if ( !RLE_repeating ) + { + read_next_pixel = 1; + } + } else + { + read_next_pixel = 1; + } + // OK, if I need to read a pixel, do it now + if ( read_next_pixel ) + { + // load however much data we did have + if ( tga_indexed ) + { + // read in index, then perform the lookup + int pal_idx = (tga_bits_per_pixel == 8) ? 
stbi__get8(s) : stbi__get16le(s); + if ( pal_idx >= tga_palette_len ) { + // invalid index + pal_idx = 0; + } + pal_idx *= tga_comp; + for (j = 0; j < tga_comp; ++j) { + raw_data[j] = tga_palette[pal_idx+j]; + } + } else if(tga_rgb16) { + STBI_ASSERT(tga_comp == STBI_rgb); + stbi__tga_read_rgb16(s, raw_data); + } else { + // read in the data raw + for (j = 0; j < tga_comp; ++j) { + raw_data[j] = stbi__get8(s); + } + } + // clear the reading flag for the next pixel + read_next_pixel = 0; + } // end of reading a pixel + + // copy data + for (j = 0; j < tga_comp; ++j) + tga_data[i*tga_comp+j] = raw_data[j]; + + // in case we're in RLE mode, keep counting down + --RLE_count; + } + // do I need to invert the image? + if ( tga_inverted ) + { + for (j = 0; j*2 < tga_height; ++j) + { + int index1 = j * tga_width * tga_comp; + int index2 = (tga_height - 1 - j) * tga_width * tga_comp; + for (i = tga_width * tga_comp; i > 0; --i) + { + unsigned char temp = tga_data[index1]; + tga_data[index1] = tga_data[index2]; + tga_data[index2] = temp; + ++index1; + ++index2; + } + } + } + // clear my palette, if I had one + if ( tga_palette != NULL ) + { + STBI_FREE( tga_palette ); + } + } + + // swap RGB - if the source data was RGB16, it already is in the right order + if (tga_comp >= 3 && !tga_rgb16) + { + unsigned char* tga_pixel = tga_data; + for (i=0; i < tga_width * tga_height; ++i) + { + unsigned char temp = tga_pixel[0]; + tga_pixel[0] = tga_pixel[2]; + tga_pixel[2] = temp; + tga_pixel += tga_comp; + } + } + + // convert to target component count + if (req_comp && req_comp != tga_comp) + tga_data = stbi__convert_format(tga_data, tga_comp, req_comp, tga_width, tga_height); + + // the things I do to get rid of an error message, and yet keep + // Microsoft's C compilers happy... [8^( + tga_palette_start = tga_palette_len = tga_palette_bits = + tga_x_origin = tga_y_origin = 0; + STBI_NOTUSED(tga_palette_start); + // OK, done + return tga_data; +} +#endif + +// ************************************************************************************************* +// Photoshop PSD loader -- PD by Thatcher Ulrich, integration by Nicolas Schulz, tweaked by STB + +#ifndef STBI_NO_PSD +static int stbi__psd_test(stbi__context *s) +{ + int r = (stbi__get32be(s) == 0x38425053); + stbi__rewind(s); + return r; +} + +static int stbi__psd_decode_rle(stbi__context *s, stbi_uc *p, int pixelCount) +{ + int count, nleft, len; + + count = 0; + while ((nleft = pixelCount - count) > 0) { + len = stbi__get8(s); + if (len == 128) { + // No-op. + } else if (len < 128) { + // Copy next len+1 bytes literally. + len++; + if (len > nleft) return 0; // corrupt data + count += len; + while (len) { + *p = stbi__get8(s); + p += 4; + len--; + } + } else if (len > 128) { + stbi_uc val; + // Next -len+1 bytes in the dest are replicated from next source byte. + // (Interpret len as a negative 8-bit int.) + len = 257 - len; + if (len > nleft) return 0; // corrupt data + val = stbi__get8(s); + count += len; + while (len) { + *p = val; + p += 4; + len--; + } + } + } + + return 1; +} + +static void *stbi__psd_load(stbi__context *s, int *x, int *y, int *comp, int req_comp, stbi__result_info *ri, int bpc) +{ + int pixelCount; + int channelCount, compression; + int channel, i; + int bitdepth; + int w,h; + stbi_uc *out; + STBI_NOTUSED(ri); + + // Check identifier + if (stbi__get32be(s) != 0x38425053) // "8BPS" + return stbi__errpuc("not PSD", "Corrupt PSD image"); + + // Check file type version. 
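+   // (a big-endian 16-bit field: 1 is classic PSD; large-document PSB
+   // files use version 2 and are rejected here.)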
+ if (stbi__get16be(s) != 1) + return stbi__errpuc("wrong version", "Unsupported version of PSD image"); + + // Skip 6 reserved bytes. + stbi__skip(s, 6 ); + + // Read the number of channels (R, G, B, A, etc). + channelCount = stbi__get16be(s); + if (channelCount < 0 || channelCount > 16) + return stbi__errpuc("wrong channel count", "Unsupported number of channels in PSD image"); + + // Read the rows and columns of the image. + h = stbi__get32be(s); + w = stbi__get32be(s); + + if (h > STBI_MAX_DIMENSIONS) return stbi__errpuc("too large","Very large image (corrupt?)"); + if (w > STBI_MAX_DIMENSIONS) return stbi__errpuc("too large","Very large image (corrupt?)"); + + // Make sure the depth is 8 bits. + bitdepth = stbi__get16be(s); + if (bitdepth != 8 && bitdepth != 16) + return stbi__errpuc("unsupported bit depth", "PSD bit depth is not 8 or 16 bit"); + + // Make sure the color mode is RGB. + // Valid options are: + // 0: Bitmap + // 1: Grayscale + // 2: Indexed color + // 3: RGB color + // 4: CMYK color + // 7: Multichannel + // 8: Duotone + // 9: Lab color + if (stbi__get16be(s) != 3) + return stbi__errpuc("wrong color format", "PSD is not in RGB color format"); + + // Skip the Mode Data. (It's the palette for indexed color; other info for other modes.) + stbi__skip(s,stbi__get32be(s) ); + + // Skip the image resources. (resolution, pen tool paths, etc) + stbi__skip(s, stbi__get32be(s) ); + + // Skip the reserved data. + stbi__skip(s, stbi__get32be(s) ); + + // Find out if the data is compressed. + // Known values: + // 0: no compression + // 1: RLE compressed + compression = stbi__get16be(s); + if (compression > 1) + return stbi__errpuc("bad compression", "PSD has an unknown compression format"); + + // Check size + if (!stbi__mad3sizes_valid(4, w, h, 0)) + return stbi__errpuc("too large", "Corrupt PSD"); + + // Create the destination image. + + if (!compression && bitdepth == 16 && bpc == 16) { + out = (stbi_uc *) stbi__malloc_mad3(8, w, h, 0); + ri->bits_per_channel = 16; + } else + out = (stbi_uc *) stbi__malloc(4 * w*h); + + if (!out) return stbi__errpuc("outofmem", "Out of memory"); + pixelCount = w*h; + + // Initialize the data to zero. + //memset( out, 0, pixelCount * 4 ); + + // Finally, the image data. + if (compression) { + // RLE as used by .PSD and .TIFF + // Loop until you get the number of unpacked bytes you are expecting: + // Read the next source byte into n. + // If n is between 0 and 127 inclusive, copy the next n+1 bytes literally. + // Else if n is between -127 and -1 inclusive, copy the next byte -n+1 times. + // Else if n is 128, noop. + // Endloop + + // The RLE-compressed data is preceded by a 2-byte data count for each row in the data, + // which we're going to just skip. + stbi__skip(s, h * channelCount * 2 ); + + // Read the RLE data by channel. + for (channel = 0; channel < 4; channel++) { + stbi_uc *p; + + p = out+channel; + if (channel >= channelCount) { + // Fill this channel with default data. + for (i = 0; i < pixelCount; i++, p += 4) + *p = (channel == 3 ? 255 : 0); + } else { + // Read the RLE data. + if (!stbi__psd_decode_rle(s, p, pixelCount)) { + STBI_FREE(out); + return stbi__errpuc("corrupt", "bad RLE data"); + } + } + } + + } else { + // We're at the raw image data. It's each channel in order (Red, Green, Blue, Alpha, ...) + // where each channel consists of an 8-bit (or 16-bit) value for each pixel in the image. + + // Read the data by channel. 
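+      // e.g. all red samples arrive first, then all green, and so on;
+      // striding the output pointer by 4 interleaves each plane into the
+      // RGBA buffer.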
+ for (channel = 0; channel < 4; channel++) { + if (channel >= channelCount) { + // Fill this channel with default data. + if (bitdepth == 16 && bpc == 16) { + stbi__uint16 *q = ((stbi__uint16 *) out) + channel; + stbi__uint16 val = channel == 3 ? 65535 : 0; + for (i = 0; i < pixelCount; i++, q += 4) + *q = val; + } else { + stbi_uc *p = out+channel; + stbi_uc val = channel == 3 ? 255 : 0; + for (i = 0; i < pixelCount; i++, p += 4) + *p = val; + } + } else { + if (ri->bits_per_channel == 16) { // output bpc + stbi__uint16 *q = ((stbi__uint16 *) out) + channel; + for (i = 0; i < pixelCount; i++, q += 4) + *q = (stbi__uint16) stbi__get16be(s); + } else { + stbi_uc *p = out+channel; + if (bitdepth == 16) { // input bpc + for (i = 0; i < pixelCount; i++, p += 4) + *p = (stbi_uc) (stbi__get16be(s) >> 8); + } else { + for (i = 0; i < pixelCount; i++, p += 4) + *p = stbi__get8(s); + } + } + } + } + } + + // remove weird white matte from PSD + if (channelCount >= 4) { + if (ri->bits_per_channel == 16) { + for (i=0; i < w*h; ++i) { + stbi__uint16 *pixel = (stbi__uint16 *) out + 4*i; + if (pixel[3] != 0 && pixel[3] != 65535) { + float a = pixel[3] / 65535.0f; + float ra = 1.0f / a; + float inv_a = 65535.0f * (1 - ra); + pixel[0] = (stbi__uint16) (pixel[0]*ra + inv_a); + pixel[1] = (stbi__uint16) (pixel[1]*ra + inv_a); + pixel[2] = (stbi__uint16) (pixel[2]*ra + inv_a); + } + } + } else { + for (i=0; i < w*h; ++i) { + unsigned char *pixel = out + 4*i; + if (pixel[3] != 0 && pixel[3] != 255) { + float a = pixel[3] / 255.0f; + float ra = 1.0f / a; + float inv_a = 255.0f * (1 - ra); + pixel[0] = (unsigned char) (pixel[0]*ra + inv_a); + pixel[1] = (unsigned char) (pixel[1]*ra + inv_a); + pixel[2] = (unsigned char) (pixel[2]*ra + inv_a); + } + } + } + } + + // convert to desired output format + if (req_comp && req_comp != 4) { + if (ri->bits_per_channel == 16) + out = (stbi_uc *) stbi__convert_format16((stbi__uint16 *) out, 4, req_comp, w, h); + else + out = stbi__convert_format(out, 4, req_comp, w, h); + if (out == NULL) return out; // stbi__convert_format frees input on failure + } + + if (comp) *comp = 4; + *y = h; + *x = w; + + return out; +} +#endif + +// ************************************************************************************************* +// Softimage PIC loader +// by Tom Seddon +// +// See http://softimage.wiki.softimage.com/index.php/INFO:_PIC_file_format +// See http://ozviz.wasp.uwa.edu.au/~pbourke/dataformats/softimagepic/ + +#ifndef STBI_NO_PIC +static int stbi__pic_is4(stbi__context *s,const char *str) +{ + int i; + for (i=0; i<4; ++i) + if (stbi__get8(s) != (stbi_uc)str[i]) + return 0; + + return 1; +} + +static int stbi__pic_test_core(stbi__context *s) +{ + int i; + + if (!stbi__pic_is4(s,"\x53\x80\xF6\x34")) + return 0; + + for(i=0;i<84;++i) + stbi__get8(s); + + if (!stbi__pic_is4(s,"PICT")) + return 0; + + return 1; +} + +typedef struct +{ + stbi_uc size,type,channel; +} stbi__pic_packet; + +static stbi_uc *stbi__readval(stbi__context *s, int channel, stbi_uc *dest) +{ + int mask=0x80, i; + + for (i=0; i<4; ++i, mask>>=1) { + if (channel & mask) { + if (stbi__at_eof(s)) return stbi__errpuc("bad file","PIC file too short"); + dest[i]=stbi__get8(s); + } + } + + return dest; +} + +static void stbi__copyval(int channel,stbi_uc *dest,const stbi_uc *src) +{ + int mask=0x80,i; + + for (i=0;i<4; ++i, mask>>=1) + if (channel&mask) + dest[i]=src[i]; +} + +static stbi_uc *stbi__pic_load_core(stbi__context *s,int width,int height,int *comp, stbi_uc *result) +{ + int 
act_comp=0,num_packets=0,y,chained;
+   stbi__pic_packet packets[10];
+
+   // this will (should...) cater for even some bizarre stuff like having data
+   // for the same channel in multiple packets.
+   do {
+      stbi__pic_packet *packet;
+
+      if (num_packets==sizeof(packets)/sizeof(packets[0]))
+         return stbi__errpuc("bad format","too many packets");
+
+      packet = &packets[num_packets++];
+
+      chained = stbi__get8(s);
+      packet->size    = stbi__get8(s);
+      packet->type    = stbi__get8(s);
+      packet->channel = stbi__get8(s);
+
+      act_comp |= packet->channel;
+
+      if (stbi__at_eof(s))          return stbi__errpuc("bad file","file too short (reading packets)");
+      if (packet->size != 8)  return stbi__errpuc("bad format","packet isn't 8bpp");
+   } while (chained);
+
+   *comp = (act_comp & 0x10 ? 4 : 3); // has alpha channel?
+
+   for(y=0; y<height; ++y) {
+      int packet_idx;
+
+      for(packet_idx=0; packet_idx < num_packets; ++packet_idx) {
+         stbi__pic_packet *packet = &packets[packet_idx];
+         stbi_uc *dest = result+y*width*4;
+
+         switch (packet->type) {
+            default:
+               return stbi__errpuc("bad format","packet has bad compression type");
+
+            case 0: {//uncompressed
+               int x;
+
+               for(x=0;x<width;++x, dest+=4)
+                  if (!stbi__readval(s,packet->channel,dest))
+                     return 0;
+               break;
+            }
+
+            case 1://Pure RLE
+               {
+                  int left=width, i;
+
+                  while (left>0) {
+                     stbi_uc count,value[4];
+
+                     count=stbi__get8(s);
+                     if (stbi__at_eof(s))   return stbi__errpuc("bad file","file too short (pure read count)");
+
+                     if (count > left)
+                        count = (stbi_uc) left;
+
+                     if (!stbi__readval(s,packet->channel,value))  return 0;
+
+                     for(i=0; i<count; ++i,dest+=4)
+                        stbi__copyval(packet->channel,dest,value);
+                     left -= count;
+                  }
+               }
+               break;
+
+            case 2: {//Mixed RLE
+               int left=width;
+               while (left>0) {
+                  int count = stbi__get8(s), i;
+                  if (stbi__at_eof(s))  return stbi__errpuc("bad file","file too short (mixed read count)");
+
+                  if (count >= 128) { // Repeated
+                     stbi_uc value[4];
+
+                     if (count==128)
+                        count = stbi__get16be(s);
+                     else
+                        count -= 127;
+                     if (count > left)
+                        return stbi__errpuc("bad file","scanline overrun");
+
+                     if (!stbi__readval(s,packet->channel,value))
+                        return 0;
+
+                     for(i=0;i<count;++i, dest += 4)
+                        stbi__copyval(packet->channel,dest,value);
+                  } else { // Raw
+                     ++count;
+                     if (count>left) return stbi__errpuc("bad file","scanline overrun");
+
+                     for(i=0;i<count;++i, dest+=4)
+                        if (!stbi__readval(s,packet->channel,dest))
+                           return 0;
+                  }
+                  left-=count;
+               }
+               break;
+            }
+         }
+      }
+   }
+
+   return result;
+}
+
+static void *stbi__pic_load(stbi__context *s,int *px,int *py,int *comp,int req_comp, stbi__result_info *ri)
+{
+   stbi_uc *result;
+   int i, x,y, internal_comp;
+   STBI_NOTUSED(ri);
+
+   if (!comp) comp = &internal_comp;
+
+   for (i=0; i<92; ++i)
+      stbi__get8(s);
+
+   x = stbi__get16be(s);
+   y = stbi__get16be(s);
+
+   if (y > STBI_MAX_DIMENSIONS) return stbi__errpuc("too large","Very large image (corrupt?)");
+   if (x > STBI_MAX_DIMENSIONS) return stbi__errpuc("too large","Very large image (corrupt?)");
+
+   if (stbi__at_eof(s))  return stbi__errpuc("bad file","file too short (pic header)");
+   if (!stbi__mad3sizes_valid(x, y, 4, 0)) return stbi__errpuc("too large", "PIC image too large to decode");
+
+   stbi__get32be(s); //skip `ratio'
+   stbi__get16be(s); //skip `fields'
+   stbi__get16be(s); //skip `pad'
+
+   // intermediate buffer is RGBA
+   result = (stbi_uc *) stbi__malloc_mad3(x, y, 4, 0);
+   if (!result) return stbi__errpuc("outofmem", "Out of memory");
+   memset(result, 0xff, x*y*4);
+
+   if (!stbi__pic_load_core(s,x,y,comp, result)) {
+      STBI_FREE(result);
+      result=0;
+   }
+   *px = x;
+   *py = y;
+   if (req_comp == 0) req_comp = *comp;
+   result=stbi__convert_format(result,4,req_comp,x,y);
+
+   return result;
+}
+
+static int stbi__pic_test(stbi__context *s)
+{
+   int r = stbi__pic_test_core(s);
+   stbi__rewind(s);
+   return r;
+}
+#endif
+
+// *************************************************************************************************
+// GIF loader -- public domain by
Jean-Marc Lienher -- simplified/shrunk by stb + +#ifndef STBI_NO_GIF +typedef struct +{ + stbi__int16 prefix; + stbi_uc first; + stbi_uc suffix; +} stbi__gif_lzw; + +typedef struct +{ + int w,h; + stbi_uc *out; // output buffer (always 4 components) + stbi_uc *background; // The current "background" as far as a gif is concerned + stbi_uc *history; + int flags, bgindex, ratio, transparent, eflags; + stbi_uc pal[256][4]; + stbi_uc lpal[256][4]; + stbi__gif_lzw codes[8192]; + stbi_uc *color_table; + int parse, step; + int lflags; + int start_x, start_y; + int max_x, max_y; + int cur_x, cur_y; + int line_size; + int delay; +} stbi__gif; + +static int stbi__gif_test_raw(stbi__context *s) +{ + int sz; + if (stbi__get8(s) != 'G' || stbi__get8(s) != 'I' || stbi__get8(s) != 'F' || stbi__get8(s) != '8') return 0; + sz = stbi__get8(s); + if (sz != '9' && sz != '7') return 0; + if (stbi__get8(s) != 'a') return 0; + return 1; +} + +static int stbi__gif_test(stbi__context *s) +{ + int r = stbi__gif_test_raw(s); + stbi__rewind(s); + return r; +} + +static void stbi__gif_parse_colortable(stbi__context *s, stbi_uc pal[256][4], int num_entries, int transp) +{ + int i; + for (i=0; i < num_entries; ++i) { + pal[i][2] = stbi__get8(s); + pal[i][1] = stbi__get8(s); + pal[i][0] = stbi__get8(s); + pal[i][3] = transp == i ? 0 : 255; + } +} + +static int stbi__gif_header(stbi__context *s, stbi__gif *g, int *comp, int is_info) +{ + stbi_uc version; + if (stbi__get8(s) != 'G' || stbi__get8(s) != 'I' || stbi__get8(s) != 'F' || stbi__get8(s) != '8') + return stbi__err("not GIF", "Corrupt GIF"); + + version = stbi__get8(s); + if (version != '7' && version != '9') return stbi__err("not GIF", "Corrupt GIF"); + if (stbi__get8(s) != 'a') return stbi__err("not GIF", "Corrupt GIF"); + + stbi__g_failure_reason = ""; + g->w = stbi__get16le(s); + g->h = stbi__get16le(s); + g->flags = stbi__get8(s); + g->bgindex = stbi__get8(s); + g->ratio = stbi__get8(s); + g->transparent = -1; + + if (g->w > STBI_MAX_DIMENSIONS) return stbi__err("too large","Very large image (corrupt?)"); + if (g->h > STBI_MAX_DIMENSIONS) return stbi__err("too large","Very large image (corrupt?)"); + + if (comp != 0) *comp = 4; // can't actually tell whether it's 3 or 4 until we parse the comments + + if (is_info) return 1; + + if (g->flags & 0x80) + stbi__gif_parse_colortable(s,g->pal, 2 << (g->flags & 7), -1); + + return 1; +} + +static int stbi__gif_info_raw(stbi__context *s, int *x, int *y, int *comp) +{ + stbi__gif* g = (stbi__gif*) stbi__malloc(sizeof(stbi__gif)); + if (!g) return stbi__err("outofmem", "Out of memory"); + if (!stbi__gif_header(s, g, comp, 1)) { + STBI_FREE(g); + stbi__rewind( s ); + return 0; + } + if (x) *x = g->w; + if (y) *y = g->h; + STBI_FREE(g); + return 1; +} + +static void stbi__out_gif_code(stbi__gif *g, stbi__uint16 code) +{ + stbi_uc *p, *c; + int idx; + + // recurse to decode the prefixes, since the linked-list is backwards, + // and working backwards through an interleaved image would be nasty + if (g->codes[code].prefix >= 0) + stbi__out_gif_code(g, g->codes[code].prefix); + + if (g->cur_y >= g->max_y) return; + + idx = g->cur_x + g->cur_y; + p = &g->out[idx]; + g->history[idx / 4] = 1; + + c = &g->color_table[g->codes[code].suffix * 4]; + if (c[3] > 128) { // don't render transparent pixels; + p[0] = c[2]; + p[1] = c[1]; + p[2] = c[0]; + p[3] = c[3]; + } + g->cur_x += 4; + + if (g->cur_x >= g->max_x) { + g->cur_x = g->start_x; + g->cur_y += g->step; + + while (g->cur_y >= g->max_y && g->parse > 0) { + g->step = (1 << 
g->parse) * g->line_size; + g->cur_y = g->start_y + (g->step >> 1); + --g->parse; + } + } +} + +static stbi_uc *stbi__process_gif_raster(stbi__context *s, stbi__gif *g) +{ + stbi_uc lzw_cs; + stbi__int32 len, init_code; + stbi__uint32 first; + stbi__int32 codesize, codemask, avail, oldcode, bits, valid_bits, clear; + stbi__gif_lzw *p; + + lzw_cs = stbi__get8(s); + if (lzw_cs > 12) return NULL; + clear = 1 << lzw_cs; + first = 1; + codesize = lzw_cs + 1; + codemask = (1 << codesize) - 1; + bits = 0; + valid_bits = 0; + for (init_code = 0; init_code < clear; init_code++) { + g->codes[init_code].prefix = -1; + g->codes[init_code].first = (stbi_uc) init_code; + g->codes[init_code].suffix = (stbi_uc) init_code; + } + + // support no starting clear code + avail = clear+2; + oldcode = -1; + + len = 0; + for(;;) { + if (valid_bits < codesize) { + if (len == 0) { + len = stbi__get8(s); // start new block + if (len == 0) + return g->out; + } + --len; + bits |= (stbi__int32) stbi__get8(s) << valid_bits; + valid_bits += 8; + } else { + stbi__int32 code = bits & codemask; + bits >>= codesize; + valid_bits -= codesize; + // @OPTIMIZE: is there some way we can accelerate the non-clear path? + if (code == clear) { // clear code + codesize = lzw_cs + 1; + codemask = (1 << codesize) - 1; + avail = clear + 2; + oldcode = -1; + first = 0; + } else if (code == clear + 1) { // end of stream code + stbi__skip(s, len); + while ((len = stbi__get8(s)) > 0) + stbi__skip(s,len); + return g->out; + } else if (code <= avail) { + if (first) { + return stbi__errpuc("no clear code", "Corrupt GIF"); + } + + if (oldcode >= 0) { + p = &g->codes[avail++]; + if (avail > 8192) { + return stbi__errpuc("too many codes", "Corrupt GIF"); + } + + p->prefix = (stbi__int16) oldcode; + p->first = g->codes[oldcode].first; + p->suffix = (code == avail) ? p->first : g->codes[code].first; + } else if (code == avail) + return stbi__errpuc("illegal code in raster", "Corrupt GIF"); + + stbi__out_gif_code(g, (stbi__uint16) code); + + if ((avail & codemask) == 0 && avail <= 0x0FFF) { + codesize++; + codemask = (1 << codesize) - 1; + } + + oldcode = code; + } else { + return stbi__errpuc("illegal code in raster", "Corrupt GIF"); + } + } + } +} + +// this function is designed to support animated gifs, although stb_image doesn't support it +// two back is the image from two frames ago, used for a very specific disposal format +static stbi_uc *stbi__gif_load_next(stbi__context *s, stbi__gif *g, int *comp, int req_comp, stbi_uc *two_back) +{ + int dispose; + int first_frame; + int pi; + int pcount; + STBI_NOTUSED(req_comp); + + // on first frame, any non-written pixels get the background colour (non-transparent) + first_frame = 0; + if (g->out == 0) { + if (!stbi__gif_header(s, g, comp,0)) return 0; // stbi__g_failure_reason set by stbi__gif_header + if (!stbi__mad3sizes_valid(4, g->w, g->h, 0)) + return stbi__errpuc("too large", "GIF image is too large"); + pcount = g->w * g->h; + g->out = (stbi_uc *) stbi__malloc(4 * pcount); + g->background = (stbi_uc *) stbi__malloc(4 * pcount); + g->history = (stbi_uc *) stbi__malloc(pcount); + if (!g->out || !g->background || !g->history) + return stbi__errpuc("outofmem", "Out of memory"); + + // image is treated as "transparent" at the start - ie, nothing overwrites the current background; + // background colour is only used for pixels that are not rendered first frame, after that "background" + // color refers to the color that was there the previous frame. 
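+      // (the disposal handling in the else-branch below takes bits 2..4
+      // of the GCE flags: 0/1 leave the frame in place, 2 restores the
+      // saved background, 3 restores the frame before last via two_back.)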
+ memset(g->out, 0x00, 4 * pcount); + memset(g->background, 0x00, 4 * pcount); // state of the background (starts transparent) + memset(g->history, 0x00, pcount); // pixels that were affected previous frame + first_frame = 1; + } else { + // second frame - how do we dispose of the previous one? + dispose = (g->eflags & 0x1C) >> 2; + pcount = g->w * g->h; + + if ((dispose == 3) && (two_back == 0)) { + dispose = 2; // if I don't have an image to revert back to, default to the old background + } + + if (dispose == 3) { // use previous graphic + for (pi = 0; pi < pcount; ++pi) { + if (g->history[pi]) { + memcpy( &g->out[pi * 4], &two_back[pi * 4], 4 ); + } + } + } else if (dispose == 2) { + // restore what was changed last frame to background before that frame; + for (pi = 0; pi < pcount; ++pi) { + if (g->history[pi]) { + memcpy( &g->out[pi * 4], &g->background[pi * 4], 4 ); + } + } + } else { + // This is a non-disposal case eithe way, so just + // leave the pixels as is, and they will become the new background + // 1: do not dispose + // 0: not specified. + } + + // background is what out is after the undoing of the previou frame; + memcpy( g->background, g->out, 4 * g->w * g->h ); + } + + // clear my history; + memset( g->history, 0x00, g->w * g->h ); // pixels that were affected previous frame + + for (;;) { + int tag = stbi__get8(s); + switch (tag) { + case 0x2C: /* Image Descriptor */ + { + stbi__int32 x, y, w, h; + stbi_uc *o; + + x = stbi__get16le(s); + y = stbi__get16le(s); + w = stbi__get16le(s); + h = stbi__get16le(s); + if (((x + w) > (g->w)) || ((y + h) > (g->h))) + return stbi__errpuc("bad Image Descriptor", "Corrupt GIF"); + + g->line_size = g->w * 4; + g->start_x = x * 4; + g->start_y = y * g->line_size; + g->max_x = g->start_x + w * 4; + g->max_y = g->start_y + h * g->line_size; + g->cur_x = g->start_x; + g->cur_y = g->start_y; + + // if the width of the specified rectangle is 0, that means + // we may not see *any* pixels or the image is malformed; + // to make sure this is caught, move the current y down to + // max_y (which is what out_gif_code checks). + if (w == 0) + g->cur_y = g->max_y; + + g->lflags = stbi__get8(s); + + if (g->lflags & 0x40) { + g->step = 8 * g->line_size; // first interlaced spacing + g->parse = 3; + } else { + g->step = g->line_size; + g->parse = 0; + } + + if (g->lflags & 0x80) { + stbi__gif_parse_colortable(s,g->lpal, 2 << (g->lflags & 7), g->eflags & 0x01 ? g->transparent : -1); + g->color_table = (stbi_uc *) g->lpal; + } else if (g->flags & 0x80) { + g->color_table = (stbi_uc *) g->pal; + } else + return stbi__errpuc("missing color table", "Corrupt GIF"); + + o = stbi__process_gif_raster(s, g); + if (!o) return NULL; + + // if this was the first frame, + pcount = g->w * g->h; + if (first_frame && (g->bgindex > 0)) { + // if first frame, any pixel not drawn to gets the background color + for (pi = 0; pi < pcount; ++pi) { + if (g->history[pi] == 0) { + g->pal[g->bgindex][3] = 255; // just in case it was made transparent, undo that; It will be reset next frame if need be; + memcpy( &g->out[pi * 4], &g->pal[g->bgindex], 4 ); + } + } + } + + return o; + } + + case 0x21: // Comment Extension. + { + int len; + int ext = stbi__get8(s); + if (ext == 0xF9) { // Graphic Control Extension. + len = stbi__get8(s); + if (len == 4) { + g->eflags = stbi__get8(s); + g->delay = 10 * stbi__get16le(s); // delay - 1/100th of a second, saving as 1/1000ths. 
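+            // e.g. a GCE payload of 05 0A 00 00 means dispose=1 (keep
+            // frame), transparency on, a delay of 10/100s stored as
+            // 100ms, then transparent palette index 0 read just below.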
+ + // unset old transparent + if (g->transparent >= 0) { + g->pal[g->transparent][3] = 255; + } + if (g->eflags & 0x01) { + g->transparent = stbi__get8(s); + if (g->transparent >= 0) { + g->pal[g->transparent][3] = 0; + } + } else { + // don't need transparent + stbi__skip(s, 1); + g->transparent = -1; + } + } else { + stbi__skip(s, len); + break; + } + } + while ((len = stbi__get8(s)) != 0) { + stbi__skip(s, len); + } + break; + } + + case 0x3B: // gif stream termination code + return (stbi_uc *) s; // using '1' causes warning on some compilers + + default: + return stbi__errpuc("unknown code", "Corrupt GIF"); + } + } +} + +static void *stbi__load_gif_main_outofmem(stbi__gif *g, stbi_uc *out, int **delays) +{ + STBI_FREE(g->out); + STBI_FREE(g->history); + STBI_FREE(g->background); + + if (out) STBI_FREE(out); + if (delays && *delays) STBI_FREE(*delays); + return stbi__errpuc("outofmem", "Out of memory"); +} + +static void *stbi__load_gif_main(stbi__context *s, int **delays, int *x, int *y, int *z, int *comp, int req_comp) +{ + if (stbi__gif_test(s)) { + int layers = 0; + stbi_uc *u = 0; + stbi_uc *out = 0; + stbi_uc *two_back = 0; + stbi__gif g; + int stride; + int out_size = 0; + int delays_size = 0; + + STBI_NOTUSED(out_size); + STBI_NOTUSED(delays_size); + + memset(&g, 0, sizeof(g)); + if (delays) { + *delays = 0; + } + + do { + u = stbi__gif_load_next(s, &g, comp, req_comp, two_back); + if (u == (stbi_uc *) s) u = 0; // end of animated gif marker + + if (u) { + *x = g.w; + *y = g.h; + ++layers; + stride = g.w * g.h * 4; + + if (out) { + void *tmp = (stbi_uc*) STBI_REALLOC_SIZED( out, out_size, layers * stride ); + if (!tmp) + return stbi__load_gif_main_outofmem(&g, out, delays); + else { + out = (stbi_uc*) tmp; + out_size = layers * stride; + } + + if (delays) { + int *new_delays = (int*) STBI_REALLOC_SIZED( *delays, delays_size, sizeof(int) * layers ); + if (!new_delays) + return stbi__load_gif_main_outofmem(&g, out, delays); + *delays = new_delays; + delays_size = layers * sizeof(int); + } + } else { + out = (stbi_uc*)stbi__malloc( layers * stride ); + if (!out) + return stbi__load_gif_main_outofmem(&g, out, delays); + out_size = layers * stride; + if (delays) { + *delays = (int*) stbi__malloc( layers * sizeof(int) ); + if (!*delays) + return stbi__load_gif_main_outofmem(&g, out, delays); + delays_size = layers * sizeof(int); + } + } + memcpy( out + ((layers - 1) * stride), u, stride ); + if (layers >= 2) { + two_back = out - 2 * stride; + } + + if (delays) { + (*delays)[layers - 1U] = g.delay; + } + } + } while (u != 0); + + // free temp buffer; + STBI_FREE(g.out); + STBI_FREE(g.history); + STBI_FREE(g.background); + + // do the final conversion after loading everything; + if (req_comp && req_comp != 4) + out = stbi__convert_format(out, 4, req_comp, layers * g.w, g.h); + + *z = layers; + return out; + } else { + return stbi__errpuc("not GIF", "Image was not as a gif type."); + } +} + +static void *stbi__gif_load(stbi__context *s, int *x, int *y, int *comp, int req_comp, stbi__result_info *ri) +{ + stbi_uc *u = 0; + stbi__gif g; + memset(&g, 0, sizeof(g)); + STBI_NOTUSED(ri); + + u = stbi__gif_load_next(s, &g, comp, req_comp, 0); + if (u == (stbi_uc *) s) u = 0; // end of animated gif marker + if (u) { + *x = g.w; + *y = g.h; + + // moved conversion to after successful load so that the same + // can be done for multiple frames. 
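+      // (stbi__convert_format frees its input and returns NULL on
+      // failure, so u can be returned as-is either way.)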
+ if (req_comp && req_comp != 4) + u = stbi__convert_format(u, 4, req_comp, g.w, g.h); + } else if (g.out) { + // if there was an error and we allocated an image buffer, free it! + STBI_FREE(g.out); + } + + // free buffers needed for multiple frame loading; + STBI_FREE(g.history); + STBI_FREE(g.background); + + return u; +} + +static int stbi__gif_info(stbi__context *s, int *x, int *y, int *comp) +{ + return stbi__gif_info_raw(s,x,y,comp); +} +#endif + +// ************************************************************************************************* +// Radiance RGBE HDR loader +// originally by Nicolas Schulz +#ifndef STBI_NO_HDR +static int stbi__hdr_test_core(stbi__context *s, const char *signature) +{ + int i; + for (i=0; signature[i]; ++i) + if (stbi__get8(s) != signature[i]) + return 0; + stbi__rewind(s); + return 1; +} + +static int stbi__hdr_test(stbi__context* s) +{ + int r = stbi__hdr_test_core(s, "#?RADIANCE\n"); + stbi__rewind(s); + if(!r) { + r = stbi__hdr_test_core(s, "#?RGBE\n"); + stbi__rewind(s); + } + return r; +} + +#define STBI__HDR_BUFLEN 1024 +static char *stbi__hdr_gettoken(stbi__context *z, char *buffer) +{ + int len=0; + char c = '\0'; + + c = (char) stbi__get8(z); + + while (!stbi__at_eof(z) && c != '\n') { + buffer[len++] = c; + if (len == STBI__HDR_BUFLEN-1) { + // flush to end of line + while (!stbi__at_eof(z) && stbi__get8(z) != '\n') + ; + break; + } + c = (char) stbi__get8(z); + } + + buffer[len] = 0; + return buffer; +} + +static void stbi__hdr_convert(float *output, stbi_uc *input, int req_comp) +{ + if ( input[3] != 0 ) { + float f1; + // Exponent + f1 = (float) ldexp(1.0f, input[3] - (int)(128 + 8)); + if (req_comp <= 2) + output[0] = (input[0] + input[1] + input[2]) * f1 / 3; + else { + output[0] = input[0] * f1; + output[1] = input[1] * f1; + output[2] = input[2] * f1; + } + if (req_comp == 2) output[1] = 1; + if (req_comp == 4) output[3] = 1; + } else { + switch (req_comp) { + case 4: output[3] = 1; /* fallthrough */ + case 3: output[0] = output[1] = output[2] = 0; + break; + case 2: output[1] = 1; /* fallthrough */ + case 1: output[0] = 0; + break; + } + } +} + +static float *stbi__hdr_load(stbi__context *s, int *x, int *y, int *comp, int req_comp, stbi__result_info *ri) +{ + char buffer[STBI__HDR_BUFLEN]; + char *token; + int valid = 0; + int width, height; + stbi_uc *scanline; + float *hdr_data; + int len; + unsigned char count, value; + int i, j, k, c1,c2, z; + const char *headerToken; + STBI_NOTUSED(ri); + + // Check identifier + headerToken = stbi__hdr_gettoken(s,buffer); + if (strcmp(headerToken, "#?RADIANCE") != 0 && strcmp(headerToken, "#?RGBE") != 0) + return stbi__errpf("not HDR", "Corrupt HDR image"); + + // Parse header + for(;;) { + token = stbi__hdr_gettoken(s,buffer); + if (token[0] == 0) break; + if (strcmp(token, "FORMAT=32-bit_rle_rgbe") == 0) valid = 1; + } + + if (!valid) return stbi__errpf("unsupported format", "Unsupported HDR format"); + + // Parse width and height + // can't use sscanf() if we're not using stdio! 
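+   // Radiance headers end with a resolution line, normally of the form
+   // "-Y <height> +X <width>"; the format also defines other axis orders
+   // (flips/rotations), which are rejected below as "unsupported data layout".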
+   token = stbi__hdr_gettoken(s,buffer);
+   if (strncmp(token, "-Y ", 3)) return stbi__errpf("unsupported data layout", "Unsupported HDR format");
+   token += 3;
+   height = (int) strtol(token, &token, 10);
+   while (*token == ' ') ++token;
+   if (strncmp(token, "+X ", 3)) return stbi__errpf("unsupported data layout", "Unsupported HDR format");
+   token += 3;
+   width = (int) strtol(token, NULL, 10);
+
+   if (height > STBI_MAX_DIMENSIONS) return stbi__errpf("too large","Very large image (corrupt?)");
+   if (width > STBI_MAX_DIMENSIONS) return stbi__errpf("too large","Very large image (corrupt?)");
+
+   *x = width;
+   *y = height;
+
+   if (comp) *comp = 3;
+   if (req_comp == 0) req_comp = 3;
+
+   if (!stbi__mad4sizes_valid(width, height, req_comp, sizeof(float), 0))
+      return stbi__errpf("too large", "HDR image is too large");
+
+   // Read data
+   hdr_data = (float *) stbi__malloc_mad4(width, height, req_comp, sizeof(float), 0);
+   if (!hdr_data)
+      return stbi__errpf("outofmem", "Out of memory");
+
+   // Load image data
+   // image data is stored as some number of scanlines
+   if ( width < 8 || width >= 32768) {
+      // Read flat data
+      for (j=0; j < height; ++j) {
+         for (i=0; i < width; ++i) {
+            stbi_uc rgbe[4];
+           main_decode_loop:
+            stbi__getn(s, rgbe, 4);
+            stbi__hdr_convert(hdr_data + j * width * req_comp + i * req_comp, rgbe, req_comp);
+         }
+      }
+   } else {
+      // Read RLE-encoded data
+      scanline = NULL;
+
+      for (j = 0; j < height; ++j) {
+         c1 = stbi__get8(s);
+         c2 = stbi__get8(s);
+         len = stbi__get8(s);
+         if (c1 != 2 || c2 != 2 || (len & 0x80)) {
+            // not run-length encoded, so we have to actually use THIS data as a decoded
+            // pixel (note this can't be a valid pixel--one of RGB must be >= 128)
+            stbi_uc rgbe[4];
+            rgbe[0] = (stbi_uc) c1;
+            rgbe[1] = (stbi_uc) c2;
+            rgbe[2] = (stbi_uc) len;
+            rgbe[3] = (stbi_uc) stbi__get8(s);
+            stbi__hdr_convert(hdr_data, rgbe, req_comp);
+            i = 1;
+            j = 0;
+            STBI_FREE(scanline);
+            goto main_decode_loop; // yes, this makes no sense
+         }
+         len <<= 8;
+         len |= stbi__get8(s);
+         if (len != width) { STBI_FREE(hdr_data); STBI_FREE(scanline); return stbi__errpf("invalid decoded scanline length", "corrupt HDR"); }
+         if (scanline == NULL) {
+            scanline = (stbi_uc *) stbi__malloc_mad2(width, 4, 0);
+            if (!scanline) {
+               STBI_FREE(hdr_data);
+               return stbi__errpf("outofmem", "Out of memory");
+            }
+         }
+
+         for (k = 0; k < 4; ++k) {
+            int nleft;
+            i = 0;
+            while ((nleft = width - i) > 0) {
+               count = stbi__get8(s);
+               if (count > 128) {
+                  // Run
+                  value = stbi__get8(s);
+                  count -= 128;
+                  if ((count == 0) || (count > nleft)) { STBI_FREE(hdr_data); STBI_FREE(scanline); return stbi__errpf("corrupt", "bad RLE data in HDR"); }
+                  for (z = 0; z < count; ++z)
+                     scanline[i++ * 4 + k] = value;
+               } else {
+                  // Dump
+                  if ((count == 0) || (count > nleft)) { STBI_FREE(hdr_data); STBI_FREE(scanline); return stbi__errpf("corrupt", "bad RLE data in HDR"); }
+                  for (z = 0; z < count; ++z)
+                     scanline[i++ * 4 + k] = stbi__get8(s);
+               }
+            }
+         }
+         for (i=0; i < width; ++i)
+            stbi__hdr_convert(hdr_data+(j*width + i)*req_comp, scanline + i*4, req_comp);
+      }
+      if (scanline)
+         STBI_FREE(scanline);
+   }
+
+   return hdr_data;
+}
+
+static int stbi__hdr_info(stbi__context *s, int *x, int *y, int *comp)
+{
+   char buffer[STBI__HDR_BUFLEN];
+   char *token;
+   int valid = 0;
+   int dummy;
+
+   if (!x) x = &dummy;
+   if (!y) y = &dummy;
+   if (!comp) comp = &dummy;
+
+   if (stbi__hdr_test(s) == 0) {
+      stbi__rewind( s );
+      return 0;
+   }
+
+   for(;;) {
+      token = stbi__hdr_gettoken(s,buffer);
+      if (token[0] == 0) break;
+      if (strcmp(token,
"FORMAT=32-bit_rle_rgbe") == 0) valid = 1; + } + + if (!valid) { + stbi__rewind( s ); + return 0; + } + token = stbi__hdr_gettoken(s,buffer); + if (strncmp(token, "-Y ", 3)) { + stbi__rewind( s ); + return 0; + } + token += 3; + *y = (int) strtol(token, &token, 10); + while (*token == ' ') ++token; + if (strncmp(token, "+X ", 3)) { + stbi__rewind( s ); + return 0; + } + token += 3; + *x = (int) strtol(token, NULL, 10); + *comp = 3; + return 1; +} +#endif // STBI_NO_HDR + +#ifndef STBI_NO_BMP +static int stbi__bmp_info(stbi__context *s, int *x, int *y, int *comp) +{ + void *p; + stbi__bmp_data info; + + info.all_a = 255; + p = stbi__bmp_parse_header(s, &info); + if (p == NULL) { + stbi__rewind( s ); + return 0; + } + if (x) *x = s->img_x; + if (y) *y = s->img_y; + if (comp) { + if (info.bpp == 24 && info.ma == 0xff000000) + *comp = 3; + else + *comp = info.ma ? 4 : 3; + } + return 1; +} +#endif + +#ifndef STBI_NO_PSD +static int stbi__psd_info(stbi__context *s, int *x, int *y, int *comp) +{ + int channelCount, dummy, depth; + if (!x) x = &dummy; + if (!y) y = &dummy; + if (!comp) comp = &dummy; + if (stbi__get32be(s) != 0x38425053) { + stbi__rewind( s ); + return 0; + } + if (stbi__get16be(s) != 1) { + stbi__rewind( s ); + return 0; + } + stbi__skip(s, 6); + channelCount = stbi__get16be(s); + if (channelCount < 0 || channelCount > 16) { + stbi__rewind( s ); + return 0; + } + *y = stbi__get32be(s); + *x = stbi__get32be(s); + depth = stbi__get16be(s); + if (depth != 8 && depth != 16) { + stbi__rewind( s ); + return 0; + } + if (stbi__get16be(s) != 3) { + stbi__rewind( s ); + return 0; + } + *comp = 4; + return 1; +} + +static int stbi__psd_is16(stbi__context *s) +{ + int channelCount, depth; + if (stbi__get32be(s) != 0x38425053) { + stbi__rewind( s ); + return 0; + } + if (stbi__get16be(s) != 1) { + stbi__rewind( s ); + return 0; + } + stbi__skip(s, 6); + channelCount = stbi__get16be(s); + if (channelCount < 0 || channelCount > 16) { + stbi__rewind( s ); + return 0; + } + STBI_NOTUSED(stbi__get32be(s)); + STBI_NOTUSED(stbi__get32be(s)); + depth = stbi__get16be(s); + if (depth != 16) { + stbi__rewind( s ); + return 0; + } + return 1; +} +#endif + +#ifndef STBI_NO_PIC +static int stbi__pic_info(stbi__context *s, int *x, int *y, int *comp) +{ + int act_comp=0,num_packets=0,chained,dummy; + stbi__pic_packet packets[10]; + + if (!x) x = &dummy; + if (!y) y = &dummy; + if (!comp) comp = &dummy; + + if (!stbi__pic_is4(s,"\x53\x80\xF6\x34")) { + stbi__rewind(s); + return 0; + } + + stbi__skip(s, 88); + + *x = stbi__get16be(s); + *y = stbi__get16be(s); + if (stbi__at_eof(s)) { + stbi__rewind( s); + return 0; + } + if ( (*x) != 0 && (1 << 28) / (*x) < (*y)) { + stbi__rewind( s ); + return 0; + } + + stbi__skip(s, 8); + + do { + stbi__pic_packet *packet; + + if (num_packets==sizeof(packets)/sizeof(packets[0])) + return 0; + + packet = &packets[num_packets++]; + chained = stbi__get8(s); + packet->size = stbi__get8(s); + packet->type = stbi__get8(s); + packet->channel = stbi__get8(s); + act_comp |= packet->channel; + + if (stbi__at_eof(s)) { + stbi__rewind( s ); + return 0; + } + if (packet->size != 8) { + stbi__rewind( s ); + return 0; + } + } while (chained); + + *comp = (act_comp & 0x10 ? 
4 : 3); + + return 1; +} +#endif + +// ************************************************************************************************* +// Portable Gray Map and Portable Pixel Map loader +// by Ken Miller +// +// PGM: http://netpbm.sourceforge.net/doc/pgm.html +// PPM: http://netpbm.sourceforge.net/doc/ppm.html +// +// Known limitations: +// Does not support comments in the header section +// Does not support ASCII image data (formats P2 and P3) + +#ifndef STBI_NO_PNM + +static int stbi__pnm_test(stbi__context *s) +{ + char p, t; + p = (char) stbi__get8(s); + t = (char) stbi__get8(s); + if (p != 'P' || (t != '5' && t != '6')) { + stbi__rewind( s ); + return 0; + } + return 1; +} + +static void *stbi__pnm_load(stbi__context *s, int *x, int *y, int *comp, int req_comp, stbi__result_info *ri) +{ + stbi_uc *out; + STBI_NOTUSED(ri); + + ri->bits_per_channel = stbi__pnm_info(s, (int *)&s->img_x, (int *)&s->img_y, (int *)&s->img_n); + if (ri->bits_per_channel == 0) + return 0; + + if (s->img_y > STBI_MAX_DIMENSIONS) return stbi__errpuc("too large","Very large image (corrupt?)"); + if (s->img_x > STBI_MAX_DIMENSIONS) return stbi__errpuc("too large","Very large image (corrupt?)"); + + *x = s->img_x; + *y = s->img_y; + if (comp) *comp = s->img_n; + + if (!stbi__mad4sizes_valid(s->img_n, s->img_x, s->img_y, ri->bits_per_channel / 8, 0)) + return stbi__errpuc("too large", "PNM too large"); + + out = (stbi_uc *) stbi__malloc_mad4(s->img_n, s->img_x, s->img_y, ri->bits_per_channel / 8, 0); + if (!out) return stbi__errpuc("outofmem", "Out of memory"); + if (!stbi__getn(s, out, s->img_n * s->img_x * s->img_y * (ri->bits_per_channel / 8))) { + STBI_FREE(out); + return stbi__errpuc("bad PNM", "PNM file truncated"); + } + + if (req_comp && req_comp != s->img_n) { + if (ri->bits_per_channel == 16) { + out = (stbi_uc *) stbi__convert_format16((stbi__uint16 *) out, s->img_n, req_comp, s->img_x, s->img_y); + } else { + out = stbi__convert_format(out, s->img_n, req_comp, s->img_x, s->img_y); + } + if (out == NULL) return out; // stbi__convert_format frees input on failure + } + return out; +} + +static int stbi__pnm_isspace(char c) +{ + return c == ' ' || c == '\t' || c == '\n' || c == '\v' || c == '\f' || c == '\r'; +} + +static void stbi__pnm_skip_whitespace(stbi__context *s, char *c) +{ + for (;;) { + while (!stbi__at_eof(s) && stbi__pnm_isspace(*c)) + *c = (char) stbi__get8(s); + + if (stbi__at_eof(s) || *c != '#') + break; + + while (!stbi__at_eof(s) && *c != '\n' && *c != '\r' ) + *c = (char) stbi__get8(s); + } +} + +static int stbi__pnm_isdigit(char c) +{ + return c >= '0' && c <= '9'; +} + +static int stbi__pnm_getinteger(stbi__context *s, char *c) +{ + int value = 0; + + while (!stbi__at_eof(s) && stbi__pnm_isdigit(*c)) { + value = value*10 + (*c - '0'); + *c = (char) stbi__get8(s); + if((value > 214748364) || (value == 214748364 && *c > '7')) + return stbi__err("integer parse overflow", "Parsing an integer in the PPM header overflowed a 32-bit int"); + } + + return value; +} + +static int stbi__pnm_info(stbi__context *s, int *x, int *y, int *comp) +{ + int maxv, dummy; + char c, p, t; + + if (!x) x = &dummy; + if (!y) y = &dummy; + if (!comp) comp = &dummy; + + stbi__rewind(s); + + // Get identifier + p = (char) stbi__get8(s); + t = (char) stbi__get8(s); + if (p != 'P' || (t != '5' && t != '6')) { + stbi__rewind(s); + return 0; + } + + *comp = (t == '6') ? 
3 : 1; // '5' is 1-component .pgm; '6' is 3-component .ppm
+
+   c = (char) stbi__get8(s);
+   stbi__pnm_skip_whitespace(s, &c);
+
+   *x = stbi__pnm_getinteger(s, &c); // read width
+   if(*x == 0)
+      return stbi__err("invalid width", "PPM image header had zero or overflowing width");
+   stbi__pnm_skip_whitespace(s, &c);
+
+   *y = stbi__pnm_getinteger(s, &c); // read height
+   if (*y == 0)
+      return stbi__err("invalid height", "PPM image header had zero or overflowing height");
+   stbi__pnm_skip_whitespace(s, &c);
+
+   maxv = stbi__pnm_getinteger(s, &c); // read max value
+   if (maxv > 65535)
+      return stbi__err("max value > 65535", "PPM image supports only 8-bit and 16-bit images");
+   else if (maxv > 255)
+      return 16;
+   else
+      return 8;
+}
+
+static int stbi__pnm_is16(stbi__context *s)
+{
+   if (stbi__pnm_info(s, NULL, NULL, NULL) == 16)
+      return 1;
+   return 0;
+}
+#endif
+
+static int stbi__info_main(stbi__context *s, int *x, int *y, int *comp)
+{
+   #ifndef STBI_NO_JPEG
+   if (stbi__jpeg_info(s, x, y, comp)) return 1;
+   #endif
+
+   #ifndef STBI_NO_PNG
+   if (stbi__png_info(s, x, y, comp)) return 1;
+   #endif
+
+   #ifndef STBI_NO_GIF
+   if (stbi__gif_info(s, x, y, comp)) return 1;
+   #endif
+
+   #ifndef STBI_NO_BMP
+   if (stbi__bmp_info(s, x, y, comp)) return 1;
+   #endif
+
+   #ifndef STBI_NO_PSD
+   if (stbi__psd_info(s, x, y, comp)) return 1;
+   #endif
+
+   #ifndef STBI_NO_PIC
+   if (stbi__pic_info(s, x, y, comp)) return 1;
+   #endif
+
+   #ifndef STBI_NO_PNM
+   if (stbi__pnm_info(s, x, y, comp)) return 1;
+   #endif
+
+   #ifndef STBI_NO_HDR
+   if (stbi__hdr_info(s, x, y, comp)) return 1;
+   #endif
+
+   // test tga last because it's a crappy test!
+   #ifndef STBI_NO_TGA
+   if (stbi__tga_info(s, x, y, comp))
+      return 1;
+   #endif
+   return stbi__err("unknown image type", "Image not of any known type, or corrupt");
+}
+
+static int stbi__is_16_main(stbi__context *s)
+{
+   #ifndef STBI_NO_PNG
+   if (stbi__png_is16(s)) return 1;
+   #endif
+
+   #ifndef STBI_NO_PSD
+   if (stbi__psd_is16(s)) return 1;
+   #endif
+
+   #ifndef STBI_NO_PNM
+   if (stbi__pnm_is16(s)) return 1;
+   #endif
+   return 0;
+}
+
+#ifndef STBI_NO_STDIO
+STBIDEF int stbi_info(char const *filename, int *x, int *y, int *comp)
+{
+    FILE *f = stbi__fopen(filename, "rb");
+    int result;
+    if (!f) return stbi__err("can't fopen", "Unable to open file");
+    result = stbi_info_from_file(f, x, y, comp);
+    fclose(f);
+    return result;
+}
+
+STBIDEF int stbi_info_from_file(FILE *f, int *x, int *y, int *comp)
+{
+   int r;
+   stbi__context s;
+   long pos = ftell(f);
+   stbi__start_file(&s, f);
+   r = stbi__info_main(&s,x,y,comp);
+   fseek(f,pos,SEEK_SET);
+   return r;
+}
+
+STBIDEF int stbi_is_16_bit(char const *filename)
+{
+    FILE *f = stbi__fopen(filename, "rb");
+    int result;
+    if (!f) return stbi__err("can't fopen", "Unable to open file");
+    result = stbi_is_16_bit_from_file(f);
+    fclose(f);
+    return result;
+}
+
+STBIDEF int stbi_is_16_bit_from_file(FILE *f)
+{
+   int r;
+   stbi__context s;
+   long pos = ftell(f);
+   stbi__start_file(&s, f);
+   r = stbi__is_16_main(&s);
+   fseek(f,pos,SEEK_SET);
+   return r;
+}
+#endif // !STBI_NO_STDIO
+
+STBIDEF int stbi_info_from_memory(stbi_uc const *buffer, int len, int *x, int *y, int *comp)
+{
+   stbi__context s;
+   stbi__start_mem(&s,buffer,len);
+   return stbi__info_main(&s,x,y,comp);
+}
+
+STBIDEF int stbi_info_from_callbacks(stbi_io_callbacks const *c, void *user, int *x, int *y, int *comp)
+{
+   stbi__context s;
+   stbi__start_callbacks(&s, (stbi_io_callbacks *) c, user);
+   return stbi__info_main(&s,x,y,comp);
+}
+
+STBIDEF int stbi_is_16_bit_from_memory(stbi_uc const
*buffer, int len) +{ + stbi__context s; + stbi__start_mem(&s,buffer,len); + return stbi__is_16_main(&s); +} + +STBIDEF int stbi_is_16_bit_from_callbacks(stbi_io_callbacks const *c, void *user) +{ + stbi__context s; + stbi__start_callbacks(&s, (stbi_io_callbacks *) c, user); + return stbi__is_16_main(&s); +} + +#endif // STB_IMAGE_IMPLEMENTATION + +/* + revision history: + 2.20 (2019-02-07) support utf8 filenames in Windows; fix warnings and platform ifdefs + 2.19 (2018-02-11) fix warning + 2.18 (2018-01-30) fix warnings + 2.17 (2018-01-29) change sbti__shiftsigned to avoid clang -O2 bug + 1-bit BMP + *_is_16_bit api + avoid warnings + 2.16 (2017-07-23) all functions have 16-bit variants; + STBI_NO_STDIO works again; + compilation fixes; + fix rounding in unpremultiply; + optimize vertical flip; + disable raw_len validation; + documentation fixes + 2.15 (2017-03-18) fix png-1,2,4 bug; now all Imagenet JPGs decode; + warning fixes; disable run-time SSE detection on gcc; + uniform handling of optional "return" values; + thread-safe initialization of zlib tables + 2.14 (2017-03-03) remove deprecated STBI_JPEG_OLD; fixes for Imagenet JPGs + 2.13 (2016-11-29) add 16-bit API, only supported for PNG right now + 2.12 (2016-04-02) fix typo in 2.11 PSD fix that caused crashes + 2.11 (2016-04-02) allocate large structures on the stack + remove white matting for transparent PSD + fix reported channel count for PNG & BMP + re-enable SSE2 in non-gcc 64-bit + support RGB-formatted JPEG + read 16-bit PNGs (only as 8-bit) + 2.10 (2016-01-22) avoid warning introduced in 2.09 by STBI_REALLOC_SIZED + 2.09 (2016-01-16) allow comments in PNM files + 16-bit-per-pixel TGA (not bit-per-component) + info() for TGA could break due to .hdr handling + info() for BMP to shares code instead of sloppy parse + can use STBI_REALLOC_SIZED if allocator doesn't support realloc + code cleanup + 2.08 (2015-09-13) fix to 2.07 cleanup, reading RGB PSD as RGBA + 2.07 (2015-09-13) fix compiler warnings + partial animated GIF support + limited 16-bpc PSD support + #ifdef unused functions + bug with < 92 byte PIC,PNM,HDR,TGA + 2.06 (2015-04-19) fix bug where PSD returns wrong '*comp' value + 2.05 (2015-04-19) fix bug in progressive JPEG handling, fix warning + 2.04 (2015-04-15) try to re-enable SIMD on MinGW 64-bit + 2.03 (2015-04-12) extra corruption checking (mmozeiko) + stbi_set_flip_vertically_on_load (nguillemot) + fix NEON support; fix mingw support + 2.02 (2015-01-19) fix incorrect assert, fix warning + 2.01 (2015-01-17) fix various warnings; suppress SIMD on gcc 32-bit without -msse2 + 2.00b (2014-12-25) fix STBI_MALLOC in progressive JPEG + 2.00 (2014-12-25) optimize JPG, including x86 SSE2 & NEON SIMD (ryg) + progressive JPEG (stb) + PGM/PPM support (Ken Miller) + STBI_MALLOC,STBI_REALLOC,STBI_FREE + GIF bugfix -- seemingly never worked + STBI_NO_*, STBI_ONLY_* + 1.48 (2014-12-14) fix incorrectly-named assert() + 1.47 (2014-12-14) 1/2/4-bit PNG support, both direct and paletted (Omar Cornut & stb) + optimize PNG (ryg) + fix bug in interlaced PNG with user-specified channel count (stb) + 1.46 (2014-08-26) + fix broken tRNS chunk (colorkey-style transparency) in non-paletted PNG + 1.45 (2014-08-16) + fix MSVC-ARM internal compiler error by wrapping malloc + 1.44 (2014-08-07) + various warning fixes from Ronny Chevalier + 1.43 (2014-07-15) + fix MSVC-only compiler problem in code changed in 1.42 + 1.42 (2014-07-09) + don't define _CRT_SECURE_NO_WARNINGS (affects user code) + fixes to stbi__cleanup_jpeg path + added STBI_ASSERT 
to avoid requiring assert.h + 1.41 (2014-06-25) + fix search&replace from 1.36 that messed up comments/error messages + 1.40 (2014-06-22) + fix gcc struct-initialization warning + 1.39 (2014-06-15) + fix to TGA optimization when req_comp != number of components in TGA; + fix to GIF loading because BMP wasn't rewinding (whoops, no GIFs in my test suite) + add support for BMP version 5 (more ignored fields) + 1.38 (2014-06-06) + suppress MSVC warnings on integer casts truncating values + fix accidental rename of 'skip' field of I/O + 1.37 (2014-06-04) + remove duplicate typedef + 1.36 (2014-06-03) + convert to header file single-file library + if de-iphone isn't set, load iphone images color-swapped instead of returning NULL + 1.35 (2014-05-27) + various warnings + fix broken STBI_SIMD path + fix bug where stbi_load_from_file no longer left file pointer in correct place + fix broken non-easy path for 32-bit BMP (possibly never used) + TGA optimization by Arseny Kapoulkine + 1.34 (unknown) + use STBI_NOTUSED in stbi__resample_row_generic(), fix one more leak in tga failure case + 1.33 (2011-07-14) + make stbi_is_hdr work in STBI_NO_HDR (as specified), minor compiler-friendly improvements + 1.32 (2011-07-13) + support for "info" function for all supported filetypes (SpartanJ) + 1.31 (2011-06-20) + a few more leak fixes, bug in PNG handling (SpartanJ) + 1.30 (2011-06-11) + added ability to load files via callbacks to accomidate custom input streams (Ben Wenger) + removed deprecated format-specific test/load functions + removed support for installable file formats (stbi_loader) -- would have been broken for IO callbacks anyway + error cases in bmp and tga give messages and don't leak (Raymond Barbiero, grisha) + fix inefficiency in decoding 32-bit BMP (David Woo) + 1.29 (2010-08-16) + various warning fixes from Aurelien Pocheville + 1.28 (2010-08-01) + fix bug in GIF palette transparency (SpartanJ) + 1.27 (2010-08-01) + cast-to-stbi_uc to fix warnings + 1.26 (2010-07-24) + fix bug in file buffering for PNG reported by SpartanJ + 1.25 (2010-07-17) + refix trans_data warning (Won Chun) + 1.24 (2010-07-12) + perf improvements reading from files on platforms with lock-heavy fgetc() + minor perf improvements for jpeg + deprecated type-specific functions so we'll get feedback if they're needed + attempt to fix trans_data warning (Won Chun) + 1.23 fixed bug in iPhone support + 1.22 (2010-07-10) + removed image *writing* support + stbi_info support from Jetro Lauha + GIF support from Jean-Marc Lienher + iPhone PNG-extensions from James Brown + warning-fixes from Nicolas Schulz and Janez Zemva (i.stbi__err. Janez (U+017D)emva) + 1.21 fix use of 'stbi_uc' in header (reported by jon blow) + 1.20 added support for Softimage PIC, by Tom Seddon + 1.19 bug in interlaced PNG corruption check (found by ryg) + 1.18 (2008-08-02) + fix a threading bug (local mutable static) + 1.17 support interlaced PNG + 1.16 major bugfix - stbi__convert_format converted one too many pixels + 1.15 initialize some fields for thread safety + 1.14 fix threadsafe conversion bug + header-file-only version (#define STBI_HEADER_FILE_ONLY before including) + 1.13 threadsafe + 1.12 const qualifiers in the API + 1.11 Support installable IDCT, colorspace conversion routines + 1.10 Fixes for 64-bit (don't use "unsigned long") + optimized upsampling by Fabian "ryg" Giesen + 1.09 Fix format-conversion for PSD code (bad global variables!) 
+ 1.08 Thatcher Ulrich's PSD code integrated by Nicolas Schulz + 1.07 attempt to fix C++ warning/errors again + 1.06 attempt to fix C++ warning/errors again + 1.05 fix TGA loading to return correct *comp and use good luminance calc + 1.04 default float alpha is 1, not 255; use 'void *' for stbi_image_free + 1.03 bugfixes to STBI_NO_STDIO, STBI_NO_HDR + 1.02 support for (subset of) HDR files, float interface for preferred access to them + 1.01 fix bug: possible bug in handling right-side up bmps... not sure + fix bug: the stbi__bmp_load() and stbi__tga_load() functions didn't work at all + 1.00 interface to zlib that skips zlib header + 0.99 correct handling of alpha in palette + 0.98 TGA loader by lonesock; dynamically add loaders (untested) + 0.97 jpeg errors on too large a file; also catch another malloc failure + 0.96 fix detection of invalid v value - particleman@mollyrocket forum + 0.95 during header scan, seek to markers in case of padding + 0.94 STBI_NO_STDIO to disable stdio usage; rename all #defines the same + 0.93 handle jpegtran output; verbose errors + 0.92 read 4,8,16,24,32-bit BMP files of several formats + 0.91 output 24-bit Windows 3.0 BMP files + 0.90 fix a few more warnings; bump version number to approach 1.0 + 0.61 bugfixes due to Marc LeBlanc, Christopher Lloyd + 0.60 fix compiling as c++ + 0.59 fix warnings: merge Dave Moore's -Wall fixes + 0.58 fix bug: zlib uncompressed mode len/nlen was wrong endian + 0.57 fix bug: jpg last huffman symbol before marker was >9 bits but less than 16 available + 0.56 fix bug: zlib uncompressed mode len vs. nlen + 0.55 fix bug: restart_interval not initialized to 0 + 0.54 allow NULL for 'int *comp' + 0.53 fix bug in png 3->4; speedup png decoding + 0.52 png handles req_comp=3,4 directly; minor cleanup; jpeg comments + 0.51 obey req_comp requests, 1-component jpegs return as 1-component, + on 'test' only check type, not whether we support this variant + 0.50 (2006-11-19) + first released version +*/ + + +/* +------------------------------------------------------------------------------ +This software is available under 2 licenses -- choose whichever you prefer. +------------------------------------------------------------------------------ +ALTERNATIVE A - MIT License +Copyright (c) 2017 Sean Barrett +Permission is hereby granted, free of charge, to any person obtaining a copy of +this software and associated documentation files (the "Software"), to deal in +the Software without restriction, including without limitation the rights to +use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies +of the Software, and to permit persons to whom the Software is furnished to do +so, subject to the following conditions: +The above copyright notice and this permission notice shall be included in all +copies or substantial portions of the Software. +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +SOFTWARE. +------------------------------------------------------------------------------ +ALTERNATIVE B - Public Domain (www.unlicense.org) +This is free and unencumbered software released into the public domain. 
+Anyone is free to copy, modify, publish, use, compile, sell, or distribute this
+software, either in source code form or as a compiled binary, for any purpose,
+commercial or non-commercial, and by any means.
+In jurisdictions that recognize copyright laws, the author or authors of this
+software dedicate any and all copyright interest in the software to the public
+domain. We make this dedication for the benefit of the public at large and to
+the detriment of our heirs and successors. We intend this dedication to be an
+overt act of relinquishment in perpetuity of all present and future rights to
+this software under copyright law.
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+------------------------------------------------------------------------------
+*/
diff --git a/vendor/tiny_obj_loader_c.h b/vendor/tiny_obj_loader_c.h
new file mode 100644
index 0000000..09087fe
--- /dev/null
+++ b/vendor/tiny_obj_loader_c.h
@@ -0,0 +1,1793 @@
+/*
+  The MIT License (MIT)
+
+  Copyright (c) 2016 - 2019 Syoyo Fujita and many contributors.
+
+  Permission is hereby granted, free of charge, to any person obtaining a copy
+  of this software and associated documentation files (the "Software"), to deal
+  in the Software without restriction, including without limitation the rights
+  to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+  copies of the Software, and to permit persons to whom the Software is
+  furnished to do so, subject to the following conditions:
+
+  The above copyright notice and this permission notice shall be included in
+  all copies or substantial portions of the Software.
+
+  THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+  IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+  FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+  AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+  LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+  OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+  THE SOFTWARE.
+ */
+#ifndef TINOBJ_LOADER_C_H_
+#define TINOBJ_LOADER_C_H_
+
+/* @todo { Remove stddef dependency. size_t? } */
+#include <stddef.h>
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+typedef struct {
+  char *name;
+
+  float ambient[3];
+  float diffuse[3];
+  float specular[3];
+  float transmittance[3];
+  float emission[3];
+  float shininess;
+  float ior;      /* index of refraction */
+  float dissolve; /* 1 == opaque; 0 == fully transparent */
+  /* illumination model (see http://www.fileformat.info/format/material/) */
+  int illum;
+
+  int pad0;
+
+  char *ambient_texname;            /* map_Ka */
+  char *diffuse_texname;            /* map_Kd */
+  char *specular_texname;           /* map_Ks */
+  char *specular_highlight_texname; /* map_Ns */
+  char *bump_texname;               /* map_bump, bump */
+  char *displacement_texname;       /* disp */
+  char *alpha_texname;              /* map_d */
+} tinyobj_material_t;
+
+typedef struct {
+  char *name; /* group name or object name. */
+  unsigned int face_offset;
+  unsigned int length;
+} tinyobj_shape_t;
+
+typedef struct {
+  int v_idx, vt_idx, vn_idx;
+} tinyobj_vertex_index_t;
+
+typedef struct {
+  unsigned int num_vertices;
+  unsigned int num_normals;
+  unsigned int num_texcoords;
+  unsigned int num_faces;
+  unsigned int num_face_num_verts;
+
+  int pad0;
+
+  float *vertices;
+  float *normals;
+  float *texcoords;
+  tinyobj_vertex_index_t *faces;
+  int *face_num_verts;
+  int *material_ids;
+} tinyobj_attrib_t;
+
+#define TINYOBJ_FLAG_TRIANGULATE (1 << 0)
+
+#define TINYOBJ_INVALID_INDEX (0x80000000)
+
+#define TINYOBJ_SUCCESS (0)
+#define TINYOBJ_ERROR_EMPTY (-1)
+#define TINYOBJ_ERROR_INVALID_PARAMETER (-2)
+#define TINYOBJ_ERROR_FILE_OPERATION (-3)
+
+/* Provide a callback that can read a text file without any parsing or
+ * modification. The obj and mtl parser is going to read all the necessary
+ * data:
+ * tinyobj_parse_obj
+ * tinyobj_parse_mtl_file
+ *
+ * @param[in] ctx User provided context.
+ * @param[in] filename Filename to be loaded.
+ * @param[in] is_mtl 1 when the callback is invoked for loading .mtl. 0 for .obj
+ * @param[in] obj_filename .obj filename. Useful when you load .mtl from the
+ * same location as the .obj. When the callback is called to load .obj,
+ * `filename` and `obj_filename` are the same.
+ * @param[out] buf Content of loaded file
+ * @param[out] len Size of content (file)
+ */
+typedef void (*file_reader_callback)(void *ctx, const char *filename,
+                                     int is_mtl, const char *obj_filename,
+                                     char **buf, size_t *len);
+
+/* Parse wavefront .obj
+ * @param[out] attrib Attributes
+ * @param[out] shapes Array of parsed shapes
+ * @param[out] num_shapes Array length of `shapes`
+ * @param[out] materials Array of parsed materials
+ * @param[out] num_materials Array length of `materials`
+ * @param[in] file_name File name of .obj
+ * @param[in] file_reader File reader callback function (to read .obj and .mtl).
+ * @param[in] ctx Context pointer passed to the file_reader_callback.
+ * @param[in] flags combination of TINYOBJ_FLAG_***
+ *
+ * Returns TINYOBJ_SUCCESS if things go well.
+ * Returns TINYOBJ_ERROR_*** when there is an error.
+ */
+extern int tinyobj_parse_obj(tinyobj_attrib_t *attrib, tinyobj_shape_t **shapes,
+                             size_t *num_shapes, tinyobj_material_t **materials,
+                             size_t *num_materials, const char *file_name,
+                             file_reader_callback file_reader, void *ctx,
+                             unsigned int flags);
+
+/* Parse wavefront .mtl
+ *
+ * @param[out] materials_out
+ * @param[out] num_materials_out
+ * @param[in] filename .mtl filename
+ * @param[in] obj_filename .obj filename. Could be NULL if you just want to
+ * parse a .mtl file.
+ * @param[in] file_reader File reader callback
+ * @param[in] ctx Context pointer passed to the file_reader callback.
+ *
+ * Returns TINYOBJ_SUCCESS if things go well.
+ * Returns TINYOBJ_ERROR_*** when there is an error.
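+ *
+ * Illustrative usage sketch, where `my_file_reader` is any user-supplied
+ * file_reader_callback as documented above:
+ *
+ *   tinyobj_material_t *materials = NULL;
+ *   size_t num_materials = 0;
+ *   int ret = tinyobj_parse_mtl_file(&materials, &num_materials,
+ *                                    "model.mtl", "model.obj",
+ *                                    my_file_reader, NULL);
+ *   if (ret == TINYOBJ_SUCCESS) {
+ *     use materials[0..num_materials - 1], then release them with
+ *     tinyobj_materials_free(materials, num_materials);
+ *   }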
+ */
+extern int tinyobj_parse_mtl_file(tinyobj_material_t **materials_out,
+                                  size_t *num_materials_out,
+                                  const char *filename,
+                                  const char *obj_filename,
+                                  file_reader_callback file_reader, void *ctx);
+
+extern void tinyobj_attrib_init(tinyobj_attrib_t *attrib);
+extern void tinyobj_attrib_free(tinyobj_attrib_t *attrib);
+extern void tinyobj_shapes_free(tinyobj_shape_t *shapes, size_t num_shapes);
+extern void tinyobj_materials_free(tinyobj_material_t *materials,
+                                   size_t num_materials);
+
+#ifdef __cplusplus
+}
+#endif
+#endif /* TINOBJ_LOADER_C_H_ */
+
+#ifdef TINYOBJ_LOADER_C_IMPLEMENTATION
+#include <stdio.h>
+#include <assert.h>
+#include <string.h>
+#include <errno.h>
+
+#if defined(TINYOBJ_MALLOC) && defined(TINYOBJ_CALLOC) && \
+    defined(TINYOBJ_FREE) && \
+    (defined(TINYOBJ_REALLOC) || defined(TINYOBJ_REALLOC_SIZED))
+/* ok */
+#elif !defined(TINYOBJ_MALLOC) && !defined(TINYOBJ_CALLOC) && \
+    !defined(TINYOBJ_FREE) && !defined(TINYOBJ_REALLOC) && \
+    !defined(TINYOBJ_REALLOC_SIZED)
+/* ok */
+#else
+#error \
+    "Must define all or none of TINYOBJ_MALLOC, TINYOBJ_CALLOC, TINYOBJ_FREE, and TINYOBJ_REALLOC (or TINYOBJ_REALLOC_SIZED)."
+#endif
+
+#ifndef TINYOBJ_MALLOC
+#include <stdlib.h>
+#define TINYOBJ_MALLOC malloc
+#define TINYOBJ_REALLOC realloc
+#define TINYOBJ_CALLOC calloc
+#define TINYOBJ_FREE free
+#endif
+
+#ifndef TINYOBJ_REALLOC_SIZED
+#define TINYOBJ_REALLOC_SIZED(p, oldsz, newsz) TINYOBJ_REALLOC(p, newsz)
+#endif
+
+#define TINYOBJ_MAX_FACES_PER_F_LINE (16)
+#define TINYOBJ_MAX_FILEPATH (8192)
+
+#define IS_SPACE(x) (((x) == ' ') || ((x) == '\t'))
+#define IS_DIGIT(x) ((unsigned int)((x) - '0') < (unsigned int)(10))
+#define IS_NEW_LINE(x) (((x) == '\r') || ((x) == '\n') || ((x) == '\0'))
+
+static void skip_space(const char **token) {
+  while ((*token)[0] == ' ' || (*token)[0] == '\t') {
+    (*token)++;
+  }
+}
+
+static void skip_space_and_cr(const char **token) {
+  while ((*token)[0] == ' ' || (*token)[0] == '\t' || (*token)[0] == '\r') {
+    (*token)++;
+  }
+}
+
+static int until_space(const char *token) {
+  const char *p = token;
+  while (p[0] != '\0' && p[0] != ' ' && p[0] != '\t' && p[0] != '\r') {
+    p++;
+  }
+
+  return (int)(p - token);
+}
+
+static size_t length_until_newline(const char *token, size_t n) {
+  size_t len = 0;
+
+  /* Assume token[n-1] = '\0' */
+  for (len = 0; len < n - 1; len++) {
+    if (token[len] == '\n') {
+      break;
+    }
+    if ((token[len] == '\r') && ((len < (n - 2)) && (token[len + 1] != '\n'))) {
+      break;
+    }
+  }
+
+  return len;
+}
+
+static size_t length_until_line_feed(const char *token, size_t n) {
+  size_t len = 0;
+
+  /* Assume token[n-1] = '\0' */
+  for (len = 0; len < n; len++) {
+    if ((token[len] == '\n') || (token[len] == '\r')) {
+      break;
+    }
+  }
+
+  return len;
+}
+
+/* http://stackoverflow.com/questions/5710091/how-does-atoi-function-in-c-work
+ */
+static int my_atoi(const char *c) {
+  int value = 0;
+  int sign = 1;
+  if (*c == '+' || *c == '-') {
+    if (*c == '-')
+      sign = -1;
+    c++;
+  }
+  while (((*c) >= '0') && ((*c) <= '9')) { /* isdigit(*c) */
+    value *= 10;
+    value += (int)(*c - '0');
+    c++;
+  }
+  return value * sign;
+}
+
+/* Make index zero-base, and also support relative index.
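+   For example, with n = 8 vertices parsed so far:
+     idx  3  ->  2   (a one-based absolute index becomes zero-based)
+     idx -1  ->  7   (a negative index is relative to the last element added)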
*/ +static int fixIndex(int idx, size_t n) { + if (idx > 0) + return idx - 1; + if (idx == 0) + return 0; + return (int)n + idx; /* negative value = relative */ +} + +/* Parse raw triples: i, i/j/k, i//k, i/j */ +static tinyobj_vertex_index_t parseRawTriple(const char **token) { + tinyobj_vertex_index_t vi; + /* 0x80000000 = -2147483648 = invalid */ + vi.v_idx = (int)(0x80000000); + vi.vn_idx = (int)(0x80000000); + vi.vt_idx = (int)(0x80000000); + + vi.v_idx = my_atoi((*token)); + while ((*token)[0] != '\0' && (*token)[0] != '/' && (*token)[0] != ' ' && + (*token)[0] != '\t' && (*token)[0] != '\r') { + (*token)++; + } + if ((*token)[0] != '/') { + return vi; + } + (*token)++; + + /* i//k */ + if ((*token)[0] == '/') { + (*token)++; + vi.vn_idx = my_atoi((*token)); + while ((*token)[0] != '\0' && (*token)[0] != '/' && (*token)[0] != ' ' && + (*token)[0] != '\t' && (*token)[0] != '\r') { + (*token)++; + } + return vi; + } + + /* i/j/k or i/j */ + vi.vt_idx = my_atoi((*token)); + while ((*token)[0] != '\0' && (*token)[0] != '/' && (*token)[0] != ' ' && + (*token)[0] != '\t' && (*token)[0] != '\r') { + (*token)++; + } + if ((*token)[0] != '/') { + return vi; + } + + /* i/j/k */ + (*token)++; /* skip '/' */ + vi.vn_idx = my_atoi((*token)); + while ((*token)[0] != '\0' && (*token)[0] != '/' && (*token)[0] != ' ' && + (*token)[0] != '\t' && (*token)[0] != '\r') { + (*token)++; + } + return vi; +} + +static int parseInt(const char **token) { + int i = 0; + skip_space(token); + i = my_atoi((*token)); + (*token) += until_space((*token)); + return i; +} + +/* + * Tries to parse a floating point number located at s. + * + * s_end should be a location in the string where reading should absolutely + * stop. For example at the end of the string, to prevent buffer overflows. + * + * Parses the following EBNF grammar: + * sign = "+" | "-" ; + * END = ? anything not in digit ? + * digit = "0" | "1" | "2" | "3" | "4" | "5" | "6" | "7" | "8" | "9" ; + * integer = [sign] , digit , {digit} ; + * decimal = integer , ["." , integer] ; + * float = ( decimal , END ) | ( decimal , ("E" | "e") , integer , END ) ; + * + * Valid strings are for example: + * -0 +3.1417e+2 -0.0E-3 1.0324 -1.41 11e2 + * + * If the parsing is a success, result is set to the parsed value and true + * is returned. + * + * The function is greedy and will parse until any of the following happens: + * - a non-conforming character is encountered. + * - s_end is reached. + * + * The following situations triggers a failure: + * - s >= s_end. + * - parse failure. + */ +static int tryParseDouble(const char *s, const char *s_end, double *result) { + double mantissa = 0.0; + /* This exponent is base 2 rather than 10. + * However the exponent we parse is supposed to be one of ten, + * thus we must take care to convert the exponent/and or the + * mantissa to a * 2^E, where a is the mantissa and E is the + * exponent. + * To get the final double we will use ldexp, it requires the + * exponent to be in base 2. + */ + int exponent = 0; + + /* NOTE: THESE MUST BE DECLARED HERE SINCE WE ARE NOT ALLOWED + * TO JUMP OVER DEFINITIONS. + */ + char sign = '+'; + char exp_sign = '+'; + char const *curr = s; + + /* How many characters were read in a loop. */ + int read = 0; + /* Tells whether a loop terminated due to reaching s_end. */ + int end_not_reached = 0; + + /* + BEGIN PARSING. + */ + + if (s >= s_end) { + return 0; /* fail */ + } + + /* Find out what sign we've got. 
*/ + if (*curr == '+' || *curr == '-') { + sign = *curr; + curr++; + } else if (IS_DIGIT(*curr)) { /* Pass through. */ + } else { + goto fail; + } + + /* Read the integer part. */ + end_not_reached = (curr != s_end); + while (end_not_reached && IS_DIGIT(*curr)) { + mantissa *= 10; + mantissa += (int)(*curr - 0x30); + curr++; + read++; + end_not_reached = (curr != s_end); + } + + /* We must make sure we actually got something. */ + if (read == 0) + goto fail; + /* We allow numbers of form "#", "###" etc. */ + if (!end_not_reached) + goto assemble; + + /* Read the decimal part. */ + if (*curr == '.') { + curr++; + read = 1; + end_not_reached = (curr != s_end); + while (end_not_reached && IS_DIGIT(*curr)) { + /* pow(10.0, -read) */ + double frac_value = 1.0; + int f; + for (f = 0; f < read; f++) { + frac_value *= 0.1; + } + mantissa += (int)(*curr - 0x30) * frac_value; + read++; + curr++; + end_not_reached = (curr != s_end); + } + } else if (*curr == 'e' || *curr == 'E') { + } else { + goto assemble; + } + + if (!end_not_reached) + goto assemble; + + /* Read the exponent part. */ + if (*curr == 'e' || *curr == 'E') { + curr++; + /* Figure out if a sign is present and if it is. */ + end_not_reached = (curr != s_end); + if (end_not_reached && (*curr == '+' || *curr == '-')) { + exp_sign = *curr; + curr++; + } else if (IS_DIGIT(*curr)) { /* Pass through. */ + } else { + /* Empty E is not allowed. */ + goto fail; + } + + read = 0; + end_not_reached = (curr != s_end); + while (end_not_reached && IS_DIGIT(*curr)) { + exponent *= 10; + exponent += (int)(*curr - 0x30); + curr++; + read++; + end_not_reached = (curr != s_end); + } + if (read == 0) + goto fail; + } + +assemble: + +{ + double a = 1.0; /* = pow(5.0, exponent); */ + double b = 1.0; /* = 2.0^exponent */ + int i; + for (i = 0; i < exponent; i++) { + a = a * 5.0; + } + + for (i = 0; i < exponent; i++) { + b = b * 2.0; + } + + if (exp_sign == '-') { + a = 1.0 / a; + b = 1.0 / b; + } + + *result = + /* (sign == '+' ? 1 : -1) * ldexp(mantissa * pow(5.0, exponent), + exponent); */ + (sign == '+' ? 1 : -1) * (mantissa * a * b); +} + + return 1; +fail: + return 0; +} + +static float parseFloat(const char **token) { + const char *end; + double val = 0.0; + float f = 0.0f; + skip_space(token); + end = (*token) + until_space((*token)); + val = 0.0; + tryParseDouble((*token), end, &val); + f = (float)(val); + (*token) = end; + return f; +} + +static void parseFloat2(float *x, float *y, const char **token) { + (*x) = parseFloat(token); + (*y) = parseFloat(token); +} + +static void parseFloat3(float *x, float *y, float *z, const char **token) { + (*x) = parseFloat(token); + (*y) = parseFloat(token); + (*z) = parseFloat(token); +} + +static size_t my_strnlen(const char *s, size_t n) { + const char *p = (char *)memchr(s, 0, n); + return p ? 
(size_t)(p - s) : n; +} + +static char *my_strdup(const char *s, size_t max_length) { + char *d; + size_t len; + + if (s == NULL) + return NULL; + + /* Do not consider CRLF line ending(#19) */ + len = length_until_line_feed(s, max_length); + /* len = strlen(s); */ + + /* trim line ending and append '\0' */ + d = (char *)TINYOBJ_MALLOC(len + 1); /* + '\0' */ + memcpy(d, s, (size_t)(len)); + d[len] = '\0'; + + return d; +} + +static char *my_strndup(const char *s, size_t len) { + char *d; + size_t slen; + + if (s == NULL) + return NULL; + if (len == 0) + return NULL; + + slen = my_strnlen(s, len); + d = (char *)TINYOBJ_MALLOC(slen + 1); /* + '\0' */ + if (!d) { + return NULL; + } + memcpy(d, s, slen); + d[slen] = '\0'; + + return d; +} + +char *dynamic_fgets(char **buf, size_t *size, FILE *file) { + char *offset; + char *ret; + size_t old_size; + + if (!(ret = fgets(*buf, (int)*size, file))) { + return ret; + } + + if (NULL != strchr(*buf, '\n')) { + return ret; + } + + do { + old_size = *size; + *size *= 2; + *buf = (char *)TINYOBJ_REALLOC_SIZED(*buf, old_size, *size); + offset = &((*buf)[old_size - 1]); + + ret = fgets(offset, (int)(old_size + 1), file); + } while (ret && (NULL == strchr(*buf, '\n'))); + + return ret; +} + +static void initMaterial(tinyobj_material_t *material) { + int i; + material->name = NULL; + material->ambient_texname = NULL; + material->diffuse_texname = NULL; + material->specular_texname = NULL; + material->specular_highlight_texname = NULL; + material->bump_texname = NULL; + material->displacement_texname = NULL; + material->alpha_texname = NULL; + for (i = 0; i < 3; i++) { + material->ambient[i] = 0.f; + material->diffuse[i] = 0.f; + material->specular[i] = 0.f; + material->transmittance[i] = 0.f; + material->emission[i] = 0.f; + } + material->illum = 0; + material->dissolve = 1.f; + material->shininess = 1.f; + material->ior = 1.f; +} + +/* Implementation of string to int hashtable */ + +#define HASH_TABLE_ERROR 1 +#define HASH_TABLE_SUCCESS 0 + +#define HASH_TABLE_DEFAULT_SIZE 10 + +typedef struct hash_table_entry_t { + unsigned long hash; + int filled; + int pad0; + long value; + + struct hash_table_entry_t *next; +} hash_table_entry_t; + +typedef struct { + unsigned long *hashes; + hash_table_entry_t *entries; + size_t capacity; + size_t n; +} hash_table_t; + +static unsigned long hash_djb2(const unsigned char *str) { + unsigned long hash = 5381; + int c; + + while ((c = *str++)) { + hash = ((hash << 5) + hash) + (unsigned long)(c); + } + + return hash; +} + +static void create_hash_table(size_t start_capacity, hash_table_t *hash_table) { + if (start_capacity < 1) + start_capacity = HASH_TABLE_DEFAULT_SIZE; + hash_table->hashes = + (unsigned long *)TINYOBJ_MALLOC(start_capacity * sizeof(unsigned long)); + hash_table->entries = (hash_table_entry_t *)TINYOBJ_CALLOC( + start_capacity, sizeof(hash_table_entry_t)); + hash_table->capacity = start_capacity; + hash_table->n = 0; +} + +static void destroy_hash_table(hash_table_t *hash_table) { + TINYOBJ_FREE(hash_table->entries); + TINYOBJ_FREE(hash_table->hashes); +} + +/* Insert with quadratic probing */ +static int hash_table_insert_value(unsigned long hash, long value, + hash_table_t *hash_table) { + /* Insert value */ + size_t start_index = hash % hash_table->capacity; + size_t index = start_index; + hash_table_entry_t *start_entry = hash_table->entries + start_index; + size_t i; + hash_table_entry_t *entry; + + for (i = 1; hash_table->entries[index].filled; i++) { + if (i >= hash_table->capacity) + return 
HASH_TABLE_ERROR; + index = (start_index + (i * i)) % hash_table->capacity; + } + + entry = hash_table->entries + index; + entry->hash = hash; + entry->filled = 1; + entry->value = value; + + if (index != start_index) { + /* This is a new entry, but not the start entry, hence we need to add a next + * pointer to our entry */ + entry->next = start_entry->next; + start_entry->next = entry; + } + + return HASH_TABLE_SUCCESS; +} + +static int hash_table_insert(unsigned long hash, long value, + hash_table_t *hash_table) { + int ret = hash_table_insert_value(hash, value, hash_table); + if (ret == HASH_TABLE_SUCCESS) { + hash_table->hashes[hash_table->n] = hash; + hash_table->n++; + } + return ret; +} + +static hash_table_entry_t *hash_table_find(unsigned long hash, + hash_table_t *hash_table) { + hash_table_entry_t *entry = + hash_table->entries + (hash % hash_table->capacity); + while (entry) { + if (entry->hash == hash && entry->filled) { + return entry; + } + entry = entry->next; + } + return NULL; +} + +static void hash_table_grow(hash_table_t *hash_table) { + size_t new_capacity; + hash_table_t new_hash_table; + size_t i; + + new_capacity = 2 * hash_table->capacity; + /* Create a new hash table. We're not calling create_hash_table because we + * want to realloc the hash array */ + new_hash_table.hashes = hash_table->hashes = + (unsigned long *)TINYOBJ_REALLOC_SIZED( + (void *)hash_table->hashes, + sizeof(unsigned long) * hash_table->capacity, + sizeof(unsigned long) * new_capacity); + new_hash_table.entries = (hash_table_entry_t *)TINYOBJ_CALLOC( + new_capacity, sizeof(hash_table_entry_t)); + new_hash_table.capacity = new_capacity; + new_hash_table.n = hash_table->n; + + /* Rehash */ + for (i = 0; i < hash_table->capacity; i++) { + hash_table_entry_t *entry = &hash_table->entries[i]; + if (entry->filled) { + hash_table_insert_value(entry->hash, entry->value, &new_hash_table); + } + } + + TINYOBJ_FREE(hash_table->entries); + (*hash_table) = new_hash_table; +} + +static int hash_table_exists(const char *name, hash_table_t *hash_table) { + return hash_table_find(hash_djb2((const unsigned char *)name), hash_table) != + NULL; +} + +static void hash_table_set(const char *name, size_t val, + hash_table_t *hash_table) { + /* Hash name */ + unsigned long hash = hash_djb2((const unsigned char *)name); + + hash_table_entry_t *entry = hash_table_find(hash, hash_table); + if (entry) { + entry->value = (long)val; + return; + } + + /* Expand if necessary + * Grow until the element has been added + */ + while (hash_table_insert(hash, (long)val, hash_table) != HASH_TABLE_SUCCESS) { + hash_table_grow(hash_table); + } +} + +static long hash_table_get(const char *name, hash_table_t *hash_table) { + hash_table_entry_t *ret = + hash_table_find(hash_djb2((const unsigned char *)(name)), hash_table); + return ret->value; +} + +static tinyobj_material_t *tinyobj_material_add(tinyobj_material_t *prev, + size_t num_materials, + tinyobj_material_t *new_mat) { + tinyobj_material_t *dst; + size_t num_bytes = sizeof(tinyobj_material_t) * num_materials; + dst = (tinyobj_material_t *)TINYOBJ_REALLOC_SIZED( + prev, num_bytes, num_bytes + sizeof(tinyobj_material_t)); + + dst[num_materials] = (*new_mat); /* Just copy pointer for char* members */ + return dst; +} + +static int is_line_ending(const char *p, size_t i, size_t end_i) { + if (p[i] == '\0') + return 1; + if (p[i] == '\n') + return 1; /* this includes \r\n */ + if (p[i] == '\r') { + if (((i + 1) < end_i) && (p[i + 1] != '\n')) { /* detect only \r case */ + return 1; + 
} + } + return 0; +} + +typedef struct { + size_t pos; + size_t len; +} LineInfo; + +/* Find '\n' and create line data. */ +static int get_line_infos(const char *buf, size_t buf_len, + LineInfo **line_infos, size_t *num_lines) { + size_t i = 0; + size_t end_idx = buf_len; + size_t prev_pos = 0; + size_t line_no = 0; + size_t last_line_ending = 0; + + /* Count # of lines. */ + for (i = 0; i < end_idx; i++) { + if (is_line_ending(buf, i, end_idx)) { + (*num_lines)++; + last_line_ending = i; + } + } + /* The last char from the input may not be a line + * ending character so add an extra line if there + * are more characters after the last line ending + * that was found. */ + if (end_idx - last_line_ending > 1) { + (*num_lines)++; + } + + if (*num_lines == 0) + return TINYOBJ_ERROR_EMPTY; + + *line_infos = (LineInfo *)TINYOBJ_MALLOC(sizeof(LineInfo) * (*num_lines)); + + /* Fill line infos. */ + for (i = 0; i < end_idx; i++) { + if (is_line_ending(buf, i, end_idx)) { + (*line_infos)[line_no].pos = prev_pos; + (*line_infos)[line_no].len = i - prev_pos; + prev_pos = i + 1; + line_no++; + } + } + if (end_idx - last_line_ending > 1) { + (*line_infos)[line_no].pos = prev_pos; + (*line_infos)[line_no].len = end_idx - 1 - last_line_ending; + } + + return 0; +} + +static int tinyobj_parse_and_index_mtl_file( + tinyobj_material_t **materials_out, size_t *num_materials_out, + const char *mtl_filename, const char *obj_filename, + file_reader_callback file_reader, void *ctx, hash_table_t *material_table) { + tinyobj_material_t material; + size_t num_materials = 0; + tinyobj_material_t *materials = NULL; + int has_previous_material = 0; + const char *line_end = NULL; + size_t num_lines = 0; + LineInfo *line_infos = NULL; + size_t i = 0; + char *buf = NULL; + size_t len = 0; + + if (materials_out == NULL) { + return TINYOBJ_ERROR_INVALID_PARAMETER; + } + + if (num_materials_out == NULL) { + return TINYOBJ_ERROR_INVALID_PARAMETER; + } + + (*materials_out) = NULL; + (*num_materials_out) = 0; + + file_reader(ctx, mtl_filename, 1, obj_filename, &buf, &len); + if (len < 1) + return TINYOBJ_ERROR_INVALID_PARAMETER; + if (buf == NULL) + return TINYOBJ_ERROR_INVALID_PARAMETER; + + if (get_line_infos(buf, len, &line_infos, &num_lines) != 0) { + TINYOBJ_FREE(line_infos); + return TINYOBJ_ERROR_EMPTY; + } + + /* Create a default material */ + initMaterial(&material); + + for (i = 0; i < num_lines; i++) { + const char *p = &buf[line_infos[i].pos]; + size_t p_len = line_infos[i].len; + + char linebuf[4096]; + const char *token; + assert(p_len < 4095); + + memcpy(linebuf, p, p_len); + linebuf[p_len] = '\0'; + + token = linebuf; + line_end = token + p_len; + + /* Skip leading space. */ + token += strspn(token, " \t"); + + assert(token); + if (token[0] == '\0') + continue; /* empty line */ + + if (token[0] == '#') + continue; /* comment line */ + + /* new mtl */ + if ((0 == strncmp(token, "newmtl", 6)) && IS_SPACE((token[6]))) { + char namebuf[4096]; + + /* flush previous material. 
*/ + if (has_previous_material) { + materials = tinyobj_material_add(materials, num_materials, &material); + num_materials++; + } else { + has_previous_material = 1; + } + + /* initial temporary material */ + initMaterial(&material); + + /* set new mtl name */ + token += 7; +#ifdef _MSC_VER + sscanf_s(token, "%s", namebuf, (unsigned)_countof(namebuf)); +#else + sscanf(token, "%s", namebuf); +#endif + material.name = my_strdup(namebuf, (size_t)(line_end - token)); + + /* Add material to material table */ + if (material_table) + hash_table_set(material.name, num_materials, material_table); + + continue; + } + + /* ambient */ + if (token[0] == 'K' && token[1] == 'a' && IS_SPACE((token[2]))) { + float r, g, b; + token += 2; + parseFloat3(&r, &g, &b, &token); + material.ambient[0] = r; + material.ambient[1] = g; + material.ambient[2] = b; + continue; + } + + /* diffuse */ + if (token[0] == 'K' && token[1] == 'd' && IS_SPACE((token[2]))) { + float r, g, b; + token += 2; + parseFloat3(&r, &g, &b, &token); + material.diffuse[0] = r; + material.diffuse[1] = g; + material.diffuse[2] = b; + continue; + } + + /* specular */ + if (token[0] == 'K' && token[1] == 's' && IS_SPACE((token[2]))) { + float r, g, b; + token += 2; + parseFloat3(&r, &g, &b, &token); + material.specular[0] = r; + material.specular[1] = g; + material.specular[2] = b; + continue; + } + + /* transmittance */ + if (token[0] == 'K' && token[1] == 't' && IS_SPACE((token[2]))) { + float r, g, b; + token += 2; + parseFloat3(&r, &g, &b, &token); + material.transmittance[0] = r; + material.transmittance[1] = g; + material.transmittance[2] = b; + continue; + } + + /* ior(index of refraction) */ + if (token[0] == 'N' && token[1] == 'i' && IS_SPACE((token[2]))) { + token += 2; + material.ior = parseFloat(&token); + continue; + } + + /* emission */ + if (token[0] == 'K' && token[1] == 'e' && IS_SPACE(token[2])) { + float r, g, b; + token += 2; + parseFloat3(&r, &g, &b, &token); + material.emission[0] = r; + material.emission[1] = g; + material.emission[2] = b; + continue; + } + + /* shininess */ + if (token[0] == 'N' && token[1] == 's' && IS_SPACE(token[2])) { + token += 2; + material.shininess = parseFloat(&token); + continue; + } + + /* illum model */ + if (0 == strncmp(token, "illum", 5) && IS_SPACE(token[5])) { + token += 6; + material.illum = parseInt(&token); + continue; + } + + /* dissolve */ + if ((token[0] == 'd' && IS_SPACE(token[1]))) { + token += 1; + material.dissolve = parseFloat(&token); + continue; + } + if (token[0] == 'T' && token[1] == 'r' && IS_SPACE(token[2])) { + token += 2; + /* Invert value of Tr(assume Tr is in range [0, 1]) */ + material.dissolve = 1.0f - parseFloat(&token); + continue; + } + + /* ambient texture */ + if ((0 == strncmp(token, "map_Ka", 6)) && IS_SPACE(token[6])) { + token += 7; + material.ambient_texname = my_strdup(token, (size_t)(line_end - token)); + continue; + } + + /* diffuse texture */ + if ((0 == strncmp(token, "map_Kd", 6)) && IS_SPACE(token[6])) { + token += 7; + material.diffuse_texname = my_strdup(token, (size_t)(line_end - token)); + continue; + } + + /* specular texture */ + if ((0 == strncmp(token, "map_Ks", 6)) && IS_SPACE(token[6])) { + token += 7; + material.specular_texname = my_strdup(token, (size_t)(line_end - token)); + continue; + } + + /* specular highlight texture */ + if ((0 == strncmp(token, "map_Ns", 6)) && IS_SPACE(token[6])) { + token += 7; + material.specular_highlight_texname = + my_strdup(token, (size_t)(line_end - token)); + continue; + } + + /* bump texture */ + if 
((0 == strncmp(token, "map_bump", 8)) && IS_SPACE(token[8])) { + token += 9; + material.bump_texname = my_strdup(token, (size_t)(line_end - token)); + continue; + } + + /* alpha texture */ + if ((0 == strncmp(token, "map_d", 5)) && IS_SPACE(token[5])) { + token += 6; + material.alpha_texname = my_strdup(token, (size_t)(line_end - token)); + continue; + } + + /* bump texture */ + if ((0 == strncmp(token, "bump", 4)) && IS_SPACE(token[4])) { + token += 5; + material.bump_texname = my_strdup(token, (size_t)(line_end - token)); + continue; + } + + /* displacement texture */ + if ((0 == strncmp(token, "disp", 4)) && IS_SPACE(token[4])) { + token += 5; + material.displacement_texname = + my_strdup(token, (size_t)(line_end - token)); + continue; + } + + /* @todo { unknown parameter } */ + } + + TINYOBJ_FREE(line_infos); + + if (material.name) { + /* Flush last material element */ + materials = tinyobj_material_add(materials, num_materials, &material); + num_materials++; + } + + (*num_materials_out) = num_materials; + (*materials_out) = materials; + + return TINYOBJ_SUCCESS; +} + +int tinyobj_parse_mtl_file(tinyobj_material_t **materials_out, + size_t *num_materials_out, const char *mtl_filename, + const char *obj_filename, + file_reader_callback file_reader, void *ctx) { + return tinyobj_parse_and_index_mtl_file(materials_out, num_materials_out, + mtl_filename, obj_filename, + file_reader, ctx, NULL); +} + +typedef enum { + COMMAND_EMPTY, + COMMAND_V, + COMMAND_VN, + COMMAND_VT, + COMMAND_F, + COMMAND_G, + COMMAND_O, + COMMAND_USEMTL, + COMMAND_MTLLIB + +} CommandType; + +typedef struct { + float vx, vy, vz; + float nx, ny, nz; + float tx, ty; + + /* @todo { Use dynamic array } */ + tinyobj_vertex_index_t f[TINYOBJ_MAX_FACES_PER_F_LINE]; + size_t num_f; + + int f_num_verts[TINYOBJ_MAX_FACES_PER_F_LINE]; + size_t num_f_num_verts; + + const char *group_name; + unsigned int group_name_len; + int pad0; + + const char *object_name; + unsigned int object_name_len; + int pad1; + + const char *material_name; + unsigned int material_name_len; + int pad2; + + const char *mtllib_name; + unsigned int mtllib_name_len; + + CommandType type; +} Command; + +static int parseLine(Command *command, const char *p, size_t p_len, + int triangulate) { + char linebuf[4096]; + const char *token; + assert(p_len < 4095); + + memcpy(linebuf, p, p_len); + linebuf[p_len] = '\0'; + + token = linebuf; + + command->type = COMMAND_EMPTY; + + /* Skip leading space. 
   */
+  skip_space(&token);
+
+  assert(token);
+  if (token[0] == '\0') { /* empty line */
+    return 0;
+  }
+
+  if (token[0] == '#') { /* comment line */
+    return 0;
+  }
+
+  /* vertex */
+  if (token[0] == 'v' && IS_SPACE((token[1]))) {
+    float x, y, z;
+    token += 2;
+    parseFloat3(&x, &y, &z, &token);
+    command->vx = x;
+    command->vy = y;
+    command->vz = z;
+    command->type = COMMAND_V;
+    return 1;
+  }
+
+  /* normal */
+  if (token[0] == 'v' && token[1] == 'n' && IS_SPACE((token[2]))) {
+    float x, y, z;
+    token += 3;
+    parseFloat3(&x, &y, &z, &token);
+    command->nx = x;
+    command->ny = y;
+    command->nz = z;
+    command->type = COMMAND_VN;
+    return 1;
+  }
+
+  /* texcoord */
+  if (token[0] == 'v' && token[1] == 't' && IS_SPACE((token[2]))) {
+    float x, y;
+    token += 3;
+    parseFloat2(&x, &y, &token);
+    command->tx = x;
+    command->ty = y;
+    command->type = COMMAND_VT;
+    return 1;
+  }
+
+  /* face */
+  if (token[0] == 'f' && IS_SPACE((token[1]))) {
+    size_t num_f = 0;
+
+    tinyobj_vertex_index_t f[TINYOBJ_MAX_FACES_PER_F_LINE];
+    token += 2;
+    skip_space(&token);
+
+    while (!IS_NEW_LINE(token[0])) {
+      tinyobj_vertex_index_t vi = parseRawTriple(&token);
+      skip_space_and_cr(&token);
+
+      f[num_f] = vi;
+      num_f++;
+    }
+
+    command->type = COMMAND_F;
+
+    if (triangulate) {
+      size_t k;
+      size_t n = 0;
+
+      tinyobj_vertex_index_t i0 = f[0];
+      tinyobj_vertex_index_t i1;
+      tinyobj_vertex_index_t i2 = f[1];
+
+      assert(3 * num_f < TINYOBJ_MAX_FACES_PER_F_LINE);
+
+      for (k = 2; k < num_f; k++) {
+        i1 = i2;
+        i2 = f[k];
+        command->f[3 * n + 0] = i0;
+        command->f[3 * n + 1] = i1;
+        command->f[3 * n + 2] = i2;
+
+        command->f_num_verts[n] = 3;
+        n++;
+      }
+      command->num_f = 3 * n;
+      command->num_f_num_verts = n;
+
+    } else {
+      size_t k = 0;
+      assert(num_f < TINYOBJ_MAX_FACES_PER_F_LINE);
+      for (k = 0; k < num_f; k++) {
+        command->f[k] = f[k];
+      }
+
+      command->num_f = num_f;
+      command->f_num_verts[0] = (int)num_f;
+      command->num_f_num_verts = 1;
+    }
+
+    return 1;
+  }
+
+  /* use mtl */
+  if ((0 == strncmp(token, "usemtl", 6)) && IS_SPACE((token[6]))) {
+    token += 7;
+
+    skip_space(&token);
+    command->material_name = p + (token - linebuf);
+    command->material_name_len = (unsigned int)length_until_newline(
+        token, (p_len - (size_t)(token - linebuf)) + 1);
+    command->type = COMMAND_USEMTL;
+
+    return 1;
+  }
+
+  /* load mtl */
+  if ((0 == strncmp(token, "mtllib", 6)) && IS_SPACE((token[6]))) {
+    /* By specification, `mtllib` should appear only once in a .obj file */
+    token += 7;
+
+    skip_space(&token);
+    command->mtllib_name = p + (token - linebuf);
+    command->mtllib_name_len = (unsigned int)length_until_newline(
+                                   token, p_len - (size_t)(token - linebuf)) +
+                               1;
+    command->type = COMMAND_MTLLIB;
+
+    return 1;
+  }
+
+  /* group name */
+  if (token[0] == 'g' && IS_SPACE((token[1]))) {
+    /* @todo { multiple group name. } */
+    token += 2;
+
+    command->group_name = p + (token - linebuf);
+    command->group_name_len = (unsigned int)length_until_newline(
+                                  token, p_len - (size_t)(token - linebuf)) +
+                              1;
+    command->type = COMMAND_G;
+
+    return 1;
+  }
+
+  /* object name */
+  if (token[0] == 'o' && IS_SPACE((token[1]))) {
+    /* @todo { multiple object name? } */
+    token += 2;
+
+    command->object_name = p + (token - linebuf);
+    command->object_name_len = (unsigned int)length_until_newline(
+                                   token, p_len - (size_t)(token - linebuf)) +
+                               1;
+    command->type = COMMAND_O;
+
+    return 1;
+  }
+
+  return 0;
+}
+
+static size_t basename_len(const char *filename, size_t filename_length) {
+  /* Count includes NUL terminator.
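+     E.g. basename_len("models/cube.obj", 16) returns 9, the length of
+     "cube.obj" plus its NUL.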
   */
+  const char *p = &filename[filename_length - 1];
+  size_t count = 1;
+
+/* On Windows, the directory delimiter is '\' and both it and '/' are
+ * reserved by the filesystem. On *nix platforms, only the '/' character
+ * is reserved, so account for the two cases separately. */
+#if _WIN32
+  while (p[-1] != '/' && p[-1] != '\\') {
+    if (p == filename) {
+      count = filename_length;
+      return count;
+    }
+    count++;
+    p--;
+  }
+  p++;
+  return count;
+#else
+  while (*(--p) != '/') {
+    if (p == filename) {
+      count = filename_length;
+      return count;
+    }
+    count++;
+  }
+  return count;
+#endif
+}
+
+static char *generate_mtl_filename(const char *obj_filename,
+                                   size_t obj_filename_length,
+                                   const char *mtllib_name,
+                                   size_t mtllib_name_length) {
+  /* Create a dynamically-allocated material filename. This allows the material
+   * and obj files to be separated, however the mtllib name in the OBJ file
+   * must be a relative path to the material file from the OBJ's directory.
+   * This does not support the mtllib name being an absolute path. */
+  char *mtl_filename;
+  char *p;
+  size_t mtl_filename_length;
+  size_t obj_basename_length;
+
+  /* Calculate required size of mtl_filename and allocate */
+  obj_basename_length = basename_len(obj_filename, obj_filename_length);
+  mtl_filename_length =
+      (obj_filename_length - obj_basename_length) + mtllib_name_length;
+  mtl_filename = (char *)TINYOBJ_MALLOC(mtl_filename_length);
+
+  /* Copy over the obj's path */
+  memcpy(mtl_filename, obj_filename,
+         (obj_filename_length - obj_basename_length));
+
+  /* Overwrite the obj basename with the mtllib name, filling the string */
+  p = &mtl_filename[mtl_filename_length - mtllib_name_length];
+  strcpy(p, mtllib_name);
+  return mtl_filename;
+}
+
+int tinyobj_parse_obj(tinyobj_attrib_t *attrib, tinyobj_shape_t **shapes,
+                      size_t *num_shapes, tinyobj_material_t **materials_out,
+                      size_t *num_materials_out, const char *obj_filename,
+                      file_reader_callback file_reader, void *ctx,
+                      unsigned int flags) {
+  LineInfo *line_infos = NULL;
+  Command *commands = NULL;
+  size_t num_lines = 0;
+
+  size_t num_v = 0;
+  size_t num_vn = 0;
+  size_t num_vt = 0;
+  size_t num_f = 0;
+  size_t num_faces = 0;
+
+  int mtllib_line_index = -1;
+
+  tinyobj_material_t *materials = NULL;
+  size_t num_materials = 0;
+
+  hash_table_t material_table;
+
+  char *buf = NULL;
+  size_t len = 0;
+  file_reader(ctx, obj_filename, /* is_mtl */ 0, obj_filename, &buf, &len);
+
+  if (len < 1)
+    return TINYOBJ_ERROR_INVALID_PARAMETER;
+  if (attrib == NULL)
+    return TINYOBJ_ERROR_INVALID_PARAMETER;
+  if (shapes == NULL)
+    return TINYOBJ_ERROR_INVALID_PARAMETER;
+  if (num_shapes == NULL)
+    return TINYOBJ_ERROR_INVALID_PARAMETER;
+  if (buf == NULL)
+    return TINYOBJ_ERROR_INVALID_PARAMETER;
+  if (materials_out == NULL)
+    return TINYOBJ_ERROR_INVALID_PARAMETER;
+  if (num_materials_out == NULL)
+    return TINYOBJ_ERROR_INVALID_PARAMETER;
+
+  tinyobj_attrib_init(attrib);
+
+  /* 1. create line data */
+  if (get_line_infos(buf, len, &line_infos, &num_lines) != 0) {
+    return TINYOBJ_ERROR_EMPTY;
+  }
+
+  commands = (Command *)TINYOBJ_MALLOC(sizeof(Command) * num_lines);
+
+  create_hash_table(HASH_TABLE_DEFAULT_SIZE, &material_table);
+
+  /* 2.
parse each line */ + { + size_t i = 0; + for (i = 0; i < num_lines; i++) { + int ret = parseLine(&commands[i], &buf[line_infos[i].pos], + line_infos[i].len, flags & TINYOBJ_FLAG_TRIANGULATE); + if (ret) { + if (commands[i].type == COMMAND_V) { + num_v++; + } else if (commands[i].type == COMMAND_VN) { + num_vn++; + } else if (commands[i].type == COMMAND_VT) { + num_vt++; + } else if (commands[i].type == COMMAND_F) { + num_f += commands[i].num_f; + num_faces += commands[i].num_f_num_verts; + } + + if (commands[i].type == COMMAND_MTLLIB) { + mtllib_line_index = (int)i; + } + } + } + } + + /* line_infos are not used anymore. Release memory. */ + if (line_infos) { + TINYOBJ_FREE(line_infos); + } + + /* Load material (if it exists) */ + if (mtllib_line_index >= 0 && commands[mtllib_line_index].mtllib_name && + commands[mtllib_line_index].mtllib_name_len > 0) { + /* Maximum length allowed by Linux - higher than Windows and macOS */ + size_t obj_filename_len = my_strnlen(obj_filename, 4096 + 255) + 1; + char *mtl_filename; + char *mtllib_name; + size_t mtllib_name_len = 0; + int ret; + + mtllib_name_len = + length_until_line_feed(commands[mtllib_line_index].mtllib_name, + commands[mtllib_line_index].mtllib_name_len); + + mtllib_name = + my_strndup(commands[mtllib_line_index].mtllib_name, mtllib_name_len); + + /* allow for NUL terminator */ + mtllib_name_len++; + mtl_filename = generate_mtl_filename(obj_filename, obj_filename_len, + mtllib_name, mtllib_name_len); + + ret = tinyobj_parse_and_index_mtl_file(&materials, &num_materials, + mtl_filename, obj_filename, + file_reader, ctx, &material_table); + + if (ret != TINYOBJ_SUCCESS) { + /* warning. */ + fprintf(stderr, "TINYOBJ: Failed to parse material file '%s': %d\n", + mtl_filename, ret); + } + TINYOBJ_FREE(mtl_filename); + TINYOBJ_FREE(mtllib_name); + } + + /* Construct attributes */ + + { + size_t v_count = 0; + size_t n_count = 0; + size_t t_count = 0; + size_t f_count = 0; + size_t face_count = 0; + int material_id = -1; /* -1 = default unknown material. 
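+                             Each face keeps the most recent usemtl id until
+                             the next usemtl line replaces it.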
*/ + size_t i = 0; + + attrib->vertices = (float *)TINYOBJ_MALLOC(sizeof(float) * num_v * 3); + attrib->num_vertices = (unsigned int)num_v; + attrib->normals = (float *)TINYOBJ_MALLOC(sizeof(float) * num_vn * 3); + attrib->num_normals = (unsigned int)num_vn; + attrib->texcoords = (float *)TINYOBJ_MALLOC(sizeof(float) * num_vt * 2); + attrib->num_texcoords = (unsigned int)num_vt; + attrib->faces = (tinyobj_vertex_index_t *)TINYOBJ_MALLOC( + sizeof(tinyobj_vertex_index_t) * num_f); + attrib->num_faces = (unsigned int)num_f; + attrib->face_num_verts = (int *)TINYOBJ_MALLOC(sizeof(int) * num_faces); + attrib->material_ids = (int *)TINYOBJ_MALLOC(sizeof(int) * num_faces); + attrib->num_face_num_verts = (unsigned int)num_faces; + + for (i = 0; i < num_lines; i++) { + if (commands[i].type == COMMAND_EMPTY) { + continue; + } else if (commands[i].type == COMMAND_USEMTL) { + /* @todo + if (commands[t][i].material_name && + commands[t][i].material_name_len > 0) { + std::string material_name(commands[t][i].material_name, + commands[t][i].material_name_len); + + if (material_map.find(material_name) != material_map.end()) { + material_id = material_map[material_name]; + } else { + // Assign invalid material ID + material_id = -1; + } + } + */ + if (commands[i].material_name && commands[i].material_name_len > 0) { + /* Create a null terminated string */ + char *material_name_null_term = + (char *)TINYOBJ_MALLOC(commands[i].material_name_len + 1); + memcpy((void *)material_name_null_term, + (const void *)commands[i].material_name, + commands[i].material_name_len); + material_name_null_term[commands[i].material_name_len] = 0; + + if (hash_table_exists(material_name_null_term, &material_table)) + material_id = + (int)hash_table_get(material_name_null_term, &material_table); + else + material_id = -1; + + TINYOBJ_FREE(material_name_null_term); + } + } else if (commands[i].type == COMMAND_V) { + attrib->vertices[3 * v_count + 0] = commands[i].vx; + attrib->vertices[3 * v_count + 1] = commands[i].vy; + attrib->vertices[3 * v_count + 2] = commands[i].vz; + v_count++; + } else if (commands[i].type == COMMAND_VN) { + attrib->normals[3 * n_count + 0] = commands[i].nx; + attrib->normals[3 * n_count + 1] = commands[i].ny; + attrib->normals[3 * n_count + 2] = commands[i].nz; + n_count++; + } else if (commands[i].type == COMMAND_VT) { + attrib->texcoords[2 * t_count + 0] = commands[i].tx; + attrib->texcoords[2 * t_count + 1] = commands[i].ty; + t_count++; + } else if (commands[i].type == COMMAND_F) { + size_t k = 0; + for (k = 0; k < commands[i].num_f; k++) { + tinyobj_vertex_index_t vi = commands[i].f[k]; + int v_idx = fixIndex(vi.v_idx, v_count); + int vn_idx = fixIndex(vi.vn_idx, n_count); + int vt_idx = fixIndex(vi.vt_idx, t_count); + attrib->faces[f_count + k].v_idx = v_idx; + attrib->faces[f_count + k].vn_idx = vn_idx; + attrib->faces[f_count + k].vt_idx = vt_idx; + } + + for (k = 0; k < commands[i].num_f_num_verts; k++) { + attrib->material_ids[face_count + k] = material_id; + attrib->face_num_verts[face_count + k] = commands[i].f_num_verts[k]; + } + + f_count += commands[i].num_f; + face_count += commands[i].num_f_num_verts; + } + } + } + + /* 5. Construct shape information. 
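+     Each 'o' or 'g' command closes the shape opened by the previous one,
+     covering the faces seen since then.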
   */
+  {
+    unsigned int face_count = 0;
+    size_t i = 0;
+    size_t n = 0;
+    size_t shape_idx = 0;
+
+    const char *shape_name = NULL;
+    unsigned int shape_name_len = 0;
+    const char *prev_shape_name = NULL;
+    unsigned int prev_shape_name_len = 0;
+    unsigned int prev_shape_face_offset = 0;
+    unsigned int prev_face_offset = 0;
+    tinyobj_shape_t prev_shape = {NULL, 0, 0};
+
+    /* Find the number of shapes in .obj */
+    for (i = 0; i < num_lines; i++) {
+      if (commands[i].type == COMMAND_O || commands[i].type == COMMAND_G) {
+        n++;
+      }
+    }
+
+    /* Allocate array of shapes with maximum possible size (+1 for unnamed
+     * group/object).
+     * Actual # of shapes found in .obj is determined later */
+    (*shapes) =
+        (tinyobj_shape_t *)TINYOBJ_MALLOC(sizeof(tinyobj_shape_t) * (n + 1));
+
+    for (i = 0; i < num_lines; i++) {
+      if (commands[i].type == COMMAND_O || commands[i].type == COMMAND_G) {
+        if (commands[i].type == COMMAND_O) {
+          shape_name = commands[i].object_name;
+          shape_name_len = commands[i].object_name_len;
+        } else {
+          shape_name = commands[i].group_name;
+          shape_name_len = commands[i].group_name_len;
+        }
+
+        if (face_count == 0) {
+          /* 'o' or 'g' appears before any 'f' */
+          prev_shape_name = shape_name;
+          prev_shape_name_len = shape_name_len;
+          prev_shape_face_offset = face_count;
+          prev_face_offset = face_count;
+        } else {
+          if (shape_idx == 0) {
+            /* 'o' or 'g' after some 'v' lines. */
+            (*shapes)[shape_idx].name = my_strndup(
+                prev_shape_name, prev_shape_name_len); /* may be NULL */
+            (*shapes)[shape_idx].face_offset = prev_shape.face_offset;
+            (*shapes)[shape_idx].length = face_count - prev_face_offset;
+            shape_idx++;
+
+            prev_face_offset = face_count;
+
+          } else {
+            if ((face_count - prev_face_offset) > 0) {
+              (*shapes)[shape_idx].name =
+                  my_strndup(prev_shape_name, prev_shape_name_len);
+              (*shapes)[shape_idx].face_offset = prev_face_offset;
+              (*shapes)[shape_idx].length = face_count - prev_face_offset;
+              shape_idx++;
+              prev_face_offset = face_count;
+            }
+          }
+
+          /* Record shape info for succeeding 'o' or 'g' command. */
+          prev_shape_name = shape_name;
+          prev_shape_name_len = shape_name_len;
+          prev_shape_face_offset = face_count;
+        }
+      }
+      if (commands[i].type == COMMAND_F) {
+        face_count++;
+      }
+    }
+
+    if ((face_count - prev_face_offset) > 0) {
+      size_t length = face_count - prev_shape_face_offset;
+      if (length > 0) {
+        (*shapes)[shape_idx].name =
+            my_strndup(prev_shape_name, prev_shape_name_len);
+        (*shapes)[shape_idx].face_offset = prev_face_offset;
+        (*shapes)[shape_idx].length = face_count - prev_face_offset;
+        shape_idx++;
+      }
+    } else {
+      /* Likely no 'f' line occurred after the last 'o' or 'g', so the
+       * current shape information is discarded.
*/ + } + + (*num_shapes) = shape_idx; + } + + if (commands) { + TINYOBJ_FREE(commands); + } + + destroy_hash_table(&material_table); + + (*materials_out) = materials; + (*num_materials_out) = num_materials; + + return TINYOBJ_SUCCESS; +} + +void tinyobj_attrib_init(tinyobj_attrib_t *attrib) { + attrib->vertices = NULL; + attrib->num_vertices = 0; + attrib->normals = NULL; + attrib->num_normals = 0; + attrib->texcoords = NULL; + attrib->num_texcoords = 0; + attrib->faces = NULL; + attrib->num_faces = 0; + attrib->face_num_verts = NULL; + attrib->num_face_num_verts = 0; + attrib->material_ids = NULL; +} + +void tinyobj_attrib_free(tinyobj_attrib_t *attrib) { + if (attrib->vertices) + TINYOBJ_FREE(attrib->vertices); + if (attrib->normals) + TINYOBJ_FREE(attrib->normals); + if (attrib->texcoords) + TINYOBJ_FREE(attrib->texcoords); + if (attrib->faces) + TINYOBJ_FREE(attrib->faces); + if (attrib->face_num_verts) + TINYOBJ_FREE(attrib->face_num_verts); + if (attrib->material_ids) + TINYOBJ_FREE(attrib->material_ids); +} + +void tinyobj_shapes_free(tinyobj_shape_t *shapes, size_t num_shapes) { + size_t i; + if (shapes == NULL) + return; + + for (i = 0; i < num_shapes; i++) { + if (shapes[i].name) + TINYOBJ_FREE(shapes[i].name); + } + + TINYOBJ_FREE(shapes); +} + +void tinyobj_materials_free(tinyobj_material_t *materials, + size_t num_materials) { + size_t i; + if (materials == NULL) + return; + + for (i = 0; i < num_materials; i++) { + if (materials[i].name) + TINYOBJ_FREE(materials[i].name); + if (materials[i].ambient_texname) + TINYOBJ_FREE(materials[i].ambient_texname); + if (materials[i].diffuse_texname) + TINYOBJ_FREE(materials[i].diffuse_texname); + if (materials[i].specular_texname) + TINYOBJ_FREE(materials[i].specular_texname); + if (materials[i].specular_highlight_texname) + TINYOBJ_FREE(materials[i].specular_highlight_texname); + if (materials[i].bump_texname) + TINYOBJ_FREE(materials[i].bump_texname); + if (materials[i].displacement_texname) + TINYOBJ_FREE(materials[i].displacement_texname); + if (materials[i].alpha_texname) + TINYOBJ_FREE(materials[i].alpha_texname); + } + + TINYOBJ_FREE(materials); +} +#endif /* TINYOBJ_LOADER_C_IMPLEMENTATION */ diff --git a/vendor/vk_mem_alloc.h b/vendor/vk_mem_alloc.h new file mode 100644 index 0000000..6f71d5b --- /dev/null +++ b/vendor/vk_mem_alloc.h @@ -0,0 +1,19111 @@ +// +// Copyright (c) 2017-2025 Advanced Micro Devices, Inc. All rights reserved. +// +// Permission is hereby granted, free of charge, to any person obtaining a copy +// of this software and associated documentation files (the "Software"), to deal +// in the Software without restriction, including without limitation the rights +// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +// copies of the Software, and to permit persons to whom the Software is +// furnished to do so, subject to the following conditions: +// +// The above copyright notice and this permission notice shall be included in +// all copies or substantial portions of the Software. +// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +// THE SOFTWARE. 
+// + +#ifndef AMD_VULKAN_MEMORY_ALLOCATOR_H +#define AMD_VULKAN_MEMORY_ALLOCATOR_H + +/** \mainpage Vulkan Memory Allocator + +Version 3.2.1 + +Copyright (c) 2017-2025 Advanced Micro Devices, Inc. All rights reserved. \n +License: MIT \n +See also: [product page on GPUOpen](https://gpuopen.com/gaming-product/vulkan-memory-allocator/), +[repository on GitHub](https://github.com/GPUOpen-LibrariesAndSDKs/VulkanMemoryAllocator) + + +API documentation divided into groups: [Topics](topics.html) + +General documentation chapters: + +- User guide + - \subpage quick_start + - [Project setup](@ref quick_start_project_setup) + - [Initialization](@ref quick_start_initialization) + - [Resource allocation](@ref quick_start_resource_allocation) + - \subpage choosing_memory_type + - [Usage](@ref choosing_memory_type_usage) + - [Required and preferred flags](@ref choosing_memory_type_required_preferred_flags) + - [Explicit memory types](@ref choosing_memory_type_explicit_memory_types) + - [Custom memory pools](@ref choosing_memory_type_custom_memory_pools) + - [Dedicated allocations](@ref choosing_memory_type_dedicated_allocations) + - \subpage memory_mapping + - [Copy functions](@ref memory_mapping_copy_functions) + - [Mapping functions](@ref memory_mapping_mapping_functions) + - [Persistently mapped memory](@ref memory_mapping_persistently_mapped_memory) + - [Cache flush and invalidate](@ref memory_mapping_cache_control) + - \subpage staying_within_budget + - [Querying for budget](@ref staying_within_budget_querying_for_budget) + - [Controlling memory usage](@ref staying_within_budget_controlling_memory_usage) + - \subpage resource_aliasing + - \subpage custom_memory_pools + - [Choosing memory type index](@ref custom_memory_pools_MemTypeIndex) + - [When not to use custom pools](@ref custom_memory_pools_when_not_use) + - [Linear allocation algorithm](@ref linear_algorithm) + - [Free-at-once](@ref linear_algorithm_free_at_once) + - [Stack](@ref linear_algorithm_stack) + - [Double stack](@ref linear_algorithm_double_stack) + - [Ring buffer](@ref linear_algorithm_ring_buffer) + - \subpage defragmentation + - \subpage statistics + - [Numeric statistics](@ref statistics_numeric_statistics) + - [JSON dump](@ref statistics_json_dump) + - \subpage allocation_annotation + - [Allocation user data](@ref allocation_user_data) + - [Allocation names](@ref allocation_names) + - \subpage virtual_allocator + - \subpage debugging_memory_usage + - [Memory initialization](@ref debugging_memory_usage_initialization) + - [Margins](@ref debugging_memory_usage_margins) + - [Corruption detection](@ref debugging_memory_usage_corruption_detection) + - [Leak detection features](@ref debugging_memory_usage_leak_detection) + - \subpage other_api_interop +- \subpage usage_patterns + - [GPU-only resource](@ref usage_patterns_gpu_only) + - [Staging copy for upload](@ref usage_patterns_staging_copy_upload) + - [Readback](@ref usage_patterns_readback) + - [Advanced data uploading](@ref usage_patterns_advanced_data_uploading) + - [Other use cases](@ref usage_patterns_other_use_cases) +- \subpage configuration + - [Pointers to Vulkan functions](@ref config_Vulkan_functions) + - [Custom host memory allocator](@ref custom_memory_allocator) + - [Device memory allocation callbacks](@ref allocation_callbacks) + - [Device heap memory limit](@ref heap_memory_limit) +- Extension support + - \subpage vk_khr_dedicated_allocation + - \subpage enabling_buffer_device_address + - \subpage vk_ext_memory_priority + - \subpage vk_amd_device_coherent_memory 
+  - \subpage vk_khr_external_memory_win32
+- \subpage general_considerations
+  - [Thread safety](@ref general_considerations_thread_safety)
+  - [Versioning and compatibility](@ref general_considerations_versioning_and_compatibility)
+  - [Validation layer warnings](@ref general_considerations_validation_layer_warnings)
+  - [Allocation algorithm](@ref general_considerations_allocation_algorithm)
+  - [Features not supported](@ref general_considerations_features_not_supported)
+
+\defgroup group_init Library initialization
+
+\brief API elements related to the initialization and management of the entire library, especially #VmaAllocator object.
+
+\defgroup group_alloc Memory allocation
+
+\brief API elements related to the allocation, deallocation, and management of Vulkan memory, buffers, images.
+Most basic ones being: vmaCreateBuffer(), vmaCreateImage().
+
+\defgroup group_virtual Virtual allocator
+
+\brief API elements related to the mechanism of \ref virtual_allocator - using the core allocation algorithm
+for user-defined purpose without allocating any real GPU memory.
+
+\defgroup group_stats Statistics
+
+\brief API elements that query current status of the allocator, from memory usage, budget, to full dump of the internal state in JSON format.
+See documentation chapter: \ref statistics.
+*/
+
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#if !defined(VULKAN_H_)
+#include <vulkan/vulkan.h>
+#endif
+
+#if !defined(VMA_VULKAN_VERSION)
+    #if defined(VK_VERSION_1_4)
+        #define VMA_VULKAN_VERSION 1004000
+    #elif defined(VK_VERSION_1_3)
+        #define VMA_VULKAN_VERSION 1003000
+    #elif defined(VK_VERSION_1_2)
+        #define VMA_VULKAN_VERSION 1002000
+    #elif defined(VK_VERSION_1_1)
+        #define VMA_VULKAN_VERSION 1001000
+    #else
+        #define VMA_VULKAN_VERSION 1000000
+    #endif
+#endif
+
+#if defined(__ANDROID__) && defined(VK_NO_PROTOTYPES) && VMA_STATIC_VULKAN_FUNCTIONS
+    extern PFN_vkGetInstanceProcAddr vkGetInstanceProcAddr;
+    extern PFN_vkGetDeviceProcAddr vkGetDeviceProcAddr;
+    extern PFN_vkGetPhysicalDeviceProperties vkGetPhysicalDeviceProperties;
+    extern PFN_vkGetPhysicalDeviceMemoryProperties vkGetPhysicalDeviceMemoryProperties;
+    extern PFN_vkAllocateMemory vkAllocateMemory;
+    extern PFN_vkFreeMemory vkFreeMemory;
+    extern PFN_vkMapMemory vkMapMemory;
+    extern PFN_vkUnmapMemory vkUnmapMemory;
+    extern PFN_vkFlushMappedMemoryRanges vkFlushMappedMemoryRanges;
+    extern PFN_vkInvalidateMappedMemoryRanges vkInvalidateMappedMemoryRanges;
+    extern PFN_vkBindBufferMemory vkBindBufferMemory;
+    extern PFN_vkBindImageMemory vkBindImageMemory;
+    extern PFN_vkGetBufferMemoryRequirements vkGetBufferMemoryRequirements;
+    extern PFN_vkGetImageMemoryRequirements vkGetImageMemoryRequirements;
+    extern PFN_vkCreateBuffer vkCreateBuffer;
+    extern PFN_vkDestroyBuffer vkDestroyBuffer;
+    extern PFN_vkCreateImage vkCreateImage;
+    extern PFN_vkDestroyImage vkDestroyImage;
+    extern PFN_vkCmdCopyBuffer vkCmdCopyBuffer;
+    #if VMA_VULKAN_VERSION >= 1001000
+        extern PFN_vkGetBufferMemoryRequirements2 vkGetBufferMemoryRequirements2;
+        extern PFN_vkGetImageMemoryRequirements2 vkGetImageMemoryRequirements2;
+        extern PFN_vkBindBufferMemory2 vkBindBufferMemory2;
+        extern PFN_vkBindImageMemory2 vkBindImageMemory2;
+        extern PFN_vkGetPhysicalDeviceMemoryProperties2 vkGetPhysicalDeviceMemoryProperties2;
+    #endif // #if VMA_VULKAN_VERSION >= 1001000
+#endif // #if defined(__ANDROID__) && VMA_STATIC_VULKAN_FUNCTIONS && VK_NO_PROTOTYPES
+
+#if !defined(VMA_DEDICATED_ALLOCATION)
+    #if VK_KHR_get_memory_requirements2 && VK_KHR_dedicated_allocation
+
#define VMA_DEDICATED_ALLOCATION 1 + #else + #define VMA_DEDICATED_ALLOCATION 0 + #endif +#endif + +#if !defined(VMA_BIND_MEMORY2) + #if VK_KHR_bind_memory2 + #define VMA_BIND_MEMORY2 1 + #else + #define VMA_BIND_MEMORY2 0 + #endif +#endif + +#if !defined(VMA_MEMORY_BUDGET) + #if VK_EXT_memory_budget && (VK_KHR_get_physical_device_properties2 || VMA_VULKAN_VERSION >= 1001000) + #define VMA_MEMORY_BUDGET 1 + #else + #define VMA_MEMORY_BUDGET 0 + #endif +#endif + +// Defined to 1 when VK_KHR_buffer_device_address device extension or equivalent core Vulkan 1.2 feature is defined in its headers. +#if !defined(VMA_BUFFER_DEVICE_ADDRESS) + #if VK_KHR_buffer_device_address || VMA_VULKAN_VERSION >= 1002000 + #define VMA_BUFFER_DEVICE_ADDRESS 1 + #else + #define VMA_BUFFER_DEVICE_ADDRESS 0 + #endif +#endif + +// Defined to 1 when VK_EXT_memory_priority device extension is defined in Vulkan headers. +#if !defined(VMA_MEMORY_PRIORITY) + #if VK_EXT_memory_priority + #define VMA_MEMORY_PRIORITY 1 + #else + #define VMA_MEMORY_PRIORITY 0 + #endif +#endif + +// Defined to 1 when VK_KHR_maintenance4 device extension is defined in Vulkan headers. +#if !defined(VMA_KHR_MAINTENANCE4) + #if VK_KHR_maintenance4 + #define VMA_KHR_MAINTENANCE4 1 + #else + #define VMA_KHR_MAINTENANCE4 0 + #endif +#endif + +// Defined to 1 when VK_KHR_maintenance5 device extension is defined in Vulkan headers. +#if !defined(VMA_KHR_MAINTENANCE5) + #if VK_KHR_maintenance5 + #define VMA_KHR_MAINTENANCE5 1 + #else + #define VMA_KHR_MAINTENANCE5 0 + #endif +#endif + + +// Defined to 1 when VK_KHR_external_memory device extension is defined in Vulkan headers. +#if !defined(VMA_EXTERNAL_MEMORY) + #if VK_KHR_external_memory + #define VMA_EXTERNAL_MEMORY 1 + #else + #define VMA_EXTERNAL_MEMORY 0 + #endif +#endif + +// Defined to 1 when VK_KHR_external_memory_win32 device extension is defined in Vulkan headers. +#if !defined(VMA_EXTERNAL_MEMORY_WIN32) + #if VK_KHR_external_memory_win32 + #define VMA_EXTERNAL_MEMORY_WIN32 1 + #else + #define VMA_EXTERNAL_MEMORY_WIN32 0 + #endif +#endif + +// Define these macros to decorate all public functions with additional code, +// before and after returned type, appropriately. This may be useful for +// exporting the functions when compiling VMA as a separate library. Example: +// #define VMA_CALL_PRE __declspec(dllexport) +// #define VMA_CALL_POST __cdecl +#ifndef VMA_CALL_PRE + #define VMA_CALL_PRE +#endif +#ifndef VMA_CALL_POST + #define VMA_CALL_POST +#endif + +// Define this macro to decorate pNext pointers with an attribute specifying the Vulkan +// structure that will be extended via the pNext chain. +#ifndef VMA_EXTENDS_VK_STRUCT + #define VMA_EXTENDS_VK_STRUCT(vkStruct) +#endif + +// Define this macro to decorate pointers with an attribute specifying the +// length of the array they point to if they are not null. +// +// The length may be one of +// - The name of another parameter in the argument list where the pointer is declared +// - The name of another member in the struct where the pointer is declared +// - The name of a member of a struct type, meaning the value of that member in +// the context of the call. For example +// VMA_LEN_IF_NOT_NULL("VkPhysicalDeviceMemoryProperties::memoryHeapCount"), +// this means the number of memory heaps available in the device associated +// with the VmaAllocator being dealt with. +#ifndef VMA_LEN_IF_NOT_NULL + #define VMA_LEN_IF_NOT_NULL(len) +#endif + +// The VMA_NULLABLE macro is defined to be _Nullable when compiling with Clang. 
+// see: https://clang.llvm.org/docs/AttributeReference.html#nullable
+#ifndef VMA_NULLABLE
+    #ifdef __clang__
+        #define VMA_NULLABLE _Nullable
+    #else
+        #define VMA_NULLABLE
+    #endif
+#endif
+
+// The VMA_NOT_NULL macro is defined to be _Nonnull when compiling with Clang.
+// see: https://clang.llvm.org/docs/AttributeReference.html#nonnull
+#ifndef VMA_NOT_NULL
+    #ifdef __clang__
+        #define VMA_NOT_NULL _Nonnull
+    #else
+        #define VMA_NOT_NULL
+    #endif
+#endif
+
+// If non-dispatchable handles are represented as pointers then we can give
+// them nullability annotations
+#ifndef VMA_NOT_NULL_NON_DISPATCHABLE
+    #if defined(__LP64__) || defined(_WIN64) || (defined(__x86_64__) && !defined(__ILP32__) ) || defined(_M_X64) || defined(__ia64) || defined (_M_IA64) || defined(__aarch64__) || defined(__powerpc64__)
+        #define VMA_NOT_NULL_NON_DISPATCHABLE VMA_NOT_NULL
+    #else
+        #define VMA_NOT_NULL_NON_DISPATCHABLE
+    #endif
+#endif
+
+#ifndef VMA_NULLABLE_NON_DISPATCHABLE
+    #if defined(__LP64__) || defined(_WIN64) || (defined(__x86_64__) && !defined(__ILP32__) ) || defined(_M_X64) || defined(__ia64) || defined (_M_IA64) || defined(__aarch64__) || defined(__powerpc64__)
+        #define VMA_NULLABLE_NON_DISPATCHABLE VMA_NULLABLE
+    #else
+        #define VMA_NULLABLE_NON_DISPATCHABLE
+    #endif
+#endif
+
+#ifndef VMA_STATS_STRING_ENABLED
+    #define VMA_STATS_STRING_ENABLED 1
+#endif
+
+////////////////////////////////////////////////////////////////////////////////
+////////////////////////////////////////////////////////////////////////////////
+//
+// INTERFACE
+//
+////////////////////////////////////////////////////////////////////////////////
+////////////////////////////////////////////////////////////////////////////////
+
+// Sections for managing code placement in file, only for development purposes e.g. for convenient folding inside an IDE.
+#ifndef _VMA_ENUM_DECLARATIONS
+
+/**
+\addtogroup group_init
+@{
+*/
+
+/// Flags for created #VmaAllocator.
+typedef enum VmaAllocatorCreateFlagBits
+{
+    /** \brief Allocator and all objects created from it will not be synchronized internally, so you must guarantee they are used from only one thread at a time or synchronized externally by you.
+
+    Using this flag may increase performance because internal mutexes are not used.
+    */
+    VMA_ALLOCATOR_CREATE_EXTERNALLY_SYNCHRONIZED_BIT = 0x00000001,
+    /** \brief Enables usage of VK_KHR_dedicated_allocation extension.
+
+    The flag works only if VmaAllocatorCreateInfo::vulkanApiVersion `== VK_API_VERSION_1_0`.
+    When it is `VK_API_VERSION_1_1`, the flag is ignored because the extension has been promoted to Vulkan 1.1.
+
+    Using this extension will automatically allocate dedicated blocks of memory for
+    some buffers and images instead of suballocating place for them out of bigger
+    memory blocks (as if you explicitly used #VMA_ALLOCATION_CREATE_DEDICATED_MEMORY_BIT
+    flag) when it is recommended by the driver. It may improve performance on some
+    GPUs.
+
+    You may set this flag only if you found out that following device extensions are
+    supported, you enabled them while creating Vulkan device passed as
+    VmaAllocatorCreateInfo::device, and you want them to be used internally by this
+    library:
+
+    - VK_KHR_get_memory_requirements2 (device extension)
+    - VK_KHR_dedicated_allocation (device extension)
+
+    When this flag is set, you can experience following warnings reported by Vulkan
+    validation layer. You can ignore them.
+ + > vkBindBufferMemory(): Binding memory to buffer 0x2d but vkGetBufferMemoryRequirements() has not been called on that buffer. + */ + VMA_ALLOCATOR_CREATE_KHR_DEDICATED_ALLOCATION_BIT = 0x00000002, + /** + Enables usage of VK_KHR_bind_memory2 extension. + + The flag works only if VmaAllocatorCreateInfo::vulkanApiVersion `== VK_API_VERSION_1_0`. + When it is `VK_API_VERSION_1_1`, the flag is ignored because the extension has been promoted to Vulkan 1.1. + + You may set this flag only if you found out that this device extension is supported, + you enabled it while creating Vulkan device passed as VmaAllocatorCreateInfo::device, + and you want it to be used internally by this library. + + The extension provides functions `vkBindBufferMemory2KHR` and `vkBindImageMemory2KHR`, + which allow to pass a chain of `pNext` structures while binding. + This flag is required if you use `pNext` parameter in vmaBindBufferMemory2() or vmaBindImageMemory2(). + */ + VMA_ALLOCATOR_CREATE_KHR_BIND_MEMORY2_BIT = 0x00000004, + /** + Enables usage of VK_EXT_memory_budget extension. + + You may set this flag only if you found out that this device extension is supported, + you enabled it while creating Vulkan device passed as VmaAllocatorCreateInfo::device, + and you want it to be used internally by this library, along with another instance extension + VK_KHR_get_physical_device_properties2, which is required by it (or Vulkan 1.1, where this extension is promoted). + + The extension provides query for current memory usage and budget, which will probably + be more accurate than an estimation used by the library otherwise. + */ + VMA_ALLOCATOR_CREATE_EXT_MEMORY_BUDGET_BIT = 0x00000008, + /** + Enables usage of VK_AMD_device_coherent_memory extension. + + You may set this flag only if you: + + - found out that this device extension is supported and enabled it while creating Vulkan device passed as VmaAllocatorCreateInfo::device, + - checked that `VkPhysicalDeviceCoherentMemoryFeaturesAMD::deviceCoherentMemory` is true and set it while creating the Vulkan device, + - want it to be used internally by this library. + + The extension and accompanying device feature provide access to memory types with + `VK_MEMORY_PROPERTY_DEVICE_COHERENT_BIT_AMD` and `VK_MEMORY_PROPERTY_DEVICE_UNCACHED_BIT_AMD` flags. + They are useful mostly for writing breadcrumb markers - a common method for debugging GPU crash/hang/TDR. + + When the extension is not enabled, such memory types are still enumerated, but their usage is illegal. + To protect from this error, if you don't create the allocator with this flag, it will refuse to allocate any memory or create a custom pool in such memory type, + returning `VK_ERROR_FEATURE_NOT_PRESENT`. + */ + VMA_ALLOCATOR_CREATE_AMD_DEVICE_COHERENT_MEMORY_BIT = 0x00000010, + /** + Enables usage of "buffer device address" feature, which allows you to use function + `vkGetBufferDeviceAddress*` to get raw GPU pointer to a buffer and pass it for usage inside a shader. + + You may set this flag only if you: + + 1. (For Vulkan version < 1.2) Found as available and enabled device extension + VK_KHR_buffer_device_address. + This extension is promoted to core Vulkan 1.2. + 2. Found as available and enabled device feature `VkPhysicalDeviceBufferDeviceAddressFeatures::bufferDeviceAddress`. + + When this flag is set, you can create buffers with `VK_BUFFER_USAGE_SHADER_DEVICE_ADDRESS_BIT` using VMA. 
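+
+    As a minimal sketch of opting in at allocator creation time (assuming the
+    `bufferDeviceAddress` device feature was found supported and enabled):
+
+    \code
+    VmaAllocatorCreateInfo allocatorInfo = {};
+    // ... fill instance, physicalDevice, device, vulkanApiVersion ...
+    allocatorInfo.flags |= VMA_ALLOCATOR_CREATE_BUFFER_DEVICE_ADDRESS_BIT;
+    VmaAllocator allocator;
+    vmaCreateAllocator(&allocatorInfo, &allocator);
+    \endcode
+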
+ The library automatically adds `VK_MEMORY_ALLOCATE_DEVICE_ADDRESS_BIT` to + allocated memory blocks wherever it might be needed. + + For more information, see documentation chapter \ref enabling_buffer_device_address. + */ + VMA_ALLOCATOR_CREATE_BUFFER_DEVICE_ADDRESS_BIT = 0x00000020, + /** + Enables usage of VK_EXT_memory_priority extension in the library. + + You may set this flag only if you found available and enabled this device extension, + along with `VkPhysicalDeviceMemoryPriorityFeaturesEXT::memoryPriority == VK_TRUE`, + while creating Vulkan device passed as VmaAllocatorCreateInfo::device. + + When this flag is used, VmaAllocationCreateInfo::priority and VmaPoolCreateInfo::priority + are used to set priorities of allocated Vulkan memory. Without it, these variables are ignored. + + A priority must be a floating-point value between 0 and 1, indicating the priority of the allocation relative to other memory allocations. + Larger values are higher priority. The granularity of the priorities is implementation-dependent. + It is automatically passed to every call to `vkAllocateMemory` done by the library using structure `VkMemoryPriorityAllocateInfoEXT`. + The value to be used for default priority is 0.5. + For more details, see the documentation of the VK_EXT_memory_priority extension. + */ + VMA_ALLOCATOR_CREATE_EXT_MEMORY_PRIORITY_BIT = 0x00000040, + /** + Enables usage of VK_KHR_maintenance4 extension in the library. + + You may set this flag only if you found available and enabled this device extension, + while creating Vulkan device passed as VmaAllocatorCreateInfo::device. + */ + VMA_ALLOCATOR_CREATE_KHR_MAINTENANCE4_BIT = 0x00000080, + /** + Enables usage of VK_KHR_maintenance5 extension in the library. + + You should set this flag if you found available and enabled this device extension, + while creating Vulkan device passed as VmaAllocatorCreateInfo::device. + */ + VMA_ALLOCATOR_CREATE_KHR_MAINTENANCE5_BIT = 0x00000100, + + /** + Enables usage of VK_KHR_external_memory_win32 extension in the library. + + You should set this flag if you found available and enabled this device extension, + while creating Vulkan device passed as VmaAllocatorCreateInfo::device. + For more information, see \ref vk_khr_external_memory_win32. + */ + VMA_ALLOCATOR_CREATE_KHR_EXTERNAL_MEMORY_WIN32_BIT = 0x00000200, + + VMA_ALLOCATOR_CREATE_FLAG_BITS_MAX_ENUM = 0x7FFFFFFF +} VmaAllocatorCreateFlagBits; +/// See #VmaAllocatorCreateFlagBits. +typedef VkFlags VmaAllocatorCreateFlags; + +/** @} */ + +/** +\addtogroup group_alloc +@{ +*/ + +/// \brief Intended usage of the allocated memory. +typedef enum VmaMemoryUsage +{ + /** No intended memory usage specified. + Use other members of VmaAllocationCreateInfo to specify your requirements. + */ + VMA_MEMORY_USAGE_UNKNOWN = 0, + /** + \deprecated Obsolete, preserved for backward compatibility. + Prefers `VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT`. + */ + VMA_MEMORY_USAGE_GPU_ONLY = 1, + /** + \deprecated Obsolete, preserved for backward compatibility. + Guarantees `VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT` and `VK_MEMORY_PROPERTY_HOST_COHERENT_BIT`. + */ + VMA_MEMORY_USAGE_CPU_ONLY = 2, + /** + \deprecated Obsolete, preserved for backward compatibility. + Guarantees `VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT`, prefers `VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT`. + */ + VMA_MEMORY_USAGE_CPU_TO_GPU = 3, + /** + \deprecated Obsolete, preserved for backward compatibility. + Guarantees `VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT`, prefers `VK_MEMORY_PROPERTY_HOST_CACHED_BIT`. 
+ */ + VMA_MEMORY_USAGE_GPU_TO_CPU = 4, + /** + \deprecated Obsolete, preserved for backward compatibility. + Prefers not `VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT`. + */ + VMA_MEMORY_USAGE_CPU_COPY = 5, + /** + Lazily allocated GPU memory having `VK_MEMORY_PROPERTY_LAZILY_ALLOCATED_BIT`. + Exists mostly on mobile platforms. Using it on desktop PC or other GPUs with no such memory type present will fail the allocation. + + Usage: Memory for transient attachment images (color attachments, depth attachments etc.), created with `VK_IMAGE_USAGE_TRANSIENT_ATTACHMENT_BIT`. + + Allocations with this usage are always created as dedicated - it implies #VMA_ALLOCATION_CREATE_DEDICATED_MEMORY_BIT. + */ + VMA_MEMORY_USAGE_GPU_LAZILY_ALLOCATED = 6, + /** + Selects best memory type automatically. + This flag is recommended for most common use cases. + + When using this flag, if you want to map the allocation (using vmaMapMemory() or #VMA_ALLOCATION_CREATE_MAPPED_BIT), + you must pass one of the flags: #VMA_ALLOCATION_CREATE_HOST_ACCESS_SEQUENTIAL_WRITE_BIT or #VMA_ALLOCATION_CREATE_HOST_ACCESS_RANDOM_BIT + in VmaAllocationCreateInfo::flags. + + It can be used only with functions that let the library know `VkBufferCreateInfo` or `VkImageCreateInfo`, e.g. + vmaCreateBuffer(), vmaCreateImage(), vmaFindMemoryTypeIndexForBufferInfo(), vmaFindMemoryTypeIndexForImageInfo() + and not with generic memory allocation functions. + */ + VMA_MEMORY_USAGE_AUTO = 7, + /** + Selects best memory type automatically with preference for GPU (device) memory. + + When using this flag, if you want to map the allocation (using vmaMapMemory() or #VMA_ALLOCATION_CREATE_MAPPED_BIT), + you must pass one of the flags: #VMA_ALLOCATION_CREATE_HOST_ACCESS_SEQUENTIAL_WRITE_BIT or #VMA_ALLOCATION_CREATE_HOST_ACCESS_RANDOM_BIT + in VmaAllocationCreateInfo::flags. + + It can be used only with functions that let the library know `VkBufferCreateInfo` or `VkImageCreateInfo`, e.g. + vmaCreateBuffer(), vmaCreateImage(), vmaFindMemoryTypeIndexForBufferInfo(), vmaFindMemoryTypeIndexForImageInfo() + and not with generic memory allocation functions. + */ + VMA_MEMORY_USAGE_AUTO_PREFER_DEVICE = 8, + /** + Selects best memory type automatically with preference for CPU (host) memory. + + When using this flag, if you want to map the allocation (using vmaMapMemory() or #VMA_ALLOCATION_CREATE_MAPPED_BIT), + you must pass one of the flags: #VMA_ALLOCATION_CREATE_HOST_ACCESS_SEQUENTIAL_WRITE_BIT or #VMA_ALLOCATION_CREATE_HOST_ACCESS_RANDOM_BIT + in VmaAllocationCreateInfo::flags. + + It can be used only with functions that let the library know `VkBufferCreateInfo` or `VkImageCreateInfo`, e.g. + vmaCreateBuffer(), vmaCreateImage(), vmaFindMemoryTypeIndexForBufferInfo(), vmaFindMemoryTypeIndexForImageInfo() + and not with generic memory allocation functions. + */ + VMA_MEMORY_USAGE_AUTO_PREFER_HOST = 9, + + VMA_MEMORY_USAGE_MAX_ENUM = 0x7FFFFFFF +} VmaMemoryUsage; + +/// Flags to be passed as VmaAllocationCreateInfo::flags. +typedef enum VmaAllocationCreateFlagBits +{ + /** \brief Set this flag if the allocation should have its own memory block. + + Use it for special, big resources, like fullscreen images used as attachments. + + If you use this flag while creating a buffer or an image, `VkMemoryDedicatedAllocateInfo` + structure is applied if possible. + */ + VMA_ALLOCATION_CREATE_DEDICATED_MEMORY_BIT = 0x00000001, + + /** \brief Set this flag to only try to allocate from existing `VkDeviceMemory` blocks and never create new such block. 
+ + If new allocation cannot be placed in any of the existing blocks, allocation + fails with `VK_ERROR_OUT_OF_DEVICE_MEMORY` error. + + You should not use #VMA_ALLOCATION_CREATE_DEDICATED_MEMORY_BIT and + #VMA_ALLOCATION_CREATE_NEVER_ALLOCATE_BIT at the same time. It makes no sense. + */ + VMA_ALLOCATION_CREATE_NEVER_ALLOCATE_BIT = 0x00000002, + /** \brief Set this flag to use a memory that will be persistently mapped and retrieve pointer to it. + + Pointer to mapped memory will be returned through VmaAllocationInfo::pMappedData. + + It is valid to use this flag for allocation made from memory type that is not + `HOST_VISIBLE`. This flag is then ignored and memory is not mapped. This is + useful if you need an allocation that is efficient to use on GPU + (`DEVICE_LOCAL`) and still want to map it directly if possible on platforms that + support it (e.g. Intel GPU). + */ + VMA_ALLOCATION_CREATE_MAPPED_BIT = 0x00000004, + /** \deprecated Preserved for backward compatibility. Consider using vmaSetAllocationName() instead. + + Set this flag to treat VmaAllocationCreateInfo::pUserData as pointer to a + null-terminated string. Instead of copying pointer value, a local copy of the + string is made and stored in allocation's `pName`. The string is automatically + freed together with the allocation. It is also used in vmaBuildStatsString(). + */ + VMA_ALLOCATION_CREATE_USER_DATA_COPY_STRING_BIT = 0x00000020, + /** Allocation will be created from upper stack in a double stack pool. + + This flag is only allowed for custom pools created with #VMA_POOL_CREATE_LINEAR_ALGORITHM_BIT flag. + */ + VMA_ALLOCATION_CREATE_UPPER_ADDRESS_BIT = 0x00000040, + /** Create both buffer/image and allocation, but don't bind them together. + It is useful when you want to bind yourself to do some more advanced binding, e.g. using some extensions. + The flag is meaningful only with functions that bind by default: vmaCreateBuffer(), vmaCreateImage(). + Otherwise it is ignored. + + If you want to make sure the new buffer/image is not tied to the new memory allocation + through `VkMemoryDedicatedAllocateInfoKHR` structure in case the allocation ends up in its own memory block, + use also flag #VMA_ALLOCATION_CREATE_CAN_ALIAS_BIT. + */ + VMA_ALLOCATION_CREATE_DONT_BIND_BIT = 0x00000080, + /** Create allocation only if additional device memory required for it, if any, won't exceed + memory budget. Otherwise return `VK_ERROR_OUT_OF_DEVICE_MEMORY`. + */ + VMA_ALLOCATION_CREATE_WITHIN_BUDGET_BIT = 0x00000100, + /** \brief Set this flag if the allocated memory will have aliasing resources. + + Usage of this flag prevents supplying `VkMemoryDedicatedAllocateInfoKHR` when #VMA_ALLOCATION_CREATE_DEDICATED_MEMORY_BIT is specified. + Otherwise created dedicated memory will not be suitable for aliasing resources, resulting in Vulkan Validation Layer errors. + */ + VMA_ALLOCATION_CREATE_CAN_ALIAS_BIT = 0x00000200, + /** + Requests possibility to map the allocation (using vmaMapMemory() or #VMA_ALLOCATION_CREATE_MAPPED_BIT). + + - If you use #VMA_MEMORY_USAGE_AUTO or other `VMA_MEMORY_USAGE_AUTO*` value, + you must use this flag to be able to map the allocation. Otherwise, mapping is incorrect. + - If you use other value of #VmaMemoryUsage, this flag is ignored and mapping is always possible in memory types that are `HOST_VISIBLE`. + This includes allocations created in \ref custom_memory_pools. + + Declares that mapped memory will only be written sequentially, e.g. 
using `memcpy()` or a loop writing number-by-number, + never read or accessed randomly, so a memory type can be selected that is uncached and write-combined. + + \warning Violating this declaration may work correctly, but will likely be very slow. + Watch out for implicit reads introduced by doing e.g. `pMappedData[i] += x;` + Better prepare your data in a local variable and `memcpy()` it to the mapped pointer all at once. + */ + VMA_ALLOCATION_CREATE_HOST_ACCESS_SEQUENTIAL_WRITE_BIT = 0x00000400, + /** + Requests possibility to map the allocation (using vmaMapMemory() or #VMA_ALLOCATION_CREATE_MAPPED_BIT). + + - If you use #VMA_MEMORY_USAGE_AUTO or other `VMA_MEMORY_USAGE_AUTO*` value, + you must use this flag to be able to map the allocation. Otherwise, mapping is incorrect. + - If you use other value of #VmaMemoryUsage, this flag is ignored and mapping is always possible in memory types that are `HOST_VISIBLE`. + This includes allocations created in \ref custom_memory_pools. + + Declares that mapped memory can be read, written, and accessed in random order, + so a `HOST_CACHED` memory type is preferred. + */ + VMA_ALLOCATION_CREATE_HOST_ACCESS_RANDOM_BIT = 0x00000800, + /** + Together with #VMA_ALLOCATION_CREATE_HOST_ACCESS_SEQUENTIAL_WRITE_BIT or #VMA_ALLOCATION_CREATE_HOST_ACCESS_RANDOM_BIT, + it says that despite request for host access, a not-`HOST_VISIBLE` memory type can be selected + if it may improve performance. + + By using this flag, you declare that you will check if the allocation ended up in a `HOST_VISIBLE` memory type + (e.g. using vmaGetAllocationMemoryProperties()) and if not, you will create some "staging" buffer and + issue an explicit transfer to write/read your data. + To prepare for this possibility, don't forget to add appropriate flags like + `VK_BUFFER_USAGE_TRANSFER_DST_BIT`, `VK_BUFFER_USAGE_TRANSFER_SRC_BIT` to the parameters of created buffer or image. + */ + VMA_ALLOCATION_CREATE_HOST_ACCESS_ALLOW_TRANSFER_INSTEAD_BIT = 0x00001000, + /** Allocation strategy that chooses smallest possible free range for the allocation + to minimize memory usage and fragmentation, possibly at the expense of allocation time. + */ + VMA_ALLOCATION_CREATE_STRATEGY_MIN_MEMORY_BIT = 0x00010000, + /** Allocation strategy that chooses first suitable free range for the allocation - + not necessarily in terms of the smallest offset but the one that is easiest and fastest to find + to minimize allocation time, possibly at the expense of allocation quality. + */ + VMA_ALLOCATION_CREATE_STRATEGY_MIN_TIME_BIT = 0x00020000, + /** Allocation strategy that chooses always the lowest offset in available space. + This is not the most efficient strategy but achieves highly packed data. + Used internally by defragmentation, not recommended in typical usage. + */ + VMA_ALLOCATION_CREATE_STRATEGY_MIN_OFFSET_BIT = 0x00040000, + /** Alias to #VMA_ALLOCATION_CREATE_STRATEGY_MIN_MEMORY_BIT. + */ + VMA_ALLOCATION_CREATE_STRATEGY_BEST_FIT_BIT = VMA_ALLOCATION_CREATE_STRATEGY_MIN_MEMORY_BIT, + /** Alias to #VMA_ALLOCATION_CREATE_STRATEGY_MIN_TIME_BIT. + */ + VMA_ALLOCATION_CREATE_STRATEGY_FIRST_FIT_BIT = VMA_ALLOCATION_CREATE_STRATEGY_MIN_TIME_BIT, + /** A bit mask to extract only `STRATEGY` bits from entire set of flags. 
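+    For example, `(allocCreateInfo.flags & VMA_ALLOCATION_CREATE_STRATEGY_MASK)`
+    isolates the requested strategy, if any.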
+    */
+    VMA_ALLOCATION_CREATE_STRATEGY_MASK =
+        VMA_ALLOCATION_CREATE_STRATEGY_MIN_MEMORY_BIT |
+        VMA_ALLOCATION_CREATE_STRATEGY_MIN_TIME_BIT |
+        VMA_ALLOCATION_CREATE_STRATEGY_MIN_OFFSET_BIT,
+
+    VMA_ALLOCATION_CREATE_FLAG_BITS_MAX_ENUM = 0x7FFFFFFF
+} VmaAllocationCreateFlagBits;
+/// See #VmaAllocationCreateFlagBits.
+typedef VkFlags VmaAllocationCreateFlags;
+
+/// Flags to be passed as VmaPoolCreateInfo::flags.
+typedef enum VmaPoolCreateFlagBits
+{
+    /** \brief Use this flag if you always allocate only buffers and linear images or only optimal images out of this pool and so Buffer-Image Granularity can be ignored.
+
+    This is an optional optimization flag.
+
+    If you always allocate using vmaCreateBuffer(), vmaCreateImage(),
+    vmaAllocateMemoryForBuffer(), then you don't need to use it because allocator
+    knows exact type of your allocations so it can handle Buffer-Image Granularity
+    in the optimal way.
+
+    If you also allocate using vmaAllocateMemoryForImage() or vmaAllocateMemory(),
+    exact type of such allocations is not known, so allocator must be conservative
+    in handling Buffer-Image Granularity, which can lead to suboptimal allocation
+    (wasted memory). In that case, if you can make sure you always allocate only
+    buffers and linear images or only optimal images out of this pool, use this flag
+    to make allocator disregard Buffer-Image Granularity and so make allocations
+    faster and more optimal.
+    */
+    VMA_POOL_CREATE_IGNORE_BUFFER_IMAGE_GRANULARITY_BIT = 0x00000002,
+
+    /** \brief Enables alternative, linear allocation algorithm in this pool.
+
+    Specify this flag to enable linear allocation algorithm, which always creates
+    new allocations after last one and doesn't reuse space from allocations freed in
+    between. It trades memory consumption for simplified algorithm and data
+    structure, which has better performance and uses less memory for metadata.
+
+    By using this flag, you can achieve behavior of free-at-once, stack,
+    ring buffer, and double stack.
+    For details, see documentation chapter \ref linear_algorithm.
+    */
+    VMA_POOL_CREATE_LINEAR_ALGORITHM_BIT = 0x00000004,
+
+    /** Bit mask to extract only `ALGORITHM` bits from entire set of flags.
+    */
+    VMA_POOL_CREATE_ALGORITHM_MASK =
+        VMA_POOL_CREATE_LINEAR_ALGORITHM_BIT,
+
+    VMA_POOL_CREATE_FLAG_BITS_MAX_ENUM = 0x7FFFFFFF
+} VmaPoolCreateFlagBits;
+/// Flags to be passed as VmaPoolCreateInfo::flags. See #VmaPoolCreateFlagBits.
+typedef VkFlags VmaPoolCreateFlags;
+
+/// Flags to be passed as VmaDefragmentationInfo::flags.
+typedef enum VmaDefragmentationFlagBits
+{
+    /* \brief Use simple but fast algorithm for defragmentation.
+    May not achieve best results but will require least time to compute and least allocations to copy.
+    */
+    VMA_DEFRAGMENTATION_FLAG_ALGORITHM_FAST_BIT = 0x1,
+    /* \brief Default defragmentation algorithm, applied also when no `ALGORITHM` flag is specified.
+    Offers a balance between defragmentation quality and the amount of allocations and bytes that need to be moved.
+    */
+    VMA_DEFRAGMENTATION_FLAG_ALGORITHM_BALANCED_BIT = 0x2,
+    /* \brief Perform full defragmentation of memory.
+    Can result in notably more time to compute and allocations to copy, but will achieve best memory packing.
+    */
+    VMA_DEFRAGMENTATION_FLAG_ALGORITHM_FULL_BIT = 0x4,
+    /** \brief Use the most robust algorithm at the cost of time to compute and number of copies to make.
+    Only available when bufferImageGranularity is greater than 1, since it aims to reduce
+    alignment issues between different types of resources.
+ Otherwise falls back to same behavior as #VMA_DEFRAGMENTATION_FLAG_ALGORITHM_FULL_BIT. + */ + VMA_DEFRAGMENTATION_FLAG_ALGORITHM_EXTENSIVE_BIT = 0x8, + + /// A bit mask to extract only `ALGORITHM` bits from entire set of flags. + VMA_DEFRAGMENTATION_FLAG_ALGORITHM_MASK = + VMA_DEFRAGMENTATION_FLAG_ALGORITHM_FAST_BIT | + VMA_DEFRAGMENTATION_FLAG_ALGORITHM_BALANCED_BIT | + VMA_DEFRAGMENTATION_FLAG_ALGORITHM_FULL_BIT | + VMA_DEFRAGMENTATION_FLAG_ALGORITHM_EXTENSIVE_BIT, + + VMA_DEFRAGMENTATION_FLAG_BITS_MAX_ENUM = 0x7FFFFFFF +} VmaDefragmentationFlagBits; +/// See #VmaDefragmentationFlagBits. +typedef VkFlags VmaDefragmentationFlags; + +/// Operation performed on single defragmentation move. See structure #VmaDefragmentationMove. +typedef enum VmaDefragmentationMoveOperation +{ + /// Buffer/image has been recreated at `dstTmpAllocation`, data has been copied, old buffer/image has been destroyed. `srcAllocation` should be changed to point to the new place. This is the default value set by vmaBeginDefragmentationPass(). + VMA_DEFRAGMENTATION_MOVE_OPERATION_COPY = 0, + /// Set this value if you cannot move the allocation. New place reserved at `dstTmpAllocation` will be freed. `srcAllocation` will remain unchanged. + VMA_DEFRAGMENTATION_MOVE_OPERATION_IGNORE = 1, + /// Set this value if you decide to abandon the allocation and you destroyed the buffer/image. New place reserved at `dstTmpAllocation` will be freed, along with `srcAllocation`, which will be destroyed. + VMA_DEFRAGMENTATION_MOVE_OPERATION_DESTROY = 2, +} VmaDefragmentationMoveOperation; + +/** @} */ + +/** +\addtogroup group_virtual +@{ +*/ + +/// Flags to be passed as VmaVirtualBlockCreateInfo::flags. +typedef enum VmaVirtualBlockCreateFlagBits +{ + /** \brief Enables alternative, linear allocation algorithm in this virtual block. + + Specify this flag to enable linear allocation algorithm, which always creates + new allocations after last one and doesn't reuse space from allocations freed in + between. It trades memory consumption for simplified algorithm and data + structure, which has better performance and uses less memory for metadata. + + By using this flag, you can achieve behavior of free-at-once, stack, + ring buffer, and double stack. + For details, see documentation chapter \ref linear_algorithm. + */ + VMA_VIRTUAL_BLOCK_CREATE_LINEAR_ALGORITHM_BIT = 0x00000001, + + /** \brief Bit mask to extract only `ALGORITHM` bits from entire set of flags. + */ + VMA_VIRTUAL_BLOCK_CREATE_ALGORITHM_MASK = + VMA_VIRTUAL_BLOCK_CREATE_LINEAR_ALGORITHM_BIT, + + VMA_VIRTUAL_BLOCK_CREATE_FLAG_BITS_MAX_ENUM = 0x7FFFFFFF +} VmaVirtualBlockCreateFlagBits; +/// Flags to be passed as VmaVirtualBlockCreateInfo::flags. See #VmaVirtualBlockCreateFlagBits. +typedef VkFlags VmaVirtualBlockCreateFlags; + +/// Flags to be passed as VmaVirtualAllocationCreateInfo::flags. +typedef enum VmaVirtualAllocationCreateFlagBits +{ + /** \brief Allocation will be created from upper stack in a double stack pool. + + This flag is only allowed for virtual blocks created with #VMA_VIRTUAL_BLOCK_CREATE_LINEAR_ALGORITHM_BIT flag. + */ + VMA_VIRTUAL_ALLOCATION_CREATE_UPPER_ADDRESS_BIT = VMA_ALLOCATION_CREATE_UPPER_ADDRESS_BIT, + /** \brief Allocation strategy that tries to minimize memory usage. + */ + VMA_VIRTUAL_ALLOCATION_CREATE_STRATEGY_MIN_MEMORY_BIT = VMA_ALLOCATION_CREATE_STRATEGY_MIN_MEMORY_BIT, + /** \brief Allocation strategy that tries to minimize allocation time. 
+ */ + VMA_VIRTUAL_ALLOCATION_CREATE_STRATEGY_MIN_TIME_BIT = VMA_ALLOCATION_CREATE_STRATEGY_MIN_TIME_BIT, + /** Allocation strategy that chooses always the lowest offset in available space. + This is not the most efficient strategy but achieves highly packed data. + */ + VMA_VIRTUAL_ALLOCATION_CREATE_STRATEGY_MIN_OFFSET_BIT = VMA_ALLOCATION_CREATE_STRATEGY_MIN_OFFSET_BIT, + /** \brief A bit mask to extract only `STRATEGY` bits from entire set of flags. + + These strategy flags are binary compatible with equivalent flags in #VmaAllocationCreateFlagBits. + */ + VMA_VIRTUAL_ALLOCATION_CREATE_STRATEGY_MASK = VMA_ALLOCATION_CREATE_STRATEGY_MASK, + + VMA_VIRTUAL_ALLOCATION_CREATE_FLAG_BITS_MAX_ENUM = 0x7FFFFFFF +} VmaVirtualAllocationCreateFlagBits; +/// Flags to be passed as VmaVirtualAllocationCreateInfo::flags. See #VmaVirtualAllocationCreateFlagBits. +typedef VkFlags VmaVirtualAllocationCreateFlags; + +/** @} */ + +#endif // _VMA_ENUM_DECLARATIONS + +#ifndef _VMA_DATA_TYPES_DECLARATIONS + +/** +\addtogroup group_init +@{ */ + +/** \struct VmaAllocator +\brief Represents main object of this library initialized. + +Fill structure #VmaAllocatorCreateInfo and call function vmaCreateAllocator() to create it. +Call function vmaDestroyAllocator() to destroy it. + +It is recommended to create just one object of this type per `VkDevice` object, +right after Vulkan is initialized and keep it alive until before Vulkan device is destroyed. +*/ +VK_DEFINE_HANDLE(VmaAllocator) + +/** @} */ + +/** +\addtogroup group_alloc +@{ +*/ + +/** \struct VmaPool +\brief Represents custom memory pool + +Fill structure VmaPoolCreateInfo and call function vmaCreatePool() to create it. +Call function vmaDestroyPool() to destroy it. + +For more information see [Custom memory pools](@ref choosing_memory_type_custom_memory_pools). +*/ +VK_DEFINE_HANDLE(VmaPool) + +/** \struct VmaAllocation +\brief Represents single memory allocation. + +It may be either dedicated block of `VkDeviceMemory` or a specific region of a bigger block of this type +plus unique offset. + +There are multiple ways to create such object. +You need to fill structure VmaAllocationCreateInfo. +For more information see [Choosing memory type](@ref choosing_memory_type). + +Although the library provides convenience functions that create Vulkan buffer or image, +allocate memory for it and bind them together, +binding of the allocation to a buffer or an image is out of scope of the allocation itself. +Allocation object can exist without buffer/image bound, +binding can be done manually by the user, and destruction of it can be done +independently of destruction of the allocation. + +The object also remembers its size and some other information. +To retrieve this information, use function vmaGetAllocationInfo() and inspect +returned structure VmaAllocationInfo. +*/ +VK_DEFINE_HANDLE(VmaAllocation) + +/** \struct VmaDefragmentationContext +\brief An opaque object that represents started defragmentation process. + +Fill structure #VmaDefragmentationInfo and call function vmaBeginDefragmentation() to create it. +Call function vmaEndDefragmentation() to destroy it. +*/ +VK_DEFINE_HANDLE(VmaDefragmentationContext) + +/** @} */ + +/** +\addtogroup group_virtual +@{ +*/ + +/** \struct VmaVirtualAllocation +\brief Represents single memory allocation done inside VmaVirtualBlock. + +Use it as a unique identifier to virtual allocation within the single block. + +Use value `VK_NULL_HANDLE` to represent a null/invalid allocation. 
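+
+A minimal lifecycle sketch (error handling omitted; the sizes are arbitrary
+example values):
+
+\code
+VmaVirtualBlockCreateInfo blockCreateInfo = {};
+blockCreateInfo.size = 1048576; // 1 MiB backing range
+VmaVirtualBlock block;
+vmaCreateVirtualBlock(&blockCreateInfo, &block);
+
+VmaVirtualAllocationCreateInfo allocCreateInfo = {};
+allocCreateInfo.size = 4096;
+VmaVirtualAllocation alloc;
+VkDeviceSize offset;
+vmaVirtualAllocate(block, &allocCreateInfo, &alloc, &offset);
+
+vmaVirtualFree(block, alloc);
+vmaDestroyVirtualBlock(block);
+\endcode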
+*/ +VK_DEFINE_NON_DISPATCHABLE_HANDLE(VmaVirtualAllocation) + +/** @} */ + +/** +\addtogroup group_virtual +@{ +*/ + +/** \struct VmaVirtualBlock +\brief Handle to a virtual block object that allows to use core allocation algorithm without allocating any real GPU memory. + +Fill in #VmaVirtualBlockCreateInfo structure and use vmaCreateVirtualBlock() to create it. Use vmaDestroyVirtualBlock() to destroy it. +For more information, see documentation chapter \ref virtual_allocator. + +This object is not thread-safe - should not be used from multiple threads simultaneously, must be synchronized externally. +*/ +VK_DEFINE_HANDLE(VmaVirtualBlock) + +/** @} */ + +/** +\addtogroup group_init +@{ +*/ + +/// Callback function called after successful vkAllocateMemory. +typedef void (VKAPI_PTR* PFN_vmaAllocateDeviceMemoryFunction)( + VmaAllocator VMA_NOT_NULL allocator, + uint32_t memoryType, + VkDeviceMemory VMA_NOT_NULL_NON_DISPATCHABLE memory, + VkDeviceSize size, + void* VMA_NULLABLE pUserData); + +/// Callback function called before vkFreeMemory. +typedef void (VKAPI_PTR* PFN_vmaFreeDeviceMemoryFunction)( + VmaAllocator VMA_NOT_NULL allocator, + uint32_t memoryType, + VkDeviceMemory VMA_NOT_NULL_NON_DISPATCHABLE memory, + VkDeviceSize size, + void* VMA_NULLABLE pUserData); + +/** \brief Set of callbacks that the library will call for `vkAllocateMemory` and `vkFreeMemory`. + +Provided for informative purpose, e.g. to gather statistics about number of +allocations or total amount of memory allocated in Vulkan. + +Used in VmaAllocatorCreateInfo::pDeviceMemoryCallbacks. +*/ +typedef struct VmaDeviceMemoryCallbacks +{ + /// Optional, can be null. + PFN_vmaAllocateDeviceMemoryFunction VMA_NULLABLE pfnAllocate; + /// Optional, can be null. + PFN_vmaFreeDeviceMemoryFunction VMA_NULLABLE pfnFree; + /// Optional, can be null. + void* VMA_NULLABLE pUserData; +} VmaDeviceMemoryCallbacks; + +/** \brief Pointers to some Vulkan functions - a subset used by the library. + +Used in VmaAllocatorCreateInfo::pVulkanFunctions. +*/ +typedef struct VmaVulkanFunctions +{ + /// Required when using VMA_DYNAMIC_VULKAN_FUNCTIONS. + PFN_vkGetInstanceProcAddr VMA_NULLABLE vkGetInstanceProcAddr; + /// Required when using VMA_DYNAMIC_VULKAN_FUNCTIONS. 
+    PFN_vkGetDeviceProcAddr VMA_NULLABLE vkGetDeviceProcAddr;
+    PFN_vkGetPhysicalDeviceProperties VMA_NULLABLE vkGetPhysicalDeviceProperties;
+    PFN_vkGetPhysicalDeviceMemoryProperties VMA_NULLABLE vkGetPhysicalDeviceMemoryProperties;
+    PFN_vkAllocateMemory VMA_NULLABLE vkAllocateMemory;
+    PFN_vkFreeMemory VMA_NULLABLE vkFreeMemory;
+    PFN_vkMapMemory VMA_NULLABLE vkMapMemory;
+    PFN_vkUnmapMemory VMA_NULLABLE vkUnmapMemory;
+    PFN_vkFlushMappedMemoryRanges VMA_NULLABLE vkFlushMappedMemoryRanges;
+    PFN_vkInvalidateMappedMemoryRanges VMA_NULLABLE vkInvalidateMappedMemoryRanges;
+    PFN_vkBindBufferMemory VMA_NULLABLE vkBindBufferMemory;
+    PFN_vkBindImageMemory VMA_NULLABLE vkBindImageMemory;
+    PFN_vkGetBufferMemoryRequirements VMA_NULLABLE vkGetBufferMemoryRequirements;
+    PFN_vkGetImageMemoryRequirements VMA_NULLABLE vkGetImageMemoryRequirements;
+    PFN_vkCreateBuffer VMA_NULLABLE vkCreateBuffer;
+    PFN_vkDestroyBuffer VMA_NULLABLE vkDestroyBuffer;
+    PFN_vkCreateImage VMA_NULLABLE vkCreateImage;
+    PFN_vkDestroyImage VMA_NULLABLE vkDestroyImage;
+    PFN_vkCmdCopyBuffer VMA_NULLABLE vkCmdCopyBuffer;
+#if VMA_DEDICATED_ALLOCATION || VMA_VULKAN_VERSION >= 1001000
+    /// Fetch "vkGetBufferMemoryRequirements2" on Vulkan >= 1.1, fetch "vkGetBufferMemoryRequirements2KHR" when using VK_KHR_dedicated_allocation extension.
+    PFN_vkGetBufferMemoryRequirements2KHR VMA_NULLABLE vkGetBufferMemoryRequirements2KHR;
+    /// Fetch "vkGetImageMemoryRequirements2" on Vulkan >= 1.1, fetch "vkGetImageMemoryRequirements2KHR" when using VK_KHR_dedicated_allocation extension.
+    PFN_vkGetImageMemoryRequirements2KHR VMA_NULLABLE vkGetImageMemoryRequirements2KHR;
+#endif
+#if VMA_BIND_MEMORY2 || VMA_VULKAN_VERSION >= 1001000
+    /// Fetch "vkBindBufferMemory2" on Vulkan >= 1.1, fetch "vkBindBufferMemory2KHR" when using VK_KHR_bind_memory2 extension.
+    PFN_vkBindBufferMemory2KHR VMA_NULLABLE vkBindBufferMemory2KHR;
+    /// Fetch "vkBindImageMemory2" on Vulkan >= 1.1, fetch "vkBindImageMemory2KHR" when using VK_KHR_bind_memory2 extension.
+    PFN_vkBindImageMemory2KHR VMA_NULLABLE vkBindImageMemory2KHR;
+#endif
+#if VMA_MEMORY_BUDGET || VMA_VULKAN_VERSION >= 1001000
+    /// Fetch from "vkGetPhysicalDeviceMemoryProperties2" on Vulkan >= 1.1, but you can also fetch it from "vkGetPhysicalDeviceMemoryProperties2KHR" if you enabled extension VK_KHR_get_physical_device_properties2.
+    PFN_vkGetPhysicalDeviceMemoryProperties2KHR VMA_NULLABLE vkGetPhysicalDeviceMemoryProperties2KHR;
+#endif
+#if VMA_KHR_MAINTENANCE4 || VMA_VULKAN_VERSION >= 1003000
+    /// Fetch from "vkGetDeviceBufferMemoryRequirements" on Vulkan >= 1.3, but you can also fetch it from "vkGetDeviceBufferMemoryRequirementsKHR" if you enabled extension VK_KHR_maintenance4.
+    PFN_vkGetDeviceBufferMemoryRequirementsKHR VMA_NULLABLE vkGetDeviceBufferMemoryRequirements;
+    /// Fetch from "vkGetDeviceImageMemoryRequirements" on Vulkan >= 1.3, but you can also fetch it from "vkGetDeviceImageMemoryRequirementsKHR" if you enabled extension VK_KHR_maintenance4.
+    PFN_vkGetDeviceImageMemoryRequirementsKHR VMA_NULLABLE vkGetDeviceImageMemoryRequirements;
+#endif
+#if VMA_EXTERNAL_MEMORY_WIN32
+    PFN_vkGetMemoryWin32HandleKHR VMA_NULLABLE vkGetMemoryWin32HandleKHR;
+#else
+    void* VMA_NULLABLE vkGetMemoryWin32HandleKHR;
+#endif
+} VmaVulkanFunctions;
+
+/// Description of an Allocator to be created.
+typedef struct VmaAllocatorCreateInfo
+{
+    /// Flags for created allocator. Use #VmaAllocatorCreateFlagBits enum.
+    VmaAllocatorCreateFlags flags;
+    /// Vulkan physical device.
+    /** It must be valid throughout the whole lifetime of the created allocator. */
+    VkPhysicalDevice VMA_NOT_NULL physicalDevice;
+    /// Vulkan device.
+    /** It must be valid throughout the whole lifetime of the created allocator. */
+    VkDevice VMA_NOT_NULL device;
+    /// Preferred size of a single `VkDeviceMemory` block to be allocated from large heaps > 1 GiB. Optional.
+    /** Set to 0 to use default, which is currently 256 MiB. */
+    VkDeviceSize preferredLargeHeapBlockSize;
+    /// Custom CPU memory allocation callbacks. Optional.
+    /** Optional, can be null. When specified, will also be used for all CPU-side memory allocations. */
+    const VkAllocationCallbacks* VMA_NULLABLE pAllocationCallbacks;
+    /// Informative callbacks for `vkAllocateMemory`, `vkFreeMemory`. Optional.
+    /** Optional, can be null. */
+    const VmaDeviceMemoryCallbacks* VMA_NULLABLE pDeviceMemoryCallbacks;
+    /** \brief Either null or a pointer to an array of limits on maximum number of bytes that can be allocated out of particular Vulkan memory heap.
+
+    If not NULL, it must be a pointer to an array of
+    `VkPhysicalDeviceMemoryProperties::memoryHeapCount` elements, defining limit on
+    maximum number of bytes that can be allocated out of particular Vulkan memory
+    heap.
+
+    Any of the elements may be equal to `VK_WHOLE_SIZE`, which means no limit on that
+    heap. This is also the default in case of `pHeapSizeLimit` = NULL.
+
+    If there is a limit defined for a heap:
+
+    - If the user tries to allocate more memory from that heap using this allocator,
+      the allocation fails with `VK_ERROR_OUT_OF_DEVICE_MEMORY`.
+    - If the limit is smaller than the heap size reported in `VkMemoryHeap::size`, the
+      value of this limit will be reported instead when using vmaGetMemoryProperties().
+
+    Warning! Using this feature may not be equivalent to installing a GPU with a
+    smaller amount of memory, because the graphics driver doesn't necessarily fail new
+    allocations with `VK_ERROR_OUT_OF_DEVICE_MEMORY` result when memory capacity is
+    exceeded. It may return success and just silently migrate some device memory
+    blocks to system RAM. This driver behavior can also be controlled using the
+    VK_AMD_memory_overallocation_behavior extension.
+    */
+    const VkDeviceSize* VMA_NULLABLE VMA_LEN_IF_NOT_NULL("VkPhysicalDeviceMemoryProperties::memoryHeapCount") pHeapSizeLimit;
+
+    /** \brief Pointers to Vulkan functions. Can be null.
+
+    For details see [Pointers to Vulkan functions](@ref config_Vulkan_functions).
+    */
+    const VmaVulkanFunctions* VMA_NULLABLE pVulkanFunctions;
+    /** \brief Handle to Vulkan instance object.
+
+    Starting from version 3.0.0 this member is no longer optional, it must be set!
+    */
+    VkInstance VMA_NOT_NULL instance;
+    /** \brief Optional. Vulkan version that the application uses.
+
+    It must be a value in the format as created by macro `VK_MAKE_VERSION` or a constant like: `VK_API_VERSION_1_1`, `VK_API_VERSION_1_0`.
+    The patch version number specified is ignored. Only the major and minor versions are considered.
+    Only versions 1.0...1.4 are supported by the current implementation.
+    Leaving it initialized to zero is equivalent to `VK_API_VERSION_1_0`.
+    It must match the Vulkan version used by the application and supported on the selected physical device,
+    so it must be no higher than `VkApplicationInfo::apiVersion` passed to `vkCreateInstance`
+    and no higher than `VkPhysicalDeviceProperties::apiVersion` found on the physical device used.
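+
+    For orientation, a minimal sketch of filling this structure and creating the
+    allocator (assuming `instance`, `physicalDevice` and `device` were created
+    elsewhere and Vulkan 1.2 was requested; names are illustrative):
+
+    \code
+    VmaVulkanFunctions vulkanFunctions = {0};
+    vulkanFunctions.vkGetInstanceProcAddr = vkGetInstanceProcAddr;
+    vulkanFunctions.vkGetDeviceProcAddr = vkGetDeviceProcAddr;
+
+    VmaAllocatorCreateInfo allocatorCreateInfo = {0};
+    allocatorCreateInfo.vulkanApiVersion = VK_API_VERSION_1_2;
+    allocatorCreateInfo.physicalDevice = physicalDevice;
+    allocatorCreateInfo.device = device;
+    allocatorCreateInfo.instance = instance;
+    allocatorCreateInfo.pVulkanFunctions = &vulkanFunctions;
+
+    VmaAllocator allocator;
+    vmaCreateAllocator(&allocatorCreateInfo, &allocator);
+    \endcode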
+    */
+    uint32_t vulkanApiVersion;
+#if VMA_EXTERNAL_MEMORY
+    /** \brief Either null or a pointer to an array of external memory handle types for each Vulkan memory type.
+
+    If not NULL, it must be a pointer to an array of `VkPhysicalDeviceMemoryProperties::memoryTypeCount`
+    elements, defining external memory handle types of particular Vulkan memory type,
+    to be passed using `VkExportMemoryAllocateInfoKHR`.
+
+    Any of the elements may be equal to 0, which means not to use `VkExportMemoryAllocateInfoKHR` on this memory type.
+    This is also the default in case of `pTypeExternalMemoryHandleTypes` = NULL.
+    */
+    const VkExternalMemoryHandleTypeFlagsKHR* VMA_NULLABLE VMA_LEN_IF_NOT_NULL("VkPhysicalDeviceMemoryProperties::memoryTypeCount") pTypeExternalMemoryHandleTypes;
+#endif // #if VMA_EXTERNAL_MEMORY
+} VmaAllocatorCreateInfo;
+
+/// Information about existing #VmaAllocator object.
+typedef struct VmaAllocatorInfo
+{
+    /** \brief Handle to Vulkan instance object.
+
+    This is the same value as has been passed through VmaAllocatorCreateInfo::instance.
+    */
+    VkInstance VMA_NOT_NULL instance;
+    /** \brief Handle to Vulkan physical device object.
+
+    This is the same value as has been passed through VmaAllocatorCreateInfo::physicalDevice.
+    */
+    VkPhysicalDevice VMA_NOT_NULL physicalDevice;
+    /** \brief Handle to Vulkan device object.
+
+    This is the same value as has been passed through VmaAllocatorCreateInfo::device.
+    */
+    VkDevice VMA_NOT_NULL device;
+} VmaAllocatorInfo;
+
+/** @} */
+
+/**
+\addtogroup group_stats
+@{
+*/
+
+/** \brief Calculated statistics of memory usage e.g. in a specific memory type, heap, custom pool, or total.
+
+These are fast to calculate.
+See functions: vmaGetHeapBudgets(), vmaGetPoolStatistics().
+*/
+typedef struct VmaStatistics
+{
+    /** \brief Number of `VkDeviceMemory` objects - Vulkan memory blocks allocated.
+    */
+    uint32_t blockCount;
+    /** \brief Number of #VmaAllocation objects allocated.
+
+    Dedicated allocations have their own blocks, so each one adds 1 to `allocationCount` as well as `blockCount`.
+    */
+    uint32_t allocationCount;
+    /** \brief Number of bytes allocated in `VkDeviceMemory` blocks.
+
+    \note To avoid confusion, please be aware that what Vulkan calls an "allocation" - a whole `VkDeviceMemory` object
+    (e.g. as in `VkPhysicalDeviceLimits::maxMemoryAllocationCount`) is called a "block" in VMA, while VMA calls
+    "allocation" a #VmaAllocation object that represents a memory region sub-allocated from such a block, usually for a single buffer or image.
+    */
+    VkDeviceSize blockBytes;
+    /** \brief Total number of bytes occupied by all #VmaAllocation objects.
+
+    Always less than or equal to `blockBytes`.
+    Difference `(blockBytes - allocationBytes)` is the amount of memory allocated from Vulkan
+    but unused by any #VmaAllocation.
+    */
+    VkDeviceSize allocationBytes;
+} VmaStatistics;
+
+/** \brief More detailed statistics than #VmaStatistics.
+
+These are slower to calculate. Use for debugging purposes.
+See functions: vmaCalculateStatistics(), vmaCalculatePoolStatistics().
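+
+A minimal sketch of retrieving them through vmaCalculateStatistics() (assuming
+`allocator` was created earlier; the printed field is illustrative):
+
+\code
+VmaTotalStatistics stats;
+vmaCalculateStatistics(allocator, &stats);
+// stats.total aggregates all heaps and types; per-heap/per-type data is also available.
+printf("Allocated bytes: %llu\n", (unsigned long long)stats.total.statistics.allocationBytes);
+\endcode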
+
+Previous version of the statistics API provided averages, but they have been removed
+because they can be easily calculated as:
+
+\code
+VkDeviceSize allocationSizeAvg = detailedStats.statistics.allocationBytes / detailedStats.statistics.allocationCount;
+VkDeviceSize unusedBytes = detailedStats.statistics.blockBytes - detailedStats.statistics.allocationBytes;
+VkDeviceSize unusedRangeSizeAvg = unusedBytes / detailedStats.unusedRangeCount;
+\endcode
+*/
+typedef struct VmaDetailedStatistics
+{
+    /// Basic statistics.
+    VmaStatistics statistics;
+    /// Number of free ranges of memory between allocations.
+    uint32_t unusedRangeCount;
+    /// Smallest allocation size. `VK_WHOLE_SIZE` if there are 0 allocations.
+    VkDeviceSize allocationSizeMin;
+    /// Largest allocation size. 0 if there are 0 allocations.
+    VkDeviceSize allocationSizeMax;
+    /// Smallest empty range size. `VK_WHOLE_SIZE` if there are 0 empty ranges.
+    VkDeviceSize unusedRangeSizeMin;
+    /// Largest empty range size. 0 if there are 0 empty ranges.
+    VkDeviceSize unusedRangeSizeMax;
+} VmaDetailedStatistics;
+
+/** \brief General statistics from current state of the Allocator -
+total memory usage across all memory heaps and types.
+
+These are slower to calculate. Use for debugging purposes.
+See function vmaCalculateStatistics().
+*/
+typedef struct VmaTotalStatistics
+{
+    VmaDetailedStatistics memoryType[VK_MAX_MEMORY_TYPES];
+    VmaDetailedStatistics memoryHeap[VK_MAX_MEMORY_HEAPS];
+    VmaDetailedStatistics total;
+} VmaTotalStatistics;
+
+/** \brief Statistics of current memory usage and available budget for a specific memory heap.
+
+These are fast to calculate.
+See function vmaGetHeapBudgets().
+*/
+typedef struct VmaBudget
+{
+    /** \brief Statistics fetched from the library.
+    */
+    VmaStatistics statistics;
+    /** \brief Estimated current memory usage of the program, in bytes.
+
+    Fetched from system using VK_EXT_memory_budget extension if enabled.
+
+    It might be different than `statistics.blockBytes` (usually higher) due to additional implicit objects
+    also occupying the memory, like swapchain, pipelines, descriptor heaps, command buffers, or
+    `VkDeviceMemory` blocks allocated outside of this library, if any.
+    */
+    VkDeviceSize usage;
+    /** \brief Estimated amount of memory available to the program, in bytes.
+
+    Fetched from system using VK_EXT_memory_budget extension if enabled.
+
+    It might be different (most probably smaller) than `VkMemoryHeap::size[heapIndex]` due to factors
+    external to the program, decided by the operating system.
+    Difference `budget - usage` is the amount of additional memory that can probably
+    be allocated without problems. Exceeding the budget may result in various problems.
+    */
+    VkDeviceSize budget;
+} VmaBudget;
+
+/** @} */
+
+/**
+\addtogroup group_alloc
+@{
+*/
+
+/** \brief Parameters of new #VmaAllocation.
+
+To be used with functions like vmaCreateBuffer(), vmaCreateImage(), and many others.
+*/
+typedef struct VmaAllocationCreateInfo
+{
+    /// Use #VmaAllocationCreateFlagBits enum.
+    VmaAllocationCreateFlags flags;
+    /** \brief Intended usage of memory.
+
+    You can leave #VMA_MEMORY_USAGE_UNKNOWN if you specify memory requirements in another way. \n
+    If `pool` is not null, this member is ignored.
+    */
+    VmaMemoryUsage usage;
+    /** \brief Flags that must be set in a Memory Type chosen for an allocation.
+
+    Leave 0 if you specify memory requirements in another way. \n
+    If `pool` is not null, this member is ignored.*/
+    VkMemoryPropertyFlags requiredFlags;
+    /** \brief Flags that preferably should be set in a memory type chosen for an allocation.
+
+    Set to 0 if no additional flags are preferred. \n
+    If `pool` is not null, this member is ignored. */
+    VkMemoryPropertyFlags preferredFlags;
+    /** \brief Bitmask containing one bit set for every memory type acceptable for this allocation.
+
+    Value 0 is equivalent to `UINT32_MAX` - it means any memory type is accepted if
+    it meets other requirements specified by this structure, with no further
+    restrictions on memory type index. \n
+    If `pool` is not null, this member is ignored.
+    */
+    uint32_t memoryTypeBits;
+    /** \brief Pool that this allocation should be created in.
+
+    Leave `VK_NULL_HANDLE` to allocate from the default pool. If not null, members:
+    `usage`, `requiredFlags`, `preferredFlags`, `memoryTypeBits` are ignored.
+    */
+    VmaPool VMA_NULLABLE pool;
+    /** \brief Custom general-purpose pointer that will be stored in #VmaAllocation, can be read as VmaAllocationInfo::pUserData and changed using vmaSetAllocationUserData().
+
+    If #VMA_ALLOCATION_CREATE_USER_DATA_COPY_STRING_BIT is used, it must be either
+    null or a pointer to a null-terminated string. The string will then be copied to an
+    internal buffer, so it doesn't need to be valid after the allocation call.
+    */
+    void* VMA_NULLABLE pUserData;
+    /** \brief A floating-point value between 0 and 1, indicating the priority of the allocation relative to other memory allocations.
+
+    It is used only when #VMA_ALLOCATOR_CREATE_EXT_MEMORY_PRIORITY_BIT flag was used during creation of the #VmaAllocator object
+    and this allocation ends up as dedicated or is explicitly forced as dedicated using #VMA_ALLOCATION_CREATE_DEDICATED_MEMORY_BIT.
+    Otherwise, it has the priority of the memory block where it is placed and this variable is ignored.
+    */
+    float priority;
+} VmaAllocationCreateInfo;
+
+/// Describes parameters of a created #VmaPool.
+typedef struct VmaPoolCreateInfo
+{
+    /** \brief Vulkan memory type index to allocate this pool from.
+    */
+    uint32_t memoryTypeIndex;
+    /** \brief Use combination of #VmaPoolCreateFlagBits.
+    */
+    VmaPoolCreateFlags flags;
+    /** \brief Size of a single `VkDeviceMemory` block to be allocated as part of this pool, in bytes. Optional.
+
+    Specify nonzero to set explicit, constant size of memory blocks used by this
+    pool.
+
+    Leave 0 to use default and let the library manage block sizes automatically.
+    Sizes of particular blocks may vary.
+    In this case, the pool will also support dedicated allocations.
+    */
+    VkDeviceSize blockSize;
+    /** \brief Minimum number of blocks to be always allocated in this pool, even if they stay empty.
+
+    Set to 0 to have no preallocated blocks and allow the pool to be completely empty.
+    */
+    size_t minBlockCount;
+    /** \brief Maximum number of blocks that can be allocated in this pool. Optional.
+
+    Set to 0 to use default, which is `SIZE_MAX`, which means no limit.
+
+    Set to the same value as VmaPoolCreateInfo::minBlockCount to have a fixed amount of memory allocated
+    throughout the whole lifetime of this pool.
+    */
+    size_t maxBlockCount;
+    /** \brief A floating-point value between 0 and 1, indicating the priority of the allocations in this pool relative to other memory allocations.
+
+    It is used only when #VMA_ALLOCATOR_CREATE_EXT_MEMORY_PRIORITY_BIT flag was used during creation of the #VmaAllocator object.
+    Otherwise, this variable is ignored.
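+
+    For context, a minimal sketch of creating a pool described by this structure
+    (assuming `allocator` exists and a suitable `memoryTypeIndex` was found, e.g.
+    with vmaFindMemoryTypeIndexForBufferInfo()):
+
+    \code
+    VmaPoolCreateInfo poolCreateInfo = {0};
+    poolCreateInfo.memoryTypeIndex = memoryTypeIndex;
+    poolCreateInfo.blockSize = 0;     // 0 = let the library manage block sizes
+    poolCreateInfo.maxBlockCount = 0; // 0 = no limit
+
+    VmaPool pool;
+    VkResult res = vmaCreatePool(allocator, &poolCreateInfo, &pool);
+    \endcode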
+    */
+    float priority;
+    /** \brief Additional minimum alignment to be used for all allocations created from this pool. Can be 0.
+
+    Leave 0 (default) not to impose any additional alignment. If not 0, it must be a power of two.
+    It can be useful in cases where the alignment returned by Vulkan functions like `vkGetBufferMemoryRequirements` is not enough,
+    e.g. when doing interop with OpenGL.
+    */
+    VkDeviceSize minAllocationAlignment;
+    /** \brief Additional `pNext` chain to be attached to `VkMemoryAllocateInfo` used for every allocation made by this pool. Optional.
+
+    Optional, can be null. If not null, it must point to a `pNext` chain of structures that can be attached to `VkMemoryAllocateInfo`.
+    It can be useful for special needs such as adding `VkExportMemoryAllocateInfoKHR`.
+    Structures pointed to by this member must remain alive and unchanged for the whole lifetime of the custom pool.
+
+    Please note that some structures, e.g. `VkMemoryPriorityAllocateInfoEXT`, `VkMemoryDedicatedAllocateInfoKHR`,
+    can be attached automatically by this library when using other, more convenient features of it.
+    */
+    void* VMA_NULLABLE VMA_EXTENDS_VK_STRUCT(VkMemoryAllocateInfo) pMemoryAllocateNext;
+} VmaPoolCreateInfo;
+
+/** @} */
+
+/**
+\addtogroup group_alloc
+@{
+*/
+
+/**
+Parameters of #VmaAllocation objects that can be retrieved using function vmaGetAllocationInfo().
+
+There is also an extended version of this structure that carries additional parameters: #VmaAllocationInfo2.
+*/
+typedef struct VmaAllocationInfo
+{
+    /** \brief Memory type index that this allocation was allocated from.
+
+    It never changes.
+    */
+    uint32_t memoryType;
+    /** \brief Handle to Vulkan memory object.
+
+    The same memory object can be shared by multiple allocations.
+
+    It can change after the allocation is moved during \ref defragmentation.
+    */
+    VkDeviceMemory VMA_NULLABLE_NON_DISPATCHABLE deviceMemory;
+    /** \brief Offset in `VkDeviceMemory` object to the beginning of this allocation, in bytes. `(deviceMemory, offset)` pair is unique to this allocation.
+
+    You usually don't need to use this offset. If you create a buffer or an image together with the allocation using e.g. function
+    vmaCreateBuffer(), vmaCreateImage(), functions that operate on these resources refer to the beginning of the buffer or image,
+    not the entire device memory block. Functions like vmaMapMemory(), vmaBindBufferMemory() also refer to the beginning of the allocation
+    and apply this offset automatically.
+
+    It can change after the allocation is moved during \ref defragmentation.
+    */
+    VkDeviceSize offset;
+    /** \brief Size of this allocation, in bytes.
+
+    It never changes.
+
+    \note Allocation size returned in this variable may be greater than the size
+    requested for the resource e.g. as `VkBufferCreateInfo::size`. The whole size of the
+    allocation is accessible for operations on memory e.g. using a pointer after
+    mapping with vmaMapMemory(), but operations on the resource e.g. using
+    `vkCmdCopyBuffer` must be limited to the size of the resource.
+    */
+    VkDeviceSize size;
+    /** \brief Pointer to the beginning of this allocation as mapped data.
+
+    If the allocation hasn't been mapped using vmaMapMemory() and hasn't been
+    created with #VMA_ALLOCATION_CREATE_MAPPED_BIT flag, this value is null.
+
+    It can change after a call to vmaMapMemory(), vmaUnmapMemory().
+    It can also change after the allocation is moved during \ref defragmentation.
+    */
+    void* VMA_NULLABLE pMappedData;
+    /** \brief Custom general-purpose pointer that was passed as VmaAllocationCreateInfo::pUserData or set using vmaSetAllocationUserData().
+
+    It can change after a call to vmaSetAllocationUserData() for this allocation.
+    */
+    void* VMA_NULLABLE pUserData;
+    /** \brief Custom allocation name that was set with vmaSetAllocationName().
+
+    It can change after a call to vmaSetAllocationName() for this allocation.
+
+    Another way to set a custom name is to pass it in VmaAllocationCreateInfo::pUserData with
+    additional flag #VMA_ALLOCATION_CREATE_USER_DATA_COPY_STRING_BIT set [DEPRECATED].
+    */
+    const char* VMA_NULLABLE pName;
+} VmaAllocationInfo;
+
+/// Extended parameters of a #VmaAllocation object that can be retrieved using function vmaGetAllocationInfo2().
+typedef struct VmaAllocationInfo2
+{
+    /** \brief Basic parameters of the allocation.
+
+    If you need only these, you can use function vmaGetAllocationInfo() and structure #VmaAllocationInfo instead.
+    */
+    VmaAllocationInfo allocationInfo;
+    /** \brief Size of the `VkDeviceMemory` block that the allocation belongs to.
+
+    In case of an allocation with dedicated memory, it will be equal to `allocationInfo.size`.
+    */
+    VkDeviceSize blockSize;
+    /** \brief `VK_TRUE` if the allocation has dedicated memory, `VK_FALSE` if it was placed as part of a larger memory block.
+
+    When `VK_TRUE`, it also means `VkMemoryDedicatedAllocateInfo` was used when creating the allocation
+    (if VK_KHR_dedicated_allocation extension or Vulkan version >= 1.1 is enabled).
+    */
+    VkBool32 dedicatedMemory;
+} VmaAllocationInfo2;
+
+/** Callback function called during vmaBeginDefragmentation() to check a custom criterion for ending the current defragmentation pass.
+
+Should return true if the defragmentation needs to stop the current pass.
+*/
+typedef VkBool32 (VKAPI_PTR* PFN_vmaCheckDefragmentationBreakFunction)(void* VMA_NULLABLE pUserData);
+
+/** \brief Parameters for defragmentation.
+
+To be used with function vmaBeginDefragmentation().
+*/
+typedef struct VmaDefragmentationInfo
+{
+    /// \brief Use combination of #VmaDefragmentationFlagBits.
+    VmaDefragmentationFlags flags;
+    /** \brief Custom pool to be defragmented.
+
+    If null, the default pools will undergo the defragmentation process.
+    */
+    VmaPool VMA_NULLABLE pool;
+    /** \brief Maximum number of bytes that can be copied during a single pass, while moving allocations to different places.
+
+    `0` means no limit.
+    */
+    VkDeviceSize maxBytesPerPass;
+    /** \brief Maximum number of allocations that can be moved during a single pass to a different place.
+
+    `0` means no limit.
+    */
+    uint32_t maxAllocationsPerPass;
+    /** \brief Optional custom callback for stopping vmaBeginDefragmentation().
+
+    It has to return true to break the current defragmentation pass.
+    */
+    PFN_vmaCheckDefragmentationBreakFunction VMA_NULLABLE pfnBreakCallback;
+    /// \brief Optional data to pass to the custom callback for stopping a pass of defragmentation.
+    void* VMA_NULLABLE pBreakCallbackUserData;
+} VmaDefragmentationInfo;
+
+/// Single move of an allocation to be done for defragmentation.
+typedef struct VmaDefragmentationMove
+{
+    /// Operation to be performed on the allocation by vmaEndDefragmentationPass(). Default value is #VMA_DEFRAGMENTATION_MOVE_OPERATION_COPY. You can modify it.
+    VmaDefragmentationMoveOperation operation;
+    /// Allocation that should be moved.
+    VmaAllocation VMA_NOT_NULL srcAllocation;
+    /** \brief Temporary allocation pointing to destination memory that will replace `srcAllocation`.
+
+    \warning Do not store this allocation in your data structures! It exists only temporarily, for the duration of the defragmentation pass,
+    to be used for binding a new buffer/image to the destination memory using e.g. vmaBindBufferMemory().
+    vmaEndDefragmentationPass() will destroy it and make `srcAllocation` point to this memory.
+    */
+    VmaAllocation VMA_NOT_NULL dstTmpAllocation;
+} VmaDefragmentationMove;
+
+/** \brief Parameters for incremental defragmentation steps.
+
+To be used with function vmaBeginDefragmentationPass().
+*/
+typedef struct VmaDefragmentationPassMoveInfo
+{
+    /// Number of elements in the `pMoves` array.
+    uint32_t moveCount;
+    /** \brief Array of moves to be performed by the user in the current defragmentation pass.
+
+    Pointer to an array of `moveCount` elements, owned by VMA, created in vmaBeginDefragmentationPass(), destroyed in vmaEndDefragmentationPass().
+
+    For each element, you should:
+
+    1. Create a new buffer/image at the place pointed to by VmaDefragmentationMove::dstTmpAllocation,
+       e.g. using vmaBindBufferMemory() or vmaBindImageMemory().
+    2. Copy data from the VmaDefragmentationMove::srcAllocation e.g. using `vkCmdCopyBuffer`, `vkCmdCopyImage`.
+    3. Make sure these commands finished executing on the GPU.
+    4. Destroy the old buffer/image.
+
+    Only then can you finish the defragmentation pass by calling vmaEndDefragmentationPass().
+    After this call, the allocation will point to the new place in memory.
+
+    Alternatively, if you cannot move a specific allocation, you can set VmaDefragmentationMove::operation to #VMA_DEFRAGMENTATION_MOVE_OPERATION_IGNORE.
+
+    Alternatively, if you decide you want to completely remove the allocation:
+
+    1. Destroy its buffer/image.
+    2. Set VmaDefragmentationMove::operation to #VMA_DEFRAGMENTATION_MOVE_OPERATION_DESTROY.
+
+    Then, after vmaEndDefragmentationPass() the allocation will be freed.
+    */
+    VmaDefragmentationMove* VMA_NULLABLE VMA_LEN_IF_NOT_NULL(moveCount) pMoves;
+} VmaDefragmentationPassMoveInfo;
+
+/// Statistics returned for the defragmentation process in function vmaEndDefragmentation().
+typedef struct VmaDefragmentationStats
+{
+    /// Total number of bytes that have been copied while moving allocations to different places.
+    VkDeviceSize bytesMoved;
+    /// Total number of bytes that have been released to the system by freeing empty `VkDeviceMemory` objects.
+    VkDeviceSize bytesFreed;
+    /// Number of allocations that have been moved to different places.
+    uint32_t allocationsMoved;
+    /// Number of empty `VkDeviceMemory` objects that have been released to the system.
+    uint32_t deviceMemoryBlocksFreed;
+} VmaDefragmentationStats;
+
+/** @} */
+
+/**
+\addtogroup group_virtual
+@{
+*/
+
+/// Parameters of a created #VmaVirtualBlock object to be passed to vmaCreateVirtualBlock().
+typedef struct VmaVirtualBlockCreateInfo
+{
+    /** \brief Total size of the virtual block.
+
+    Sizes can be expressed in bytes or any units you want as long as you are consistent in using them.
+    For example, if you allocate from some array of structures, 1 can mean a single instance of an entire structure.
+    */
+    VkDeviceSize size;
+
+    /** \brief Use combination of #VmaVirtualBlockCreateFlagBits.
+    */
+    VmaVirtualBlockCreateFlags flags;
+
+    /** \brief Custom CPU memory allocation callbacks. Optional.
+
+    Optional, can be null. When specified, they will be used for all CPU-side memory allocations.
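+
+    For context, a minimal sketch of creating a virtual block described by this
+    structure and making one allocation in it (sizes are illustrative):
+
+    \code
+    VmaVirtualBlockCreateInfo blockCreateInfo = {0};
+    blockCreateInfo.size = 1048576; // 1 MiB
+
+    VmaVirtualBlock block;
+    VkResult res = vmaCreateVirtualBlock(&blockCreateInfo, &block);
+
+    VmaVirtualAllocationCreateInfo allocCreateInfo = {0};
+    allocCreateInfo.size = 4096;
+
+    VmaVirtualAllocation alloc;
+    VkDeviceSize offset;
+    res = vmaVirtualAllocate(block, &allocCreateInfo, &alloc, &offset);
+    \endcode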
+ */ + const VkAllocationCallbacks* VMA_NULLABLE pAllocationCallbacks; +} VmaVirtualBlockCreateInfo; + +/// Parameters of created virtual allocation to be passed to vmaVirtualAllocate(). +typedef struct VmaVirtualAllocationCreateInfo +{ + /** \brief Size of the allocation. + + Cannot be zero. + */ + VkDeviceSize size; + /** \brief Required alignment of the allocation. Optional. + + Must be power of two. Special value 0 has the same meaning as 1 - means no special alignment is required, so allocation can start at any offset. + */ + VkDeviceSize alignment; + /** \brief Use combination of #VmaVirtualAllocationCreateFlagBits. + */ + VmaVirtualAllocationCreateFlags flags; + /** \brief Custom pointer to be associated with the allocation. Optional. + + It can be any value and can be used for user-defined purposes. It can be fetched or changed later. + */ + void* VMA_NULLABLE pUserData; +} VmaVirtualAllocationCreateInfo; + +/// Parameters of an existing virtual allocation, returned by vmaGetVirtualAllocationInfo(). +typedef struct VmaVirtualAllocationInfo +{ + /** \brief Offset of the allocation. + + Offset at which the allocation was made. + */ + VkDeviceSize offset; + /** \brief Size of the allocation. + + Same value as passed in VmaVirtualAllocationCreateInfo::size. + */ + VkDeviceSize size; + /** \brief Custom pointer associated with the allocation. + + Same value as passed in VmaVirtualAllocationCreateInfo::pUserData or to vmaSetVirtualAllocationUserData(). + */ + void* VMA_NULLABLE pUserData; +} VmaVirtualAllocationInfo; + +/** @} */ + +#endif // _VMA_DATA_TYPES_DECLARATIONS + +#ifndef _VMA_FUNCTION_HEADERS + +/** +\addtogroup group_init +@{ +*/ + +/// Creates #VmaAllocator object. +VMA_CALL_PRE VkResult VMA_CALL_POST vmaCreateAllocator( + const VmaAllocatorCreateInfo* VMA_NOT_NULL pCreateInfo, + VmaAllocator VMA_NULLABLE* VMA_NOT_NULL pAllocator); + +/// Destroys allocator object. +VMA_CALL_PRE void VMA_CALL_POST vmaDestroyAllocator( + VmaAllocator VMA_NULLABLE allocator); + +/** \brief Returns information about existing #VmaAllocator object - handle to Vulkan device etc. + +It might be useful if you want to keep just the #VmaAllocator handle and fetch other required handles to +`VkPhysicalDevice`, `VkDevice` etc. every time using this function. +*/ +VMA_CALL_PRE void VMA_CALL_POST vmaGetAllocatorInfo( + VmaAllocator VMA_NOT_NULL allocator, + VmaAllocatorInfo* VMA_NOT_NULL pAllocatorInfo); + +/** +PhysicalDeviceProperties are fetched from physicalDevice by the allocator. +You can access it here, without fetching it again on your own. +*/ +VMA_CALL_PRE void VMA_CALL_POST vmaGetPhysicalDeviceProperties( + VmaAllocator VMA_NOT_NULL allocator, + const VkPhysicalDeviceProperties* VMA_NULLABLE* VMA_NOT_NULL ppPhysicalDeviceProperties); + +/** +PhysicalDeviceMemoryProperties are fetched from physicalDevice by the allocator. +You can access it here, without fetching it again on your own. +*/ +VMA_CALL_PRE void VMA_CALL_POST vmaGetMemoryProperties( + VmaAllocator VMA_NOT_NULL allocator, + const VkPhysicalDeviceMemoryProperties* VMA_NULLABLE* VMA_NOT_NULL ppPhysicalDeviceMemoryProperties); + +/** +\brief Given Memory Type Index, returns Property Flags of this memory type. + +This is just a convenience function. Same information can be obtained using +vmaGetMemoryProperties(). +*/ +VMA_CALL_PRE void VMA_CALL_POST vmaGetMemoryTypeProperties( + VmaAllocator VMA_NOT_NULL allocator, + uint32_t memoryTypeIndex, + VkMemoryPropertyFlags* VMA_NOT_NULL pFlags); + +/** \brief Sets index of the current frame. 
+*/ +VMA_CALL_PRE void VMA_CALL_POST vmaSetCurrentFrameIndex( + VmaAllocator VMA_NOT_NULL allocator, + uint32_t frameIndex); + +/** @} */ + +/** +\addtogroup group_stats +@{ +*/ + +/** \brief Retrieves statistics from current state of the Allocator. + +This function is called "calculate" not "get" because it has to traverse all +internal data structures, so it may be quite slow. Use it for debugging purposes. +For faster but more brief statistics suitable to be called every frame or every allocation, +use vmaGetHeapBudgets(). + +Note that when using allocator from multiple threads, returned information may immediately +become outdated. +*/ +VMA_CALL_PRE void VMA_CALL_POST vmaCalculateStatistics( + VmaAllocator VMA_NOT_NULL allocator, + VmaTotalStatistics* VMA_NOT_NULL pStats); + +/** \brief Retrieves information about current memory usage and budget for all memory heaps. + +\param allocator +\param[out] pBudgets Must point to array with number of elements at least equal to number of memory heaps in physical device used. + +This function is called "get" not "calculate" because it is very fast, suitable to be called +every frame or every allocation. For more detailed statistics use vmaCalculateStatistics(). + +Note that when using allocator from multiple threads, returned information may immediately +become outdated. +*/ +VMA_CALL_PRE void VMA_CALL_POST vmaGetHeapBudgets( + VmaAllocator VMA_NOT_NULL allocator, + VmaBudget* VMA_NOT_NULL VMA_LEN_IF_NOT_NULL("VkPhysicalDeviceMemoryProperties::memoryHeapCount") pBudgets); + +/** @} */ + +/** +\addtogroup group_alloc +@{ +*/ + +/** +\brief Helps to find memoryTypeIndex, given memoryTypeBits and VmaAllocationCreateInfo. + +This algorithm tries to find a memory type that: + +- Is allowed by memoryTypeBits. +- Contains all the flags from pAllocationCreateInfo->requiredFlags. +- Matches intended usage. +- Has as many flags from pAllocationCreateInfo->preferredFlags as possible. + +\return Returns VK_ERROR_FEATURE_NOT_PRESENT if not found. Receiving such result +from this function or any other allocating function probably means that your +device doesn't support any memory type with requested features for the specific +type of resource you want to use it for. Please check parameters of your +resource, like image layout (OPTIMAL versus LINEAR) or mip level count. +*/ +VMA_CALL_PRE VkResult VMA_CALL_POST vmaFindMemoryTypeIndex( + VmaAllocator VMA_NOT_NULL allocator, + uint32_t memoryTypeBits, + const VmaAllocationCreateInfo* VMA_NOT_NULL pAllocationCreateInfo, + uint32_t* VMA_NOT_NULL pMemoryTypeIndex); + +/** +\brief Helps to find memoryTypeIndex, given VkBufferCreateInfo and VmaAllocationCreateInfo. + +It can be useful e.g. to determine value to be used as VmaPoolCreateInfo::memoryTypeIndex. +It internally creates a temporary, dummy buffer that never has memory bound. +*/ +VMA_CALL_PRE VkResult VMA_CALL_POST vmaFindMemoryTypeIndexForBufferInfo( + VmaAllocator VMA_NOT_NULL allocator, + const VkBufferCreateInfo* VMA_NOT_NULL pBufferCreateInfo, + const VmaAllocationCreateInfo* VMA_NOT_NULL pAllocationCreateInfo, + uint32_t* VMA_NOT_NULL pMemoryTypeIndex); + +/** +\brief Helps to find memoryTypeIndex, given VkImageCreateInfo and VmaAllocationCreateInfo. + +It can be useful e.g. to determine value to be used as VmaPoolCreateInfo::memoryTypeIndex. +It internally creates a temporary, dummy image that never has memory bound. 
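+
+A minimal sketch of typical usage (assuming `allocator` exists and
+`imageCreateInfo` is filled as for the real image):
+
+\code
+VmaAllocationCreateInfo allocCreateInfo = {0};
+allocCreateInfo.usage = VMA_MEMORY_USAGE_AUTO;
+
+uint32_t memTypeIndex;
+VkResult res = vmaFindMemoryTypeIndexForImageInfo(
+    allocator, &imageCreateInfo, &allocCreateInfo, &memTypeIndex);
+\endcode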
+*/ +VMA_CALL_PRE VkResult VMA_CALL_POST vmaFindMemoryTypeIndexForImageInfo( + VmaAllocator VMA_NOT_NULL allocator, + const VkImageCreateInfo* VMA_NOT_NULL pImageCreateInfo, + const VmaAllocationCreateInfo* VMA_NOT_NULL pAllocationCreateInfo, + uint32_t* VMA_NOT_NULL pMemoryTypeIndex); + +/** \brief Allocates Vulkan device memory and creates #VmaPool object. + +\param allocator Allocator object. +\param pCreateInfo Parameters of pool to create. +\param[out] pPool Handle to created pool. +*/ +VMA_CALL_PRE VkResult VMA_CALL_POST vmaCreatePool( + VmaAllocator VMA_NOT_NULL allocator, + const VmaPoolCreateInfo* VMA_NOT_NULL pCreateInfo, + VmaPool VMA_NULLABLE* VMA_NOT_NULL pPool); + +/** \brief Destroys #VmaPool object and frees Vulkan device memory. +*/ +VMA_CALL_PRE void VMA_CALL_POST vmaDestroyPool( + VmaAllocator VMA_NOT_NULL allocator, + VmaPool VMA_NULLABLE pool); + +/** @} */ + +/** +\addtogroup group_stats +@{ +*/ + +/** \brief Retrieves statistics of existing #VmaPool object. + +\param allocator Allocator object. +\param pool Pool object. +\param[out] pPoolStats Statistics of specified pool. + +Note that when using the pool from multiple threads, returned information may immediately +become outdated. +*/ +VMA_CALL_PRE void VMA_CALL_POST vmaGetPoolStatistics( + VmaAllocator VMA_NOT_NULL allocator, + VmaPool VMA_NOT_NULL pool, + VmaStatistics* VMA_NOT_NULL pPoolStats); + +/** \brief Retrieves detailed statistics of existing #VmaPool object. + +\param allocator Allocator object. +\param pool Pool object. +\param[out] pPoolStats Statistics of specified pool. +*/ +VMA_CALL_PRE void VMA_CALL_POST vmaCalculatePoolStatistics( + VmaAllocator VMA_NOT_NULL allocator, + VmaPool VMA_NOT_NULL pool, + VmaDetailedStatistics* VMA_NOT_NULL pPoolStats); + +/** @} */ + +/** +\addtogroup group_alloc +@{ +*/ + +/** \brief Checks magic number in margins around all allocations in given memory pool in search for corruptions. + +Corruption detection is enabled only when `VMA_DEBUG_DETECT_CORRUPTION` macro is defined to nonzero, +`VMA_DEBUG_MARGIN` is defined to nonzero and the pool is created in memory type that is +`HOST_VISIBLE` and `HOST_COHERENT`. For more information, see [Corruption detection](@ref debugging_memory_usage_corruption_detection). + +Possible return values: + +- `VK_ERROR_FEATURE_NOT_PRESENT` - corruption detection is not enabled for specified pool. +- `VK_SUCCESS` - corruption detection has been performed and succeeded. +- `VK_ERROR_UNKNOWN` - corruption detection has been performed and found memory corruptions around one of the allocations. + `VMA_ASSERT` is also fired in that case. +- Other value: Error returned by Vulkan, e.g. memory mapping failure. +*/ +VMA_CALL_PRE VkResult VMA_CALL_POST vmaCheckPoolCorruption( + VmaAllocator VMA_NOT_NULL allocator, + VmaPool VMA_NOT_NULL pool); + +/** \brief Retrieves name of a custom pool. + +After the call `ppName` is either null or points to an internally-owned null-terminated string +containing name of the pool that was previously set. The pointer becomes invalid when the pool is +destroyed or its name is changed using vmaSetPoolName(). +*/ +VMA_CALL_PRE void VMA_CALL_POST vmaGetPoolName( + VmaAllocator VMA_NOT_NULL allocator, + VmaPool VMA_NOT_NULL pool, + const char* VMA_NULLABLE* VMA_NOT_NULL ppName); + +/** \brief Sets name of a custom pool. + +`pName` can be either null or pointer to a null-terminated string with new name for the pool. +Function makes internal copy of the string, so it can be changed or freed immediately after this call. 
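+
+A minimal sketch (assuming `allocator` and `pool` exist):
+
+\code
+vmaSetPoolName(allocator, pool, "Texture pool");
+
+const char* name = NULL;
+vmaGetPoolName(allocator, pool, &name); // points to the internally stored copy
+\endcode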
+*/ +VMA_CALL_PRE void VMA_CALL_POST vmaSetPoolName( + VmaAllocator VMA_NOT_NULL allocator, + VmaPool VMA_NOT_NULL pool, + const char* VMA_NULLABLE pName); + +/** \brief General purpose memory allocation. + +\param allocator +\param pVkMemoryRequirements +\param pCreateInfo +\param[out] pAllocation Handle to allocated memory. +\param[out] pAllocationInfo Optional. Information about allocated memory. It can be later fetched using function vmaGetAllocationInfo(). + +You should free the memory using vmaFreeMemory() or vmaFreeMemoryPages(). + +It is recommended to use vmaAllocateMemoryForBuffer(), vmaAllocateMemoryForImage(), +vmaCreateBuffer(), vmaCreateImage() instead whenever possible. +*/ +VMA_CALL_PRE VkResult VMA_CALL_POST vmaAllocateMemory( + VmaAllocator VMA_NOT_NULL allocator, + const VkMemoryRequirements* VMA_NOT_NULL pVkMemoryRequirements, + const VmaAllocationCreateInfo* VMA_NOT_NULL pCreateInfo, + VmaAllocation VMA_NULLABLE* VMA_NOT_NULL pAllocation, + VmaAllocationInfo* VMA_NULLABLE pAllocationInfo); + +/** \brief General purpose memory allocation for multiple allocation objects at once. + +\param allocator Allocator object. +\param pVkMemoryRequirements Memory requirements for each allocation. +\param pCreateInfo Creation parameters for each allocation. +\param allocationCount Number of allocations to make. +\param[out] pAllocations Pointer to array that will be filled with handles to created allocations. +\param[out] pAllocationInfo Optional. Pointer to array that will be filled with parameters of created allocations. + +You should free the memory using vmaFreeMemory() or vmaFreeMemoryPages(). + +Word "pages" is just a suggestion to use this function to allocate pieces of memory needed for sparse binding. +It is just a general purpose allocation function able to make multiple allocations at once. +It may be internally optimized to be more efficient than calling vmaAllocateMemory() `allocationCount` times. + +All allocations are made using same parameters. All of them are created out of the same memory pool and type. +If any allocation fails, all allocations already made within this function call are also freed, so that when +returned result is not `VK_SUCCESS`, `pAllocation` array is always entirely filled with `VK_NULL_HANDLE`. +*/ +VMA_CALL_PRE VkResult VMA_CALL_POST vmaAllocateMemoryPages( + VmaAllocator VMA_NOT_NULL allocator, + const VkMemoryRequirements* VMA_NOT_NULL VMA_LEN_IF_NOT_NULL(allocationCount) pVkMemoryRequirements, + const VmaAllocationCreateInfo* VMA_NOT_NULL VMA_LEN_IF_NOT_NULL(allocationCount) pCreateInfo, + size_t allocationCount, + VmaAllocation VMA_NULLABLE* VMA_NOT_NULL VMA_LEN_IF_NOT_NULL(allocationCount) pAllocations, + VmaAllocationInfo* VMA_NULLABLE VMA_LEN_IF_NOT_NULL(allocationCount) pAllocationInfo); + +/** \brief Allocates memory suitable for given `VkBuffer`. + +\param allocator +\param buffer +\param pCreateInfo +\param[out] pAllocation Handle to allocated memory. +\param[out] pAllocationInfo Optional. Information about allocated memory. It can be later fetched using function vmaGetAllocationInfo(). + +It only creates #VmaAllocation. To bind the memory to the buffer, use vmaBindBufferMemory(). + +This is a special-purpose function. In most cases you should use vmaCreateBuffer(). + +You must free the allocation using vmaFreeMemory() when no longer needed. 
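+
+A minimal sketch of typical usage (assuming `allocator` exists and `buffer` was
+created earlier with `vkCreateBuffer`):
+
+\code
+VmaAllocationCreateInfo allocCreateInfo = {0};
+allocCreateInfo.usage = VMA_MEMORY_USAGE_AUTO;
+
+VmaAllocation allocation;
+VkResult res = vmaAllocateMemoryForBuffer(allocator, buffer, &allocCreateInfo, &allocation, NULL);
+if(res == VK_SUCCESS)
+    res = vmaBindBufferMemory(allocator, allocation, buffer);
+\endcode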
+*/
+VMA_CALL_PRE VkResult VMA_CALL_POST vmaAllocateMemoryForBuffer(
+    VmaAllocator VMA_NOT_NULL allocator,
+    VkBuffer VMA_NOT_NULL_NON_DISPATCHABLE buffer,
+    const VmaAllocationCreateInfo* VMA_NOT_NULL pCreateInfo,
+    VmaAllocation VMA_NULLABLE* VMA_NOT_NULL pAllocation,
+    VmaAllocationInfo* VMA_NULLABLE pAllocationInfo);
+
+/** \brief Allocates memory suitable for given `VkImage`.
+
+\param allocator
+\param image
+\param pCreateInfo
+\param[out] pAllocation Handle to allocated memory.
+\param[out] pAllocationInfo Optional. Information about allocated memory. It can be later fetched using function vmaGetAllocationInfo().
+
+It only creates #VmaAllocation. To bind the memory to the image, use vmaBindImageMemory().
+
+This is a special-purpose function. In most cases you should use vmaCreateImage().
+
+You must free the allocation using vmaFreeMemory() when no longer needed.
+*/
+VMA_CALL_PRE VkResult VMA_CALL_POST vmaAllocateMemoryForImage(
+    VmaAllocator VMA_NOT_NULL allocator,
+    VkImage VMA_NOT_NULL_NON_DISPATCHABLE image,
+    const VmaAllocationCreateInfo* VMA_NOT_NULL pCreateInfo,
+    VmaAllocation VMA_NULLABLE* VMA_NOT_NULL pAllocation,
+    VmaAllocationInfo* VMA_NULLABLE pAllocationInfo);
+
+/** \brief Frees memory previously allocated using vmaAllocateMemory(), vmaAllocateMemoryForBuffer(), or vmaAllocateMemoryForImage().
+
+Passing `VK_NULL_HANDLE` as `allocation` is valid. Such a function call is just skipped.
+*/
+VMA_CALL_PRE void VMA_CALL_POST vmaFreeMemory(
+    VmaAllocator VMA_NOT_NULL allocator,
+    const VmaAllocation VMA_NULLABLE allocation);
+
+/** \brief Frees memory and destroys multiple allocations.
+
+Word "pages" is just a suggestion to use this function to free pieces of memory used for sparse binding.
+It is just a general purpose function to free memory and destroy allocations made using e.g. vmaAllocateMemory(),
+vmaAllocateMemoryPages() and other functions.
+It may be internally optimized to be more efficient than calling vmaFreeMemory() `allocationCount` times.
+
+Allocations in the `pAllocations` array can come from any memory pools and types.
+Passing `VK_NULL_HANDLE` as elements of the `pAllocations` array is valid. Such entries are just skipped.
+*/
+VMA_CALL_PRE void VMA_CALL_POST vmaFreeMemoryPages(
+    VmaAllocator VMA_NOT_NULL allocator,
+    size_t allocationCount,
+    const VmaAllocation VMA_NULLABLE* VMA_NOT_NULL VMA_LEN_IF_NOT_NULL(allocationCount) pAllocations);
+
+/** \brief Returns current information about the specified allocation.
+
+Current parameters of the given allocation are returned in `pAllocationInfo`.
+
+This function doesn't lock any mutex, so it should be quite efficient; still,
+you should avoid calling it too often.
+You can retrieve the same VmaAllocationInfo structure while creating your resource, from function
+vmaCreateBuffer(), vmaCreateImage(). You can remember it if you are sure parameters don't change
+(e.g. due to defragmentation).
+
+There is also a new function vmaGetAllocationInfo2() that offers extended information
+about the allocation, returned using the new structure #VmaAllocationInfo2.
+*/
+VMA_CALL_PRE void VMA_CALL_POST vmaGetAllocationInfo(
+    VmaAllocator VMA_NOT_NULL allocator,
+    VmaAllocation VMA_NOT_NULL allocation,
+    VmaAllocationInfo* VMA_NOT_NULL pAllocationInfo);
+
+/** \brief Returns extended information about the specified allocation.
+
+Current parameters of the given allocation are returned in `pAllocationInfo`.
+Extended parameters in structure #VmaAllocationInfo2 include memory block size
+and a flag telling whether the allocation has dedicated memory.
+It can be useful e.g. for interop with OpenGL.
+*/
+VMA_CALL_PRE void VMA_CALL_POST vmaGetAllocationInfo2(
+    VmaAllocator VMA_NOT_NULL allocator,
+    VmaAllocation VMA_NOT_NULL allocation,
+    VmaAllocationInfo2* VMA_NOT_NULL pAllocationInfo);
+
+/** \brief Sets pUserData in given allocation to a new value.
+
+The value of pointer `pUserData` is copied to the allocation's `pUserData`.
+It is opaque, so you can use it however you want - e.g.
+as a pointer, an ordinal number, or some handle to your own data.
+*/
+VMA_CALL_PRE void VMA_CALL_POST vmaSetAllocationUserData(
+    VmaAllocator VMA_NOT_NULL allocator,
+    VmaAllocation VMA_NOT_NULL allocation,
+    void* VMA_NULLABLE pUserData);
+
+/** \brief Sets pName in given allocation to a new value.
+
+`pName` must be either null, or a pointer to a null-terminated string. The function
+makes a local copy of the string and sets it as the allocation's `pName`. The string
+passed as pName doesn't need to be valid for the whole lifetime of the allocation -
+you can free it after this call. The string previously pointed to by the allocation's
+`pName` is freed from memory.
+*/
+VMA_CALL_PRE void VMA_CALL_POST vmaSetAllocationName(
+    VmaAllocator VMA_NOT_NULL allocator,
+    VmaAllocation VMA_NOT_NULL allocation,
+    const char* VMA_NULLABLE pName);
+
+/**
+\brief Given an allocation, returns Property Flags of its memory type.
+
+This is just a convenience function. The same information can be obtained using
+vmaGetAllocationInfo() + vmaGetMemoryProperties().
+*/
+VMA_CALL_PRE void VMA_CALL_POST vmaGetAllocationMemoryProperties(
+    VmaAllocator VMA_NOT_NULL allocator,
+    VmaAllocation VMA_NOT_NULL allocation,
+    VkMemoryPropertyFlags* VMA_NOT_NULL pFlags);
+
+
+#if VMA_EXTERNAL_MEMORY_WIN32
+/**
+\brief Given an allocation, returns a Win32 handle that may be imported by other processes or APIs.
+
+\param hTargetProcess Must be a valid handle to the target process or null. If it's null, the function returns
+    a handle for the current process.
+\param[out] pHandle Output parameter that returns the handle.
+
+The function fills `pHandle` with a handle that can be used in the target process.
+The handle is fetched using function `vkGetMemoryWin32HandleKHR`.
+When no longer needed, you must close it using:
+
+\code
+CloseHandle(handle);
+\endcode
+
+You can close it any time, before or after destroying the allocation object.
+It is reference-counted internally by Windows.
+
+Note the handle is returned for the entire `VkDeviceMemory` block that the allocation belongs to.
+If the allocation is sub-allocated from a larger block, you may need to consider the offset of the allocation
+(VmaAllocationInfo::offset).
+
+If the function fails with `VK_ERROR_FEATURE_NOT_PRESENT` error code, please double-check
+that the VmaVulkanFunctions::vkGetMemoryWin32HandleKHR function pointer is set, e.g. either by using `VMA_DYNAMIC_VULKAN_FUNCTIONS`
+or by manually passing it through VmaAllocatorCreateInfo::pVulkanFunctions.
+
+For more information, see chapter \ref vk_khr_external_memory_win32.
+*/
+VMA_CALL_PRE VkResult VMA_CALL_POST vmaGetMemoryWin32Handle(VmaAllocator VMA_NOT_NULL allocator,
+    VmaAllocation VMA_NOT_NULL allocation, HANDLE hTargetProcess, HANDLE* VMA_NOT_NULL pHandle);
+#endif // VMA_EXTERNAL_MEMORY_WIN32
+
+/** \brief Maps memory represented by given allocation and returns a pointer to it.
+
+Maps memory represented by the given allocation to make it accessible to CPU code.
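+
+A minimal sketch of the typical map/write/unmap pattern (assuming `allocation`
+was made in `HOST_VISIBLE` memory; `mySrcData`/`mySrcDataSize` are illustrative):
+
+\code
+void* pData;
+VkResult res = vmaMapMemory(allocator, allocation, &pData);
+if(res == VK_SUCCESS)
+{
+    memcpy(pData, mySrcData, (size_t)mySrcDataSize);
+    vmaUnmapMemory(allocator, allocation);
+}
+\endcode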
+On success, `*ppData` contains a pointer to the first byte of this memory.
+
+\warning
+If the allocation is part of a bigger `VkDeviceMemory` block, the returned pointer is
+correctly offset to the beginning of the region assigned to this particular allocation.
+Unlike the result of `vkMapMemory`, it points to the allocation, not to the beginning of the whole block.
+You should not add VmaAllocationInfo::offset to it!
+
+Mapping is internally reference-counted and synchronized, so even though the raw Vulkan
+function `vkMapMemory()` cannot be used to map the same block of `VkDeviceMemory`
+multiple times simultaneously, it is safe to call this function on allocations
+assigned to the same memory block. Actual Vulkan memory will be mapped on first
+mapping and unmapped on last unmapping.
+
+If the function succeeded, you must call vmaUnmapMemory() to unmap the
+allocation when mapping is no longer needed or before freeing the allocation, at
+the latest.
+
+It is also safe to call this function multiple times on the same allocation. You
+must call vmaUnmapMemory() the same number of times as you called vmaMapMemory().
+
+It is also safe to call this function on an allocation created with
+#VMA_ALLOCATION_CREATE_MAPPED_BIT flag. Its memory stays mapped all the time.
+You must still call vmaUnmapMemory() the same number of times as you called
+vmaMapMemory(). You must not call vmaUnmapMemory() an additional time to free the
+"0-th" mapping made automatically due to #VMA_ALLOCATION_CREATE_MAPPED_BIT flag.
+
+This function fails when used on an allocation made in a memory type that is not
+`HOST_VISIBLE`.
+
+This function doesn't automatically flush or invalidate caches.
+If the allocation is made from a memory type that is not `HOST_COHERENT`,
+you also need to use vmaInvalidateAllocation() / vmaFlushAllocation(), as required by the Vulkan specification.
+*/
+VMA_CALL_PRE VkResult VMA_CALL_POST vmaMapMemory(
+    VmaAllocator VMA_NOT_NULL allocator,
+    VmaAllocation VMA_NOT_NULL allocation,
+    void* VMA_NULLABLE* VMA_NOT_NULL ppData);
+
+/** \brief Unmaps memory represented by given allocation, mapped previously using vmaMapMemory().
+
+For details, see description of vmaMapMemory().
+
+This function doesn't automatically flush or invalidate caches.
+If the allocation is made from a memory type that is not `HOST_COHERENT`,
+you also need to use vmaInvalidateAllocation() / vmaFlushAllocation(), as required by the Vulkan specification.
+*/
+VMA_CALL_PRE void VMA_CALL_POST vmaUnmapMemory(
+    VmaAllocator VMA_NOT_NULL allocator,
+    VmaAllocation VMA_NOT_NULL allocation);
+
+/** \brief Flushes memory of given allocation.
+
+Calls `vkFlushMappedMemoryRanges()` for memory associated with the given range of the given allocation.
+It needs to be called after writing to a mapped memory for memory types that are not `HOST_COHERENT`.
+Unmap operation doesn't do that automatically.
+
+- `offset` must be relative to the beginning of the allocation.
+- `size` can be `VK_WHOLE_SIZE`. It means all memory from `offset` to the end of the given allocation.
+- `offset` and `size` don't have to be aligned.
+  They are internally rounded down/up to a multiple of `nonCoherentAtomSize`.
+- If `size` is 0, this call is ignored.
+- If the memory type that the `allocation` belongs to is not `HOST_VISIBLE` or it is `HOST_COHERENT`,
+  this call is ignored.
+
+Warning! `offset` and `size` are relative to the contents of the given `allocation`.
+If you mean the whole allocation, you can pass 0 and `VK_WHOLE_SIZE`, respectively.
+Do not pass the allocation's offset as `offset`!!!
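+
+For example, to flush the whole allocation after a host write
+(a sketch, assuming `allocator` and `allocation` exist):
+
+\code
+vmaFlushAllocation(allocator, allocation, 0, VK_WHOLE_SIZE);
+\endcode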
+
+This function returns the `VkResult` from `vkFlushMappedMemoryRanges` if it is
+called, otherwise `VK_SUCCESS`.
+*/
+VMA_CALL_PRE VkResult VMA_CALL_POST vmaFlushAllocation(
+    VmaAllocator VMA_NOT_NULL allocator,
+    VmaAllocation VMA_NOT_NULL allocation,
+    VkDeviceSize offset,
+    VkDeviceSize size);
+
+/** \brief Invalidates memory of given allocation.
+
+Calls `vkInvalidateMappedMemoryRanges()` for memory associated with the given range of the given allocation.
+It needs to be called before reading from a mapped memory for memory types that are not `HOST_COHERENT`.
+Map operation doesn't do that automatically.
+
+- `offset` must be relative to the beginning of the allocation.
+- `size` can be `VK_WHOLE_SIZE`. It means all memory from `offset` to the end of the given allocation.
+- `offset` and `size` don't have to be aligned.
+  They are internally rounded down/up to a multiple of `nonCoherentAtomSize`.
+- If `size` is 0, this call is ignored.
+- If the memory type that the `allocation` belongs to is not `HOST_VISIBLE` or it is `HOST_COHERENT`,
+  this call is ignored.
+
+Warning! `offset` and `size` are relative to the contents of the given `allocation`.
+If you mean the whole allocation, you can pass 0 and `VK_WHOLE_SIZE`, respectively.
+Do not pass the allocation's offset as `offset`!!!
+
+This function returns the `VkResult` from `vkInvalidateMappedMemoryRanges` if
+it is called, otherwise `VK_SUCCESS`.
+*/
+VMA_CALL_PRE VkResult VMA_CALL_POST vmaInvalidateAllocation(
+    VmaAllocator VMA_NOT_NULL allocator,
+    VmaAllocation VMA_NOT_NULL allocation,
+    VkDeviceSize offset,
+    VkDeviceSize size);
+
+/** \brief Flushes memory of given set of allocations.
+
+Calls `vkFlushMappedMemoryRanges()` for memory associated with the given ranges of the given allocations.
+For more information, see documentation of vmaFlushAllocation().
+
+\param allocator
+\param allocationCount
+\param allocations
+\param offsets If not null, it must point to an array of offsets of regions to flush, relative to the beginning of respective allocations. Null means all offsets are zero.
+\param sizes If not null, it must point to an array of sizes of regions to flush in respective allocations. Null means `VK_WHOLE_SIZE` for all allocations.
+
+This function returns the `VkResult` from `vkFlushMappedMemoryRanges` if it is
+called, otherwise `VK_SUCCESS`.
+*/
+VMA_CALL_PRE VkResult VMA_CALL_POST vmaFlushAllocations(
+    VmaAllocator VMA_NOT_NULL allocator,
+    uint32_t allocationCount,
+    const VmaAllocation VMA_NOT_NULL* VMA_NULLABLE VMA_LEN_IF_NOT_NULL(allocationCount) allocations,
+    const VkDeviceSize* VMA_NULLABLE VMA_LEN_IF_NOT_NULL(allocationCount) offsets,
+    const VkDeviceSize* VMA_NULLABLE VMA_LEN_IF_NOT_NULL(allocationCount) sizes);
+
+/** \brief Invalidates memory of given set of allocations.
+
+Calls `vkInvalidateMappedMemoryRanges()` for memory associated with the given ranges of the given allocations.
+For more information, see documentation of vmaInvalidateAllocation().
+
+\param allocator
+\param allocationCount
+\param allocations
+\param offsets If not null, it must point to an array of offsets of regions to invalidate, relative to the beginning of respective allocations. Null means all offsets are zero.
+\param sizes If not null, it must point to an array of sizes of regions to invalidate in respective allocations. Null means `VK_WHOLE_SIZE` for all allocations.
+
+This function returns the `VkResult` from `vkInvalidateMappedMemoryRanges` if it is
+called, otherwise `VK_SUCCESS`.
+*/
+VMA_CALL_PRE VkResult VMA_CALL_POST vmaInvalidateAllocations(
+    VmaAllocator VMA_NOT_NULL allocator,
+    uint32_t allocationCount,
+    const VmaAllocation VMA_NOT_NULL* VMA_NULLABLE VMA_LEN_IF_NOT_NULL(allocationCount) allocations,
+    const VkDeviceSize* VMA_NULLABLE VMA_LEN_IF_NOT_NULL(allocationCount) offsets,
+    const VkDeviceSize* VMA_NULLABLE VMA_LEN_IF_NOT_NULL(allocationCount) sizes);
+
+/** \brief Maps the allocation temporarily if needed, copies data from specified host pointer to it, and flushes the memory from the host caches if needed.
+
+\param allocator
+\param pSrcHostPointer Pointer to the host data that becomes the source of the copy.
+\param dstAllocation Handle to the allocation that becomes the destination of the copy.
+\param dstAllocationLocalOffset Offset within `dstAllocation` where to write copied data, in bytes.
+\param size Number of bytes to copy.
+
+This is a convenience function that makes it easy to copy data from a host pointer to an allocation.
+The same behavior can be achieved by calling vmaMapMemory(), `memcpy()`, vmaUnmapMemory(), vmaFlushAllocation().
+
+This function can be called only for allocations created in a memory type that has the `VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT` flag.
+It can be ensured e.g. by using #VMA_MEMORY_USAGE_AUTO and #VMA_ALLOCATION_CREATE_HOST_ACCESS_SEQUENTIAL_WRITE_BIT or
+#VMA_ALLOCATION_CREATE_HOST_ACCESS_RANDOM_BIT.
+Otherwise, the function will fail and generate a Validation Layers error.
+
+`dstAllocationLocalOffset` is relative to the contents of the given `dstAllocation`.
+If you mean the whole allocation, you should pass 0.
+Do not pass the allocation's offset within the device memory block as this parameter!
+*/
+VMA_CALL_PRE VkResult VMA_CALL_POST vmaCopyMemoryToAllocation(
+    VmaAllocator VMA_NOT_NULL allocator,
+    const void* VMA_NOT_NULL VMA_LEN_IF_NOT_NULL(size) pSrcHostPointer,
+    VmaAllocation VMA_NOT_NULL dstAllocation,
+    VkDeviceSize dstAllocationLocalOffset,
+    VkDeviceSize size);
+
+/** \brief Invalidates memory in the host caches if needed, maps the allocation temporarily if needed, and copies data from it to a specified host pointer.
+
+\param allocator
+\param srcAllocation Handle to the allocation that becomes the source of the copy.
+\param srcAllocationLocalOffset Offset within `srcAllocation` where to read copied data, in bytes.
+\param pDstHostPointer Pointer to the host memory that becomes the destination of the copy.
+\param size Number of bytes to copy.
+
+This is a convenience function that makes it easy to copy data from an allocation to a host pointer.
+The same behavior can be achieved by calling vmaInvalidateAllocation(), vmaMapMemory(), `memcpy()`, vmaUnmapMemory().
+
+This function should be called only for allocations created in a memory type that has the `VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT`
+and `VK_MEMORY_PROPERTY_HOST_CACHED_BIT` flags.
+It can be ensured e.g. by using #VMA_MEMORY_USAGE_AUTO and #VMA_ALLOCATION_CREATE_HOST_ACCESS_RANDOM_BIT.
+Otherwise, the function may fail and generate a Validation Layers error.
+It may also work very slowly when reading from uncached memory.
+
+`srcAllocationLocalOffset` is relative to the contents of the given `srcAllocation`.
+If you mean the whole allocation, you should pass 0.
+Do not pass the allocation's offset within the device memory block as this parameter!
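+
+An illustrative sketch (not part of the library; `readback` is an assumed
+CPU-side buffer of at least `size` bytes):
+
+\code
+VkResult res = vmaCopyAllocationToMemory(allocator, srcAllocation, 0, readback, size);
+\endcode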
+*/ +VMA_CALL_PRE VkResult VMA_CALL_POST vmaCopyAllocationToMemory( + VmaAllocator VMA_NOT_NULL allocator, + VmaAllocation VMA_NOT_NULL srcAllocation, + VkDeviceSize srcAllocationLocalOffset, + void* VMA_NOT_NULL VMA_LEN_IF_NOT_NULL(size) pDstHostPointer, + VkDeviceSize size); + +/** \brief Checks magic number in margins around all allocations in given memory types (in both default and custom pools) in search for corruptions. + +\param allocator +\param memoryTypeBits Bit mask, where each bit set means that a memory type with that index should be checked. + +Corruption detection is enabled only when `VMA_DEBUG_DETECT_CORRUPTION` macro is defined to nonzero, +`VMA_DEBUG_MARGIN` is defined to nonzero and only for memory types that are +`HOST_VISIBLE` and `HOST_COHERENT`. For more information, see [Corruption detection](@ref debugging_memory_usage_corruption_detection). + +Possible return values: + +- `VK_ERROR_FEATURE_NOT_PRESENT` - corruption detection is not enabled for any of specified memory types. +- `VK_SUCCESS` - corruption detection has been performed and succeeded. +- `VK_ERROR_UNKNOWN` - corruption detection has been performed and found memory corruptions around one of the allocations. + `VMA_ASSERT` is also fired in that case. +- Other value: Error returned by Vulkan, e.g. memory mapping failure. +*/ +VMA_CALL_PRE VkResult VMA_CALL_POST vmaCheckCorruption( + VmaAllocator VMA_NOT_NULL allocator, + uint32_t memoryTypeBits); + +/** \brief Begins defragmentation process. + +\param allocator Allocator object. +\param pInfo Structure filled with parameters of defragmentation. +\param[out] pContext Context object that must be passed to vmaEndDefragmentation() to finish defragmentation. +\returns +- `VK_SUCCESS` if defragmentation can begin. +- `VK_ERROR_FEATURE_NOT_PRESENT` if defragmentation is not supported. + +For more information about defragmentation, see documentation chapter: +[Defragmentation](@ref defragmentation). +*/ +VMA_CALL_PRE VkResult VMA_CALL_POST vmaBeginDefragmentation( + VmaAllocator VMA_NOT_NULL allocator, + const VmaDefragmentationInfo* VMA_NOT_NULL pInfo, + VmaDefragmentationContext VMA_NULLABLE* VMA_NOT_NULL pContext); + +/** \brief Ends defragmentation process. + +\param allocator Allocator object. +\param context Context object that has been created by vmaBeginDefragmentation(). +\param[out] pStats Optional stats for the defragmentation. Can be null. + +Use this function to finish defragmentation started by vmaBeginDefragmentation(). +*/ +VMA_CALL_PRE void VMA_CALL_POST vmaEndDefragmentation( + VmaAllocator VMA_NOT_NULL allocator, + VmaDefragmentationContext VMA_NOT_NULL context, + VmaDefragmentationStats* VMA_NULLABLE pStats); + +/** \brief Starts single defragmentation pass. + +\param allocator Allocator object. +\param context Context object that has been created by vmaBeginDefragmentation(). +\param[out] pPassInfo Computed information for current pass. +\returns +- `VK_SUCCESS` if no more moves are possible. Then you can omit call to vmaEndDefragmentationPass() and simply end whole defragmentation. +- `VK_INCOMPLETE` if there are pending moves returned in `pPassInfo`. You need to perform them, call vmaEndDefragmentationPass(), + and then preferably try another pass with vmaBeginDefragmentationPass(). 
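+
+Putting it together, the overall flow can look like this (an illustrative
+sketch; recreating the resources and copying their contents between the two
+calls is the caller's responsibility):
+
+\code
+VmaDefragmentationInfo defragInfo = {};
+VmaDefragmentationContext defragCtx;
+VkResult res = vmaBeginDefragmentation(allocator, &defragInfo, &defragCtx);
+for(;;)
+{
+    VmaDefragmentationPassMoveInfo pass;
+    res = vmaBeginDefragmentationPass(allocator, defragCtx, &pass);
+    if(res == VK_SUCCESS)
+        break; // No more moves possible.
+    // ... process pass.pMoves[0..pass.moveCount): recreate resources at the
+    // new locations and copy their contents ...
+    res = vmaEndDefragmentationPass(allocator, defragCtx, &pass);
+    if(res == VK_SUCCESS)
+        break;
+}
+vmaEndDefragmentation(allocator, defragCtx, VMA_NULL);
+\endcode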
+*/ +VMA_CALL_PRE VkResult VMA_CALL_POST vmaBeginDefragmentationPass( + VmaAllocator VMA_NOT_NULL allocator, + VmaDefragmentationContext VMA_NOT_NULL context, + VmaDefragmentationPassMoveInfo* VMA_NOT_NULL pPassInfo); + +/** \brief Ends single defragmentation pass. + +\param allocator Allocator object. +\param context Context object that has been created by vmaBeginDefragmentation(). +\param pPassInfo Computed information for current pass filled by vmaBeginDefragmentationPass() and possibly modified by you. + +Returns `VK_SUCCESS` if no more moves are possible or `VK_INCOMPLETE` if more defragmentations are possible. + +Ends incremental defragmentation pass and commits all defragmentation moves from `pPassInfo`. +After this call: + +- Allocations at `pPassInfo[i].srcAllocation` that had `pPassInfo[i].operation ==` #VMA_DEFRAGMENTATION_MOVE_OPERATION_COPY + (which is the default) will be pointing to the new destination place. +- Allocation at `pPassInfo[i].srcAllocation` that had `pPassInfo[i].operation ==` #VMA_DEFRAGMENTATION_MOVE_OPERATION_DESTROY + will be freed. + +If no more moves are possible you can end whole defragmentation. +*/ +VMA_CALL_PRE VkResult VMA_CALL_POST vmaEndDefragmentationPass( + VmaAllocator VMA_NOT_NULL allocator, + VmaDefragmentationContext VMA_NOT_NULL context, + VmaDefragmentationPassMoveInfo* VMA_NOT_NULL pPassInfo); + +/** \brief Binds buffer to allocation. + +Binds specified buffer to region of memory represented by specified allocation. +Gets `VkDeviceMemory` handle and offset from the allocation. +If you want to create a buffer, allocate memory for it and bind them together separately, +you should use this function for binding instead of standard `vkBindBufferMemory()`, +because it ensures proper synchronization so that when a `VkDeviceMemory` object is used by multiple +allocations, calls to `vkBind*Memory()` or `vkMapMemory()` won't happen from multiple threads simultaneously +(which is illegal in Vulkan). + +It is recommended to use function vmaCreateBuffer() instead of this one. +*/ +VMA_CALL_PRE VkResult VMA_CALL_POST vmaBindBufferMemory( + VmaAllocator VMA_NOT_NULL allocator, + VmaAllocation VMA_NOT_NULL allocation, + VkBuffer VMA_NOT_NULL_NON_DISPATCHABLE buffer); + +/** \brief Binds buffer to allocation with additional parameters. + +\param allocator +\param allocation +\param allocationLocalOffset Additional offset to be added while binding, relative to the beginning of the `allocation`. Normally it should be 0. +\param buffer +\param pNext A chain of structures to be attached to `VkBindBufferMemoryInfoKHR` structure used internally. Normally it should be null. + +This function is similar to vmaBindBufferMemory(), but it provides additional parameters. + +If `pNext` is not null, #VmaAllocator object must have been created with #VMA_ALLOCATOR_CREATE_KHR_BIND_MEMORY2_BIT flag +or with VmaAllocatorCreateInfo::vulkanApiVersion `>= VK_API_VERSION_1_1`. Otherwise the call fails. +*/ +VMA_CALL_PRE VkResult VMA_CALL_POST vmaBindBufferMemory2( + VmaAllocator VMA_NOT_NULL allocator, + VmaAllocation VMA_NOT_NULL allocation, + VkDeviceSize allocationLocalOffset, + VkBuffer VMA_NOT_NULL_NON_DISPATCHABLE buffer, + const void* VMA_NULLABLE VMA_EXTENDS_VK_STRUCT(VkBindBufferMemoryInfoKHR) pNext); + +/** \brief Binds image to allocation. + +Binds specified image to region of memory represented by specified allocation. +Gets `VkDeviceMemory` handle and offset from the allocation. 
+If you want to create an image, allocate memory for it and bind them together separately, +you should use this function for binding instead of standard `vkBindImageMemory()`, +because it ensures proper synchronization so that when a `VkDeviceMemory` object is used by multiple +allocations, calls to `vkBind*Memory()` or `vkMapMemory()` won't happen from multiple threads simultaneously +(which is illegal in Vulkan). + +It is recommended to use function vmaCreateImage() instead of this one. +*/ +VMA_CALL_PRE VkResult VMA_CALL_POST vmaBindImageMemory( + VmaAllocator VMA_NOT_NULL allocator, + VmaAllocation VMA_NOT_NULL allocation, + VkImage VMA_NOT_NULL_NON_DISPATCHABLE image); + +/** \brief Binds image to allocation with additional parameters. + +\param allocator +\param allocation +\param allocationLocalOffset Additional offset to be added while binding, relative to the beginning of the `allocation`. Normally it should be 0. +\param image +\param pNext A chain of structures to be attached to `VkBindImageMemoryInfoKHR` structure used internally. Normally it should be null. + +This function is similar to vmaBindImageMemory(), but it provides additional parameters. + +If `pNext` is not null, #VmaAllocator object must have been created with #VMA_ALLOCATOR_CREATE_KHR_BIND_MEMORY2_BIT flag +or with VmaAllocatorCreateInfo::vulkanApiVersion `>= VK_API_VERSION_1_1`. Otherwise the call fails. +*/ +VMA_CALL_PRE VkResult VMA_CALL_POST vmaBindImageMemory2( + VmaAllocator VMA_NOT_NULL allocator, + VmaAllocation VMA_NOT_NULL allocation, + VkDeviceSize allocationLocalOffset, + VkImage VMA_NOT_NULL_NON_DISPATCHABLE image, + const void* VMA_NULLABLE VMA_EXTENDS_VK_STRUCT(VkBindImageMemoryInfoKHR) pNext); + +/** \brief Creates a new `VkBuffer`, allocates and binds memory for it. + +\param allocator +\param pBufferCreateInfo +\param pAllocationCreateInfo +\param[out] pBuffer Buffer that was created. +\param[out] pAllocation Allocation that was created. +\param[out] pAllocationInfo Optional. Information about allocated memory. It can be later fetched using function vmaGetAllocationInfo(). + +This function automatically: + +-# Creates buffer. +-# Allocates appropriate memory for it. +-# Binds the buffer with the memory. + +If any of these operations fail, buffer and allocation are not created, +returned value is negative error code, `*pBuffer` and `*pAllocation` are null. + +If the function succeeded, you must destroy both buffer and allocation when you +no longer need them using either convenience function vmaDestroyBuffer() or +separately, using `vkDestroyBuffer()` and vmaFreeMemory(). + +If #VMA_ALLOCATOR_CREATE_KHR_DEDICATED_ALLOCATION_BIT flag was used, +VK_KHR_dedicated_allocation extension is used internally to query driver whether +it requires or prefers the new buffer to have dedicated allocation. If yes, +and if dedicated allocation is possible +(#VMA_ALLOCATION_CREATE_NEVER_ALLOCATE_BIT is not used), it creates dedicated +allocation for this buffer, just like when using +#VMA_ALLOCATION_CREATE_DEDICATED_MEMORY_BIT. + +\note This function creates a new `VkBuffer`. Sub-allocation of parts of one large buffer, +although recommended as a good practice, is out of scope of this library and could be implemented +by the user as a higher-level logic on top of VMA. 
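+
+A basic usage sketch (matching the typical pattern from the library's
+documentation; the exact size and usage flags are up to the caller):
+
+\code
+VkBufferCreateInfo bufCreateInfo = { VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO };
+bufCreateInfo.size = 65536;
+bufCreateInfo.usage = VK_BUFFER_USAGE_VERTEX_BUFFER_BIT | VK_BUFFER_USAGE_TRANSFER_DST_BIT;
+
+VmaAllocationCreateInfo allocCreateInfo = {};
+allocCreateInfo.usage = VMA_MEMORY_USAGE_AUTO;
+
+VkBuffer buf;
+VmaAllocation alloc;
+VkResult res = vmaCreateBuffer(allocator, &bufCreateInfo, &allocCreateInfo, &buf, &alloc, VMA_NULL);
+\endcode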
+*/ +VMA_CALL_PRE VkResult VMA_CALL_POST vmaCreateBuffer( + VmaAllocator VMA_NOT_NULL allocator, + const VkBufferCreateInfo* VMA_NOT_NULL pBufferCreateInfo, + const VmaAllocationCreateInfo* VMA_NOT_NULL pAllocationCreateInfo, + VkBuffer VMA_NULLABLE_NON_DISPATCHABLE* VMA_NOT_NULL pBuffer, + VmaAllocation VMA_NULLABLE* VMA_NOT_NULL pAllocation, + VmaAllocationInfo* VMA_NULLABLE pAllocationInfo); + +/** \brief Creates a buffer with additional minimum alignment. + +Similar to vmaCreateBuffer() but provides additional parameter `minAlignment` which allows to specify custom, +minimum alignment to be used when placing the buffer inside a larger memory block, which may be needed e.g. +for interop with OpenGL. +*/ +VMA_CALL_PRE VkResult VMA_CALL_POST vmaCreateBufferWithAlignment( + VmaAllocator VMA_NOT_NULL allocator, + const VkBufferCreateInfo* VMA_NOT_NULL pBufferCreateInfo, + const VmaAllocationCreateInfo* VMA_NOT_NULL pAllocationCreateInfo, + VkDeviceSize minAlignment, + VkBuffer VMA_NULLABLE_NON_DISPATCHABLE* VMA_NOT_NULL pBuffer, + VmaAllocation VMA_NULLABLE* VMA_NOT_NULL pAllocation, + VmaAllocationInfo* VMA_NULLABLE pAllocationInfo); + +/** \brief Creates a new `VkBuffer`, binds already created memory for it. + +\param allocator +\param allocation Allocation that provides memory to be used for binding new buffer to it. +\param pBufferCreateInfo +\param[out] pBuffer Buffer that was created. + +This function automatically: + +-# Creates buffer. +-# Binds the buffer with the supplied memory. + +If any of these operations fail, buffer is not created, +returned value is negative error code and `*pBuffer` is null. + +If the function succeeded, you must destroy the buffer when you +no longer need it using `vkDestroyBuffer()`. If you want to also destroy the corresponding +allocation you can use convenience function vmaDestroyBuffer(). + +\note There is a new version of this function augmented with parameter `allocationLocalOffset` - see vmaCreateAliasingBuffer2(). +*/ +VMA_CALL_PRE VkResult VMA_CALL_POST vmaCreateAliasingBuffer( + VmaAllocator VMA_NOT_NULL allocator, + VmaAllocation VMA_NOT_NULL allocation, + const VkBufferCreateInfo* VMA_NOT_NULL pBufferCreateInfo, + VkBuffer VMA_NULLABLE_NON_DISPATCHABLE* VMA_NOT_NULL pBuffer); + +/** \brief Creates a new `VkBuffer`, binds already created memory for it. + +\param allocator +\param allocation Allocation that provides memory to be used for binding new buffer to it. +\param allocationLocalOffset Additional offset to be added while binding, relative to the beginning of the allocation. Normally it should be 0. +\param pBufferCreateInfo +\param[out] pBuffer Buffer that was created. + +This function automatically: + +-# Creates buffer. +-# Binds the buffer with the supplied memory. + +If any of these operations fail, buffer is not created, +returned value is negative error code and `*pBuffer` is null. + +If the function succeeded, you must destroy the buffer when you +no longer need it using `vkDestroyBuffer()`. If you want to also destroy the corresponding +allocation you can use convenience function vmaDestroyBuffer(). + +\note This is a new version of the function augmented with parameter `allocationLocalOffset`. 
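+
+An illustrative sketch (assuming `alloc` is an existing allocation large
+enough to hold the new buffer at the given offset):
+
+\code
+VkBufferCreateInfo aliasCreateInfo = { VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO };
+aliasCreateInfo.size = 4096;
+aliasCreateInfo.usage = VK_BUFFER_USAGE_TRANSFER_DST_BIT;
+
+VkBuffer aliasBuf;
+VkResult res = vmaCreateAliasingBuffer2(allocator, alloc, 0, &aliasCreateInfo, &aliasBuf);
+// ... use aliasBuf, then destroy it with vkDestroyBuffer() when done ...
+\endcode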
+*/
+VMA_CALL_PRE VkResult VMA_CALL_POST vmaCreateAliasingBuffer2(
+    VmaAllocator VMA_NOT_NULL allocator,
+    VmaAllocation VMA_NOT_NULL allocation,
+    VkDeviceSize allocationLocalOffset,
+    const VkBufferCreateInfo* VMA_NOT_NULL pBufferCreateInfo,
+    VkBuffer VMA_NULLABLE_NON_DISPATCHABLE* VMA_NOT_NULL pBuffer);
+
+/** \brief Destroys Vulkan buffer and frees allocated memory.
+
+This is just a convenience function equivalent to:
+
+\code
+vkDestroyBuffer(device, buffer, allocationCallbacks);
+vmaFreeMemory(allocator, allocation);
+\endcode
+
+It is safe to pass null as buffer and/or allocation.
+*/
+VMA_CALL_PRE void VMA_CALL_POST vmaDestroyBuffer(
+    VmaAllocator VMA_NOT_NULL allocator,
+    VkBuffer VMA_NULLABLE_NON_DISPATCHABLE buffer,
+    VmaAllocation VMA_NULLABLE allocation);
+
+/// Function similar to vmaCreateBuffer().
+VMA_CALL_PRE VkResult VMA_CALL_POST vmaCreateImage(
+    VmaAllocator VMA_NOT_NULL allocator,
+    const VkImageCreateInfo* VMA_NOT_NULL pImageCreateInfo,
+    const VmaAllocationCreateInfo* VMA_NOT_NULL pAllocationCreateInfo,
+    VkImage VMA_NULLABLE_NON_DISPATCHABLE* VMA_NOT_NULL pImage,
+    VmaAllocation VMA_NULLABLE* VMA_NOT_NULL pAllocation,
+    VmaAllocationInfo* VMA_NULLABLE pAllocationInfo);
+
+/// Function similar to vmaCreateAliasingBuffer() but for images.
+VMA_CALL_PRE VkResult VMA_CALL_POST vmaCreateAliasingImage(
+    VmaAllocator VMA_NOT_NULL allocator,
+    VmaAllocation VMA_NOT_NULL allocation,
+    const VkImageCreateInfo* VMA_NOT_NULL pImageCreateInfo,
+    VkImage VMA_NULLABLE_NON_DISPATCHABLE* VMA_NOT_NULL pImage);
+
+/// Function similar to vmaCreateAliasingBuffer2() but for images.
+VMA_CALL_PRE VkResult VMA_CALL_POST vmaCreateAliasingImage2(
+    VmaAllocator VMA_NOT_NULL allocator,
+    VmaAllocation VMA_NOT_NULL allocation,
+    VkDeviceSize allocationLocalOffset,
+    const VkImageCreateInfo* VMA_NOT_NULL pImageCreateInfo,
+    VkImage VMA_NULLABLE_NON_DISPATCHABLE* VMA_NOT_NULL pImage);
+
+/** \brief Destroys Vulkan image and frees allocated memory.
+
+This is just a convenience function equivalent to:
+
+\code
+vkDestroyImage(device, image, allocationCallbacks);
+vmaFreeMemory(allocator, allocation);
+\endcode
+
+It is safe to pass null as image and/or allocation.
+*/
+VMA_CALL_PRE void VMA_CALL_POST vmaDestroyImage(
+    VmaAllocator VMA_NOT_NULL allocator,
+    VkImage VMA_NULLABLE_NON_DISPATCHABLE image,
+    VmaAllocation VMA_NULLABLE allocation);
+
+/** @} */
+
+/**
+\addtogroup group_virtual
+@{
+*/
+
+/** \brief Creates new #VmaVirtualBlock object.
+
+\param pCreateInfo Parameters for creation.
+\param[out] pVirtualBlock Returned virtual block object or `VMA_NULL` if creation failed.
+*/
+VMA_CALL_PRE VkResult VMA_CALL_POST vmaCreateVirtualBlock(
+    const VmaVirtualBlockCreateInfo* VMA_NOT_NULL pCreateInfo,
+    VmaVirtualBlock VMA_NULLABLE* VMA_NOT_NULL pVirtualBlock);
+
+/** \brief Destroys #VmaVirtualBlock object.
+
+Please note that you should consciously handle virtual allocations that could remain unfreed in the block.
+You should either free them individually using vmaVirtualFree() or call vmaClearVirtualBlock()
+if you are sure this is what you want. If you do neither, an assert is called.
+
+If you keep pointers to some additional metadata associated with your virtual allocations in their `pUserData`,
+don't forget to free them.
+*/
+VMA_CALL_PRE void VMA_CALL_POST vmaDestroyVirtualBlock(
+    VmaVirtualBlock VMA_NULLABLE virtualBlock);
+
+/** \brief Returns true if the #VmaVirtualBlock is empty - contains 0 virtual allocations and has all its space available for new allocations.
+*/
+VMA_CALL_PRE VkBool32 VMA_CALL_POST vmaIsVirtualBlockEmpty(
+    VmaVirtualBlock VMA_NOT_NULL virtualBlock);
+
+/** \brief Returns information about a specific virtual allocation within a virtual block, like its size and `pUserData` pointer.
+*/
+VMA_CALL_PRE void VMA_CALL_POST vmaGetVirtualAllocationInfo(
+    VmaVirtualBlock VMA_NOT_NULL virtualBlock,
+    VmaVirtualAllocation VMA_NOT_NULL_NON_DISPATCHABLE allocation, VmaVirtualAllocationInfo* VMA_NOT_NULL pVirtualAllocInfo);
+
+/** \brief Allocates new virtual allocation inside given #VmaVirtualBlock.
+
+If the allocation fails because there is not enough free space available, `VK_ERROR_OUT_OF_DEVICE_MEMORY` is returned
+(even though the function never allocates actual GPU memory).
+`pAllocation` is then set to `VK_NULL_HANDLE` and `pOffset`, if not null, is set to `UINT64_MAX`.
+
+\param virtualBlock Virtual block
+\param pCreateInfo Parameters for the allocation
+\param[out] pAllocation Returned handle of the new allocation
+\param[out] pOffset Returned offset of the new allocation. Optional, can be null.
+*/
+VMA_CALL_PRE VkResult VMA_CALL_POST vmaVirtualAllocate(
+    VmaVirtualBlock VMA_NOT_NULL virtualBlock,
+    const VmaVirtualAllocationCreateInfo* VMA_NOT_NULL pCreateInfo,
+    VmaVirtualAllocation VMA_NULLABLE_NON_DISPATCHABLE* VMA_NOT_NULL pAllocation,
+    VkDeviceSize* VMA_NULLABLE pOffset);
+
+/** \brief Frees virtual allocation inside given #VmaVirtualBlock.
+
+It is correct to call this function with `allocation == VK_NULL_HANDLE` - it does nothing.
+*/
+VMA_CALL_PRE void VMA_CALL_POST vmaVirtualFree(
+    VmaVirtualBlock VMA_NOT_NULL virtualBlock,
+    VmaVirtualAllocation VMA_NULLABLE_NON_DISPATCHABLE allocation);
+
+/** \brief Frees all virtual allocations inside given #VmaVirtualBlock.
+
+You must either call this function or free each virtual allocation individually with vmaVirtualFree()
+before destroying a virtual block. Otherwise, an assert is called.
+
+If you keep a pointer to some additional metadata associated with your virtual allocation in its `pUserData`,
+don't forget to free it as well.
+*/
+VMA_CALL_PRE void VMA_CALL_POST vmaClearVirtualBlock(
+    VmaVirtualBlock VMA_NOT_NULL virtualBlock);
+
+/** \brief Changes custom pointer associated with given virtual allocation.
+*/
+VMA_CALL_PRE void VMA_CALL_POST vmaSetVirtualAllocationUserData(
+    VmaVirtualBlock VMA_NOT_NULL virtualBlock,
+    VmaVirtualAllocation VMA_NOT_NULL_NON_DISPATCHABLE allocation,
+    void* VMA_NULLABLE pUserData);
+
+/** \brief Calculates and returns statistics about virtual allocations and memory usage in given #VmaVirtualBlock.
+
+This function is fast to call. For more detailed statistics, see vmaCalculateVirtualBlockStatistics().
+*/
+VMA_CALL_PRE void VMA_CALL_POST vmaGetVirtualBlockStatistics(
+    VmaVirtualBlock VMA_NOT_NULL virtualBlock,
+    VmaStatistics* VMA_NOT_NULL pStats);
+
+/** \brief Calculates and returns detailed statistics about virtual allocations and memory usage in given #VmaVirtualBlock.
+
+This function is slow to call. Use for debugging purposes.
+For less detailed statistics, see vmaGetVirtualBlockStatistics().
+*/
+VMA_CALL_PRE void VMA_CALL_POST vmaCalculateVirtualBlockStatistics(
+    VmaVirtualBlock VMA_NOT_NULL virtualBlock,
+    VmaDetailedStatistics* VMA_NOT_NULL pStats);
+
+/** @} */
+
+#if VMA_STATS_STRING_ENABLED
+/**
+\addtogroup group_stats
+@{
+*/
+
+/** \brief Builds and returns a null-terminated string in JSON format with information about given #VmaVirtualBlock.
+\param virtualBlock Virtual block.
+\param[out] ppStatsString Returned string.
+\param detailedMap Pass `VK_FALSE` to only obtain statistics as returned by vmaCalculateVirtualBlockStatistics(). Pass `VK_TRUE` to also obtain full list of allocations and free spaces.
+
+Returned string must be freed using vmaFreeVirtualBlockStatsString().
+*/
+VMA_CALL_PRE void VMA_CALL_POST vmaBuildVirtualBlockStatsString(
+    VmaVirtualBlock VMA_NOT_NULL virtualBlock,
+    char* VMA_NULLABLE* VMA_NOT_NULL ppStatsString,
+    VkBool32 detailedMap);
+
+/// Frees a string returned by vmaBuildVirtualBlockStatsString().
+VMA_CALL_PRE void VMA_CALL_POST vmaFreeVirtualBlockStatsString(
+    VmaVirtualBlock VMA_NOT_NULL virtualBlock,
+    char* VMA_NULLABLE pStatsString);
+
+/** \brief Builds and returns statistics as a null-terminated string in JSON format.
+\param allocator
+\param[out] ppStatsString Must be freed using vmaFreeStatsString() function.
+\param detailedMap
+*/
+VMA_CALL_PRE void VMA_CALL_POST vmaBuildStatsString(
+    VmaAllocator VMA_NOT_NULL allocator,
+    char* VMA_NULLABLE* VMA_NOT_NULL ppStatsString,
+    VkBool32 detailedMap);
+
+VMA_CALL_PRE void VMA_CALL_POST vmaFreeStatsString(
+    VmaAllocator VMA_NOT_NULL allocator,
+    char* VMA_NULLABLE pStatsString);
+
+/** @} */
+
+#endif // VMA_STATS_STRING_ENABLED
+
+#endif // _VMA_FUNCTION_HEADERS
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif // AMD_VULKAN_MEMORY_ALLOCATOR_H
+
+////////////////////////////////////////////////////////////////////////////////
+////////////////////////////////////////////////////////////////////////////////
+//
+//    IMPLEMENTATION
+//
+////////////////////////////////////////////////////////////////////////////////
+////////////////////////////////////////////////////////////////////////////////
+
+// For Visual Studio IntelliSense.
+#if defined(__cplusplus) && defined(__INTELLISENSE__)
+#define VMA_IMPLEMENTATION
+#endif
+
+#ifdef VMA_IMPLEMENTATION
+#undef VMA_IMPLEMENTATION
+
+#include <cstdint>
+#include <cstdlib>
+#include <cstring>
+#include <cinttypes>
+#include <utility>
+#include <type_traits>
+
+#if !defined(VMA_CPP20)
+    #if __cplusplus >= 202002L || _MSVC_LANG >= 202002L // C++20
+        #define VMA_CPP20 1
+    #else
+        #define VMA_CPP20 0
+    #endif
+#endif
+
+#ifdef _MSC_VER
+    #include <intrin.h> // For functions like __popcnt, _BitScanForward etc.
+#endif
+#if VMA_CPP20
+    #include <bit>
+#endif
+
+#if VMA_STATS_STRING_ENABLED
+    #include <cstdio> // For snprintf
+#endif
+
+/*******************************************************************************
+CONFIGURATION SECTION
+
+Define some of these macros before each #include of this header or change them
+here if you need behavior other than the default, depending on your environment.
+*/
+#ifndef _VMA_CONFIGURATION
+
+/*
+Define this macro to 1 to make the library fetch pointers to Vulkan functions
+internally, like:
+
+    vulkanFunctions.vkAllocateMemory = &vkAllocateMemory;
+*/
+#if !defined(VMA_STATIC_VULKAN_FUNCTIONS) && !defined(VK_NO_PROTOTYPES)
+    #define VMA_STATIC_VULKAN_FUNCTIONS 1
+#endif
+
+/*
+Define this macro to 1 to make the library fetch pointers to Vulkan functions
+internally, like:
+
+    vulkanFunctions.vkAllocateMemory = (PFN_vkAllocateMemory)vkGetDeviceProcAddr(device, "vkAllocateMemory");
+
+To use this feature in new versions of VMA you now have to pass
+VmaVulkanFunctions::vkGetInstanceProcAddr and vkGetDeviceProcAddr as
+VmaAllocatorCreateInfo::pVulkanFunctions. Other members can be null.
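+
+For example (a sketch; the two entry points come from your Vulkan loader):
+
+    VmaVulkanFunctions vulkanFunctions = {};
+    vulkanFunctions.vkGetInstanceProcAddr = vkGetInstanceProcAddr;
+    vulkanFunctions.vkGetDeviceProcAddr = vkGetDeviceProcAddr;
+
+    VmaAllocatorCreateInfo allocatorCreateInfo = {};
+    allocatorCreateInfo.pVulkanFunctions = &vulkanFunctions;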
+*/ +#if !defined(VMA_DYNAMIC_VULKAN_FUNCTIONS) + #define VMA_DYNAMIC_VULKAN_FUNCTIONS 1 +#endif + +#ifndef VMA_USE_STL_SHARED_MUTEX + #if __cplusplus >= 201703L || _MSVC_LANG >= 201703L // C++17 + #define VMA_USE_STL_SHARED_MUTEX 1 + // Visual studio defines __cplusplus properly only when passed additional parameter: /Zc:__cplusplus + // Otherwise it is always 199711L, despite shared_mutex works since Visual Studio 2015 Update 2. + #elif defined(_MSC_FULL_VER) && _MSC_FULL_VER >= 190023918 && __cplusplus == 199711L && _MSVC_LANG >= 201703L + #define VMA_USE_STL_SHARED_MUTEX 1 + #else + #define VMA_USE_STL_SHARED_MUTEX 0 + #endif +#endif + +/* +Define this macro to include custom header files without having to edit this file directly, e.g.: + + // Inside of "my_vma_configuration_user_includes.h": + + #include "my_custom_assert.h" // for MY_CUSTOM_ASSERT + #include "my_custom_min.h" // for my_custom_min + #include + #include + + // Inside a different file, which includes "vk_mem_alloc.h": + + #define VMA_CONFIGURATION_USER_INCLUDES_H "my_vma_configuration_user_includes.h" + #define VMA_ASSERT(expr) MY_CUSTOM_ASSERT(expr) + #define VMA_MIN(v1, v2) (my_custom_min(v1, v2)) + #include "vk_mem_alloc.h" + ... + +The following headers are used in this CONFIGURATION section only, so feel free to +remove them if not needed. +*/ +#if !defined(VMA_CONFIGURATION_USER_INCLUDES_H) + #include // for assert + #include // for min, max, swap + #include +#else + #include VMA_CONFIGURATION_USER_INCLUDES_H +#endif + +#ifndef VMA_NULL + // Value used as null pointer. Define it to e.g.: nullptr, NULL, 0, (void*)0. + #define VMA_NULL nullptr +#endif + +#ifndef VMA_FALLTHROUGH + #if __cplusplus >= 201703L || _MSVC_LANG >= 201703L // C++17 + #define VMA_FALLTHROUGH [[fallthrough]] + #else + #define VMA_FALLTHROUGH + #endif +#endif + +// Normal assert to check for programmer's errors, especially in Debug configuration. +#ifndef VMA_ASSERT + #ifdef NDEBUG + #define VMA_ASSERT(expr) + #else + #define VMA_ASSERT(expr) assert(expr) + #endif +#endif + +// Assert that will be called very often, like inside data structures e.g. operator[]. +// Making it non-empty can make program slow. +#ifndef VMA_HEAVY_ASSERT + #ifdef NDEBUG + #define VMA_HEAVY_ASSERT(expr) + #else + #define VMA_HEAVY_ASSERT(expr) //VMA_ASSERT(expr) + #endif +#endif + +// Assert used for reporting memory leaks - unfreed allocations. +#ifndef VMA_ASSERT_LEAK + #define VMA_ASSERT_LEAK(expr) VMA_ASSERT(expr) +#endif + +// If your compiler is not compatible with C++17 and definition of +// aligned_alloc() function is missing, uncommenting following line may help: + +//#include + +#if defined(__ANDROID_API__) && (__ANDROID_API__ < 16) +#include +static void* vma_aligned_alloc(size_t alignment, size_t size) +{ + // alignment must be >= sizeof(void*) + if(alignment < sizeof(void*)) + { + alignment = sizeof(void*); + } + + return memalign(alignment, size); +} +#elif defined(__APPLE__) || defined(__ANDROID__) || (defined(__linux__) && defined(__GLIBCXX__) && !defined(_GLIBCXX_HAVE_ALIGNED_ALLOC)) +#include + +#if defined(__APPLE__) +#include +#endif + +static void* vma_aligned_alloc(size_t alignment, size_t size) +{ + // Unfortunately, aligned_alloc causes VMA to crash due to it returning null pointers. (At least under 11.4) + // Therefore, for now disable this specific exception until a proper solution is found. 
+ //#if defined(__APPLE__) && (defined(MAC_OS_X_VERSION_10_16) || defined(__IPHONE_14_0)) + //#if MAC_OS_X_VERSION_MAX_ALLOWED >= MAC_OS_X_VERSION_10_16 || __IPHONE_OS_VERSION_MAX_ALLOWED >= __IPHONE_14_0 + // // For C++14, usr/include/malloc/_malloc.h declares aligned_alloc()) only + // // with the MacOSX11.0 SDK in Xcode 12 (which is what adds + // // MAC_OS_X_VERSION_10_16), even though the function is marked + // // available for 10.15. That is why the preprocessor checks for 10.16 but + // // the __builtin_available checks for 10.15. + // // People who use C++17 could call aligned_alloc with the 10.15 SDK already. + // if (__builtin_available(macOS 10.15, iOS 13, *)) + // return aligned_alloc(alignment, size); + //#endif + //#endif + + // alignment must be >= sizeof(void*) + if(alignment < sizeof(void*)) + { + alignment = sizeof(void*); + } + + void *pointer; + if(posix_memalign(&pointer, alignment, size) == 0) + return pointer; + return VMA_NULL; +} +#elif defined(_WIN32) +static void* vma_aligned_alloc(size_t alignment, size_t size) +{ + return _aligned_malloc(size, alignment); +} +#elif __cplusplus >= 201703L || _MSVC_LANG >= 201703L // C++17 +static void* vma_aligned_alloc(size_t alignment, size_t size) +{ + return aligned_alloc(alignment, size); +} +#else +static void* vma_aligned_alloc(size_t alignment, size_t size) +{ + VMA_ASSERT(0 && "Could not implement aligned_alloc automatically. Please enable C++17 or later in your compiler or provide custom implementation of macro VMA_SYSTEM_ALIGNED_MALLOC (and VMA_SYSTEM_ALIGNED_FREE if needed) using the API of your system."); + return VMA_NULL; +} +#endif + +#if defined(_WIN32) +static void vma_aligned_free(void* ptr) +{ + _aligned_free(ptr); +} +#else +static void vma_aligned_free(void* VMA_NULLABLE ptr) +{ + free(ptr); +} +#endif + +#ifndef VMA_ALIGN_OF + #define VMA_ALIGN_OF(type) (alignof(type)) +#endif + +#ifndef VMA_SYSTEM_ALIGNED_MALLOC + #define VMA_SYSTEM_ALIGNED_MALLOC(size, alignment) vma_aligned_alloc((alignment), (size)) +#endif + +#ifndef VMA_SYSTEM_ALIGNED_FREE + // VMA_SYSTEM_FREE is the old name, but might have been defined by the user + #if defined(VMA_SYSTEM_FREE) + #define VMA_SYSTEM_ALIGNED_FREE(ptr) VMA_SYSTEM_FREE(ptr) + #else + #define VMA_SYSTEM_ALIGNED_FREE(ptr) vma_aligned_free(ptr) + #endif +#endif + +#ifndef VMA_COUNT_BITS_SET + // Returns number of bits set to 1 in (v) + #define VMA_COUNT_BITS_SET(v) VmaCountBitsSet(v) +#endif + +#ifndef VMA_BITSCAN_LSB + // Scans integer for index of first nonzero value from the Least Significant Bit (LSB). If mask is 0 then returns UINT8_MAX + #define VMA_BITSCAN_LSB(mask) VmaBitScanLSB(mask) +#endif + +#ifndef VMA_BITSCAN_MSB + // Scans integer for index of first nonzero value from the Most Significant Bit (MSB). If mask is 0 then returns UINT8_MAX + #define VMA_BITSCAN_MSB(mask) VmaBitScanMSB(mask) +#endif + +#ifndef VMA_MIN + #define VMA_MIN(v1, v2) ((std::min)((v1), (v2))) +#endif + +#ifndef VMA_MAX + #define VMA_MAX(v1, v2) ((std::max)((v1), (v2))) +#endif + +#ifndef VMA_SORT + #define VMA_SORT(beg, end, cmp) std::sort(beg, end, cmp) +#endif + +#ifndef VMA_DEBUG_LOG_FORMAT + #define VMA_DEBUG_LOG_FORMAT(format, ...) + /* + #define VMA_DEBUG_LOG_FORMAT(format, ...) do { \ + printf((format), __VA_ARGS__); \ + printf("\n"); \ + } while(false) + */ +#endif + +#ifndef VMA_DEBUG_LOG + #define VMA_DEBUG_LOG(str) VMA_DEBUG_LOG_FORMAT("%s", (str)) +#endif + +#ifndef VMA_LEAK_LOG_FORMAT + #define VMA_LEAK_LOG_FORMAT(format, ...) 
VMA_DEBUG_LOG_FORMAT(format, __VA_ARGS__) +#endif + +#ifndef VMA_CLASS_NO_COPY + #define VMA_CLASS_NO_COPY(className) \ + private: \ + className(const className&) = delete; \ + className& operator=(const className&) = delete; +#endif +#ifndef VMA_CLASS_NO_COPY_NO_MOVE + #define VMA_CLASS_NO_COPY_NO_MOVE(className) \ + private: \ + className(const className&) = delete; \ + className(className&&) = delete; \ + className& operator=(const className&) = delete; \ + className& operator=(className&&) = delete; +#endif + +// Define this macro to 1 to enable functions: vmaBuildStatsString, vmaFreeStatsString. +#if VMA_STATS_STRING_ENABLED + static inline void VmaUint32ToStr(char* VMA_NOT_NULL outStr, size_t strLen, uint32_t num) + { + snprintf(outStr, strLen, "%" PRIu32, num); + } + static inline void VmaUint64ToStr(char* VMA_NOT_NULL outStr, size_t strLen, uint64_t num) + { + snprintf(outStr, strLen, "%" PRIu64, num); + } + static inline void VmaPtrToStr(char* VMA_NOT_NULL outStr, size_t strLen, const void* ptr) + { + snprintf(outStr, strLen, "%p", ptr); + } +#endif + +#ifndef VMA_MUTEX + class VmaMutex + { + VMA_CLASS_NO_COPY_NO_MOVE(VmaMutex) + public: + VmaMutex() { } + void Lock() { m_Mutex.lock(); } + void Unlock() { m_Mutex.unlock(); } + bool TryLock() { return m_Mutex.try_lock(); } + private: + std::mutex m_Mutex; + }; + #define VMA_MUTEX VmaMutex +#endif + +// Read-write mutex, where "read" is shared access, "write" is exclusive access. +#ifndef VMA_RW_MUTEX + #if VMA_USE_STL_SHARED_MUTEX + // Use std::shared_mutex from C++17. + #include + class VmaRWMutex + { + public: + void LockRead() { m_Mutex.lock_shared(); } + void UnlockRead() { m_Mutex.unlock_shared(); } + bool TryLockRead() { return m_Mutex.try_lock_shared(); } + void LockWrite() { m_Mutex.lock(); } + void UnlockWrite() { m_Mutex.unlock(); } + bool TryLockWrite() { return m_Mutex.try_lock(); } + private: + std::shared_mutex m_Mutex; + }; + #define VMA_RW_MUTEX VmaRWMutex + #elif defined(_WIN32) && defined(WINVER) && defined(SRWLOCK_INIT) && WINVER >= 0x0600 + // Use SRWLOCK from WinAPI. + // Minimum supported client = Windows Vista, server = Windows Server 2008. + class VmaRWMutex + { + public: + VmaRWMutex() { InitializeSRWLock(&m_Lock); } + void LockRead() { AcquireSRWLockShared(&m_Lock); } + void UnlockRead() { ReleaseSRWLockShared(&m_Lock); } + bool TryLockRead() { return TryAcquireSRWLockShared(&m_Lock) != FALSE; } + void LockWrite() { AcquireSRWLockExclusive(&m_Lock); } + void UnlockWrite() { ReleaseSRWLockExclusive(&m_Lock); } + bool TryLockWrite() { return TryAcquireSRWLockExclusive(&m_Lock) != FALSE; } + private: + SRWLOCK m_Lock; + }; + #define VMA_RW_MUTEX VmaRWMutex + #else + // Less efficient fallback: Use normal mutex. + class VmaRWMutex + { + public: + void LockRead() { m_Mutex.Lock(); } + void UnlockRead() { m_Mutex.Unlock(); } + bool TryLockRead() { return m_Mutex.TryLock(); } + void LockWrite() { m_Mutex.Lock(); } + void UnlockWrite() { m_Mutex.Unlock(); } + bool TryLockWrite() { return m_Mutex.TryLock(); } + private: + VMA_MUTEX m_Mutex; + }; + #define VMA_RW_MUTEX VmaRWMutex + #endif // #if VMA_USE_STL_SHARED_MUTEX +#endif // #ifndef VMA_RW_MUTEX + +/* +If providing your own implementation, you need to implement a subset of std::atomic. +*/ +#ifndef VMA_ATOMIC_UINT32 + #include + #define VMA_ATOMIC_UINT32 std::atomic +#endif + +#ifndef VMA_ATOMIC_UINT64 + #include + #define VMA_ATOMIC_UINT64 std::atomic +#endif + +#ifndef VMA_DEBUG_ALWAYS_DEDICATED_MEMORY + /** + Every allocation will have its own memory block. 
+ Define to 1 for debugging purposes only. + */ + #define VMA_DEBUG_ALWAYS_DEDICATED_MEMORY (0) +#endif + +#ifndef VMA_MIN_ALIGNMENT + /** + Minimum alignment of all allocations, in bytes. + Set to more than 1 for debugging purposes. Must be power of two. + */ + #ifdef VMA_DEBUG_ALIGNMENT // Old name + #define VMA_MIN_ALIGNMENT VMA_DEBUG_ALIGNMENT + #else + #define VMA_MIN_ALIGNMENT (1) + #endif +#endif + +#ifndef VMA_DEBUG_MARGIN + /** + Minimum margin after every allocation, in bytes. + Set nonzero for debugging purposes only. + */ + #define VMA_DEBUG_MARGIN (0) +#endif + +#ifndef VMA_DEBUG_INITIALIZE_ALLOCATIONS + /** + Define this macro to 1 to automatically fill new allocations and destroyed + allocations with some bit pattern. + */ + #define VMA_DEBUG_INITIALIZE_ALLOCATIONS (0) +#endif + +#ifndef VMA_DEBUG_DETECT_CORRUPTION + /** + Define this macro to 1 together with non-zero value of VMA_DEBUG_MARGIN to + enable writing magic value to the margin after every allocation and + validating it, so that memory corruptions (out-of-bounds writes) are detected. + */ + #define VMA_DEBUG_DETECT_CORRUPTION (0) +#endif + +#ifndef VMA_DEBUG_GLOBAL_MUTEX + /** + Set this to 1 for debugging purposes only, to enable single mutex protecting all + entry calls to the library. Can be useful for debugging multithreading issues. + */ + #define VMA_DEBUG_GLOBAL_MUTEX (0) +#endif + +#ifndef VMA_DEBUG_MIN_BUFFER_IMAGE_GRANULARITY + /** + Minimum value for VkPhysicalDeviceLimits::bufferImageGranularity. + Set to more than 1 for debugging purposes only. Must be power of two. + */ + #define VMA_DEBUG_MIN_BUFFER_IMAGE_GRANULARITY (1) +#endif + +#ifndef VMA_DEBUG_DONT_EXCEED_MAX_MEMORY_ALLOCATION_COUNT + /* + Set this to 1 to make VMA never exceed VkPhysicalDeviceLimits::maxMemoryAllocationCount + and return error instead of leaving up to Vulkan implementation what to do in such cases. + */ + #define VMA_DEBUG_DONT_EXCEED_MAX_MEMORY_ALLOCATION_COUNT (0) +#endif + +#ifndef VMA_SMALL_HEAP_MAX_SIZE + /// Maximum size of a memory heap in Vulkan to consider it "small". + #define VMA_SMALL_HEAP_MAX_SIZE (1024ull * 1024 * 1024) +#endif + +#ifndef VMA_DEFAULT_LARGE_HEAP_BLOCK_SIZE + /// Default size of a block allocated as single VkDeviceMemory from a "large" heap. + #define VMA_DEFAULT_LARGE_HEAP_BLOCK_SIZE (256ull * 1024 * 1024) +#endif + +/* +Mapping hysteresis is a logic that launches when vmaMapMemory/vmaUnmapMemory is called +or a persistently mapped allocation is created and destroyed several times in a row. +It keeps additional +1 mapping of a device memory block to prevent calling actual +vkMapMemory/vkUnmapMemory too many times, which may improve performance and help +tools like RenderDoc. +*/ +#ifndef VMA_MAPPING_HYSTERESIS_ENABLED + #define VMA_MAPPING_HYSTERESIS_ENABLED 1 +#endif + +#define VMA_VALIDATE(cond) do { if(!(cond)) { \ + VMA_ASSERT(0 && "Validation failed: " #cond); \ + return false; \ + } } while(false) + +/******************************************************************************* +END OF CONFIGURATION +*/ +#endif // _VMA_CONFIGURATION + + +static const uint8_t VMA_ALLOCATION_FILL_PATTERN_CREATED = 0xDC; +static const uint8_t VMA_ALLOCATION_FILL_PATTERN_DESTROYED = 0xEF; +// Decimal 2139416166, float NaN, little-endian binary 66 E6 84 7F. +static const uint32_t VMA_CORRUPTION_DETECTION_MAGIC_VALUE = 0x7F84E666; + +// Copy of some Vulkan definitions so we don't need to check their existence just to handle few constants. 
+static const uint32_t VK_MEMORY_PROPERTY_DEVICE_COHERENT_BIT_AMD_COPY = 0x00000040;
+static const uint32_t VK_MEMORY_PROPERTY_DEVICE_UNCACHED_BIT_AMD_COPY = 0x00000080;
+static const uint32_t VK_BUFFER_USAGE_SHADER_DEVICE_ADDRESS_BIT_COPY = 0x00020000;
+static const uint32_t VK_IMAGE_CREATE_DISJOINT_BIT_COPY = 0x00000200;
+static const int32_t VK_IMAGE_TILING_DRM_FORMAT_MODIFIER_EXT_COPY = 1000158000;
+static const uint32_t VMA_ALLOCATION_INTERNAL_STRATEGY_MIN_OFFSET = 0x10000000u;
+static const uint32_t VMA_ALLOCATION_TRY_COUNT = 32;
+static const uint32_t VMA_VENDOR_ID_AMD = 4098;
+
+// This one is tricky. Vulkan specification defines this code as available since
+// Vulkan 1.0, but doesn't actually define it in Vulkan SDK earlier than 1.2.131.
+// See pull request #207.
+#define VK_ERROR_UNKNOWN_COPY ((VkResult)-13)
+
+
+#if VMA_STATS_STRING_ENABLED
+// Correspond to values of enum VmaSuballocationType.
+static const char* VMA_SUBALLOCATION_TYPE_NAMES[] =
+{
+    "FREE",
+    "UNKNOWN",
+    "BUFFER",
+    "IMAGE_UNKNOWN",
+    "IMAGE_LINEAR",
+    "IMAGE_OPTIMAL",
+};
+#endif
+
+static VkAllocationCallbacks VmaEmptyAllocationCallbacks =
+    { VMA_NULL, VMA_NULL, VMA_NULL, VMA_NULL, VMA_NULL, VMA_NULL };
+
+
+#ifndef _VMA_ENUM_DECLARATIONS
+
+enum VmaSuballocationType
+{
+    VMA_SUBALLOCATION_TYPE_FREE = 0,
+    VMA_SUBALLOCATION_TYPE_UNKNOWN = 1,
+    VMA_SUBALLOCATION_TYPE_BUFFER = 2,
+    VMA_SUBALLOCATION_TYPE_IMAGE_UNKNOWN = 3,
+    VMA_SUBALLOCATION_TYPE_IMAGE_LINEAR = 4,
+    VMA_SUBALLOCATION_TYPE_IMAGE_OPTIMAL = 5,
+    VMA_SUBALLOCATION_TYPE_MAX_ENUM = 0x7FFFFFFF
+};
+
+enum VMA_CACHE_OPERATION
+{
+    VMA_CACHE_FLUSH,
+    VMA_CACHE_INVALIDATE
+};
+
+enum class VmaAllocationRequestType
+{
+    Normal,
+    TLSF,
+    // Used by "Linear" algorithm.
+    UpperAddress,
+    EndOf1st,
+    EndOf2nd,
+};
+
+#endif // _VMA_ENUM_DECLARATIONS
+
+#ifndef _VMA_FORWARD_DECLARATIONS
+// Opaque handle used by allocation algorithms to identify single allocation in any conforming way.
+VK_DEFINE_NON_DISPATCHABLE_HANDLE(VmaAllocHandle);
+
+struct VmaMutexLock;
+struct VmaMutexLockRead;
+struct VmaMutexLockWrite;
+
+template<typename T>
+struct AtomicTransactionalIncrement;
+
+template<typename T>
+struct VmaStlAllocator;
+
+template<typename T, typename AllocatorT>
+class VmaVector;
+
+template<typename T, typename AllocatorT, size_t N>
+class VmaSmallVector;
+
+template<typename T>
+class VmaPoolAllocator;
+
+template<typename T>
+struct VmaListItem;
+
+template<typename T>
+class VmaRawList;
+
+template<typename T, typename AllocatorT>
+class VmaList;
+
+template<typename ItemTypeTraits>
+class VmaIntrusiveLinkedList;
+
+#if VMA_STATS_STRING_ENABLED
+class VmaStringBuilder;
+class VmaJsonWriter;
+#endif
+
+class VmaDeviceMemoryBlock;
+
+struct VmaDedicatedAllocationListItemTraits;
+class VmaDedicatedAllocationList;
+
+struct VmaSuballocation;
+struct VmaSuballocationOffsetLess;
+struct VmaSuballocationOffsetGreater;
+struct VmaSuballocationItemSizeLess;
+
+typedef VmaList<VmaSuballocation, VmaStlAllocator<VmaSuballocation>> VmaSuballocationList;
+
+struct VmaAllocationRequest;
+
+class VmaBlockMetadata;
+class VmaBlockMetadata_Linear;
+class VmaBlockMetadata_TLSF;
+
+class VmaBlockVector;
+
+struct VmaPoolListItemTraits;
+
+struct VmaCurrentBudgetData;
+
+class VmaAllocationObjectAllocator;
+
+#endif // _VMA_FORWARD_DECLARATIONS
+
+
+#ifndef _VMA_FUNCTIONS
+
+/*
+Returns number of bits set to 1 in (v).
+
+On specific platforms and compilers you can use intrinsics like:
+
+Visual Studio:
+    return __popcnt(v);
+GCC, Clang:
+    return static_cast<uint32_t>(__builtin_popcount(v));
+
+Define macro VMA_COUNT_BITS_SET to provide your optimized implementation.
+But you need to check at runtime whether the user's CPU supports these, as some old processors don't.
+*/
+static inline uint32_t VmaCountBitsSet(uint32_t v)
+{
+#if VMA_CPP20
+    return std::popcount(v);
+#else
+    uint32_t c = v - ((v >> 1) & 0x55555555);
+    c = ((c >> 2) & 0x33333333) + (c & 0x33333333);
+    c = ((c >> 4) + c) & 0x0F0F0F0F;
+    c = ((c >> 8) + c) & 0x00FF00FF;
+    c = ((c >> 16) + c) & 0x0000FFFF;
+    return c;
+#endif
+}
+
+static inline uint8_t VmaBitScanLSB(uint64_t mask)
+{
+#if defined(_MSC_VER) && defined(_WIN64)
+    unsigned long pos;
+    if (_BitScanForward64(&pos, mask))
+        return static_cast<uint8_t>(pos);
+    return UINT8_MAX;
+#elif VMA_CPP20
+    if(mask)
+        return static_cast<uint8_t>(std::countr_zero(mask));
+    return UINT8_MAX;
+#elif defined __GNUC__ || defined __clang__
+    return static_cast<uint8_t>(__builtin_ffsll(mask)) - 1U;
+#else
+    uint8_t pos = 0;
+    uint64_t bit = 1;
+    do
+    {
+        if (mask & bit)
+            return pos;
+        bit <<= 1;
+    } while (pos++ < 63);
+    return UINT8_MAX;
+#endif
+}
+
+static inline uint8_t VmaBitScanLSB(uint32_t mask)
+{
+#ifdef _MSC_VER
+    unsigned long pos;
+    if (_BitScanForward(&pos, mask))
+        return static_cast<uint8_t>(pos);
+    return UINT8_MAX;
+#elif VMA_CPP20
+    if(mask)
+        return static_cast<uint8_t>(std::countr_zero(mask));
+    return UINT8_MAX;
+#elif defined __GNUC__ || defined __clang__
+    return static_cast<uint8_t>(__builtin_ffs(mask)) - 1U;
+#else
+    uint8_t pos = 0;
+    uint32_t bit = 1;
+    do
+    {
+        if (mask & bit)
+            return pos;
+        bit <<= 1;
+    } while (pos++ < 31);
+    return UINT8_MAX;
+#endif
+}
+
+static inline uint8_t VmaBitScanMSB(uint64_t mask)
+{
+#if defined(_MSC_VER) && defined(_WIN64)
+    unsigned long pos;
+    if (_BitScanReverse64(&pos, mask))
+        return static_cast<uint8_t>(pos);
+#elif VMA_CPP20
+    if(mask)
+        return 63 - static_cast<uint8_t>(std::countl_zero(mask));
+#elif defined __GNUC__ || defined __clang__
+    if (mask)
+        return 63 - static_cast<uint8_t>(__builtin_clzll(mask));
+#else
+    uint8_t pos = 63;
+    uint64_t bit = 1ULL << 63;
+    do
+    {
+        if (mask & bit)
+            return pos;
+        bit >>= 1;
+    } while (pos-- > 0);
+#endif
+    return UINT8_MAX;
+}
+
+static inline uint8_t VmaBitScanMSB(uint32_t mask)
+{
+#ifdef _MSC_VER
+    unsigned long pos;
+    if (_BitScanReverse(&pos, mask))
+        return static_cast<uint8_t>(pos);
+#elif VMA_CPP20
+    if(mask)
+        return 31 - static_cast<uint8_t>(std::countl_zero(mask));
+#elif defined __GNUC__ || defined __clang__
+    if (mask)
+        return 31 - static_cast<uint8_t>(__builtin_clz(mask));
+#else
+    uint8_t pos = 31;
+    uint32_t bit = 1UL << 31;
+    do
+    {
+        if (mask & bit)
+            return pos;
+        bit >>= 1;
+    } while (pos-- > 0);
+#endif
+    return UINT8_MAX;
+}
+
+/*
+Returns true if given number is a power of two.
+T must be unsigned integer number or signed integer but always nonnegative.
+For 0 returns true.
+*/
+template <typename T>
+inline bool VmaIsPow2(T x)
+{
+    return (x & (x - 1)) == 0;
+}
+
+// Aligns given value up to nearest multiple of align value. For example: VmaAlignUp(11, 8) = 16.
+// Use types like uint32_t, uint64_t as T.
+template <typename T>
+static inline T VmaAlignUp(T val, T alignment)
+{
+    VMA_HEAVY_ASSERT(VmaIsPow2(alignment));
+    return (val + alignment - 1) & ~(alignment - 1);
+}
+
+// Aligns given value down to nearest multiple of align value. For example: VmaAlignDown(11, 8) = 8.
+// Use types like uint32_t, uint64_t as T.
+template <typename T>
+static inline T VmaAlignDown(T val, T alignment)
+{
+    VMA_HEAVY_ASSERT(VmaIsPow2(alignment));
+    return val & ~(alignment - 1);
+}
+
+// Division with mathematical rounding to nearest number.
+template <typename T>
+static inline T VmaRoundDiv(T x, T y)
+{
+    return (x + (y / (T)2)) / y;
+}
+
+// Divide by 'y' and round up to nearest integer.
+template <typename T>
+static inline T VmaDivideRoundingUp(T x, T y)
+{
+    return (x + y - (T)1) / y;
+}
+
+// Returns smallest power of 2 greater or equal to v.
+static inline uint32_t VmaNextPow2(uint32_t v)
+{
+    v--;
+    v |= v >> 1;
+    v |= v >> 2;
+    v |= v >> 4;
+    v |= v >> 8;
+    v |= v >> 16;
+    v++;
+    return v;
+}
+
+static inline uint64_t VmaNextPow2(uint64_t v)
+{
+    v--;
+    v |= v >> 1;
+    v |= v >> 2;
+    v |= v >> 4;
+    v |= v >> 8;
+    v |= v >> 16;
+    v |= v >> 32;
+    v++;
+    return v;
+}
+
+// Returns largest power of 2 less or equal to v.
+static inline uint32_t VmaPrevPow2(uint32_t v)
+{
+    v |= v >> 1;
+    v |= v >> 2;
+    v |= v >> 4;
+    v |= v >> 8;
+    v |= v >> 16;
+    v = v ^ (v >> 1);
+    return v;
+}
+
+static inline uint64_t VmaPrevPow2(uint64_t v)
+{
+    v |= v >> 1;
+    v |= v >> 2;
+    v |= v >> 4;
+    v |= v >> 8;
+    v |= v >> 16;
+    v |= v >> 32;
+    v = v ^ (v >> 1);
+    return v;
+}
+
+static inline bool VmaStrIsEmpty(const char* pStr)
+{
+    return pStr == VMA_NULL || *pStr == '\0';
+}
+
+/*
+Returns true if two memory blocks occupy overlapping pages.
+ResourceA must be at a lower memory offset than ResourceB.
+
+Algorithm is based on "Vulkan 1.0.39 - A Specification (with all registered Vulkan extensions)"
+chapter 11.6 "Resource Memory Association", paragraph "Buffer-Image Granularity".
+*/
+static inline bool VmaBlocksOnSamePage(
+    VkDeviceSize resourceAOffset,
+    VkDeviceSize resourceASize,
+    VkDeviceSize resourceBOffset,
+    VkDeviceSize pageSize)
+{
+    VMA_ASSERT(resourceAOffset + resourceASize <= resourceBOffset && resourceASize > 0 && pageSize > 0);
+    VkDeviceSize resourceAEnd = resourceAOffset + resourceASize - 1;
+    VkDeviceSize resourceAEndPage = resourceAEnd & ~(pageSize - 1);
+    VkDeviceSize resourceBStart = resourceBOffset;
+    VkDeviceSize resourceBStartPage = resourceBStart & ~(pageSize - 1);
+    return resourceAEndPage == resourceBStartPage;
+}
+
+/*
+Returns true if given suballocation types could conflict and must respect
+VkPhysicalDeviceLimits::bufferImageGranularity. They conflict if one is buffer
+or linear image and another one is optimal image. If type is unknown, behave
+conservatively.
+*/ +static inline bool VmaIsBufferImageGranularityConflict( + VmaSuballocationType suballocType1, + VmaSuballocationType suballocType2) +{ + if (suballocType1 > suballocType2) + { + std::swap(suballocType1, suballocType2); + } + + switch (suballocType1) + { + case VMA_SUBALLOCATION_TYPE_FREE: + return false; + case VMA_SUBALLOCATION_TYPE_UNKNOWN: + return true; + case VMA_SUBALLOCATION_TYPE_BUFFER: + return + suballocType2 == VMA_SUBALLOCATION_TYPE_IMAGE_UNKNOWN || + suballocType2 == VMA_SUBALLOCATION_TYPE_IMAGE_OPTIMAL; + case VMA_SUBALLOCATION_TYPE_IMAGE_UNKNOWN: + return + suballocType2 == VMA_SUBALLOCATION_TYPE_IMAGE_UNKNOWN || + suballocType2 == VMA_SUBALLOCATION_TYPE_IMAGE_LINEAR || + suballocType2 == VMA_SUBALLOCATION_TYPE_IMAGE_OPTIMAL; + case VMA_SUBALLOCATION_TYPE_IMAGE_LINEAR: + return + suballocType2 == VMA_SUBALLOCATION_TYPE_IMAGE_OPTIMAL; + case VMA_SUBALLOCATION_TYPE_IMAGE_OPTIMAL: + return false; + default: + VMA_ASSERT(0); + return true; + } +} + +static void VmaWriteMagicValue(void* pData, VkDeviceSize offset) +{ +#if VMA_DEBUG_MARGIN > 0 && VMA_DEBUG_DETECT_CORRUPTION + uint32_t* pDst = (uint32_t*)((char*)pData + offset); + const size_t numberCount = VMA_DEBUG_MARGIN / sizeof(uint32_t); + for (size_t i = 0; i < numberCount; ++i, ++pDst) + { + *pDst = VMA_CORRUPTION_DETECTION_MAGIC_VALUE; + } +#else + // no-op +#endif +} + +static bool VmaValidateMagicValue(const void* pData, VkDeviceSize offset) +{ +#if VMA_DEBUG_MARGIN > 0 && VMA_DEBUG_DETECT_CORRUPTION + const uint32_t* pSrc = (const uint32_t*)((const char*)pData + offset); + const size_t numberCount = VMA_DEBUG_MARGIN / sizeof(uint32_t); + for (size_t i = 0; i < numberCount; ++i, ++pSrc) + { + if (*pSrc != VMA_CORRUPTION_DETECTION_MAGIC_VALUE) + { + return false; + } + } +#endif + return true; +} + +/* +Fills structure with parameters of an example buffer to be used for transfers +during GPU memory defragmentation. +*/ +static void VmaFillGpuDefragmentationBufferCreateInfo(VkBufferCreateInfo& outBufCreateInfo) +{ + memset(&outBufCreateInfo, 0, sizeof(outBufCreateInfo)); + outBufCreateInfo.sType = VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO; + outBufCreateInfo.usage = VK_BUFFER_USAGE_TRANSFER_SRC_BIT | VK_BUFFER_USAGE_TRANSFER_DST_BIT; + outBufCreateInfo.size = (VkDeviceSize)VMA_DEFAULT_LARGE_HEAP_BLOCK_SIZE; // Example size. +} + + +/* +Performs binary search and returns iterator to first element that is greater or +equal to (key), according to comparison (cmp). + +Cmp should return true if first argument is less than second argument. + +Returned value is the found element, if present in the collection or place where +new element with value (key) should be inserted. +*/ +template +static IterT VmaBinaryFindFirstNotLess(IterT beg, IterT end, const KeyT& key, const CmpLess& cmp) +{ + size_t down = 0, up = size_t(end - beg); + while (down < up) + { + const size_t mid = down + (up - down) / 2; // Overflow-safe midpoint calculation + if (cmp(*(beg + mid), key)) + { + down = mid + 1; + } + else + { + up = mid; + } + } + return beg + down; +} + +template +IterT VmaBinaryFindSorted(const IterT& beg, const IterT& end, const KeyT& value, const CmpLess& cmp) +{ + IterT it = VmaBinaryFindFirstNotLess( + beg, end, value, cmp); + if (it == end || + (!cmp(*it, value) && !cmp(value, *it))) + { + return it; + } + return end; +} + +/* +Returns true if all pointers in the array are not-null and unique. +Warning! O(n^2) complexity. Use only inside VMA_HEAVY_ASSERT. +T must be pointer type, e.g. VmaAllocation, VmaPool. 
+*/ +template +static bool VmaValidatePointerArray(uint32_t count, const T* arr) +{ + for (uint32_t i = 0; i < count; ++i) + { + const T iPtr = arr[i]; + if (iPtr == VMA_NULL) + { + return false; + } + for (uint32_t j = i + 1; j < count; ++j) + { + if (iPtr == arr[j]) + { + return false; + } + } + } + return true; +} + +template +static inline void VmaPnextChainPushFront(MainT* mainStruct, NewT* newStruct) +{ + newStruct->pNext = mainStruct->pNext; + mainStruct->pNext = newStruct; +} +// Finds structure with s->sType == sType in mainStruct->pNext chain. +// Returns pointer to it. If not found, returns null. +template +static inline const FindT* VmaPnextChainFind(const MainT* mainStruct, VkStructureType sType) +{ + for(const VkBaseInStructure* s = (const VkBaseInStructure*)mainStruct->pNext; + s != VMA_NULL; s = s->pNext) + { + if(s->sType == sType) + { + return (const FindT*)s; + } + } + return VMA_NULL; +} + +// An abstraction over buffer or image `usage` flags, depending on available extensions. +struct VmaBufferImageUsage +{ +#if VMA_KHR_MAINTENANCE5 + typedef uint64_t BaseType; // VkFlags64 +#else + typedef uint32_t BaseType; // VkFlags32 +#endif + + static const VmaBufferImageUsage UNKNOWN; + + BaseType Value; + + VmaBufferImageUsage() { *this = UNKNOWN; } + explicit VmaBufferImageUsage(BaseType usage) : Value(usage) { } + VmaBufferImageUsage(const VkBufferCreateInfo &createInfo, bool useKhrMaintenance5); + explicit VmaBufferImageUsage(const VkImageCreateInfo &createInfo); + + bool operator==(const VmaBufferImageUsage& rhs) const { return Value == rhs.Value; } + bool operator!=(const VmaBufferImageUsage& rhs) const { return Value != rhs.Value; } + + bool Contains(BaseType flag) const { return (Value & flag) != 0; } + bool ContainsDeviceAccess() const + { + // This relies on values of VK_IMAGE_USAGE_TRANSFER* being the same as VK_BUFFER_IMAGE_TRANSFER*. + return (Value & ~BaseType(VK_BUFFER_USAGE_TRANSFER_DST_BIT | VK_BUFFER_USAGE_TRANSFER_SRC_BIT)) != 0; + } +}; + +const VmaBufferImageUsage VmaBufferImageUsage::UNKNOWN = VmaBufferImageUsage(0); + +VmaBufferImageUsage::VmaBufferImageUsage(const VkBufferCreateInfo &createInfo, + bool useKhrMaintenance5) +{ +#if VMA_KHR_MAINTENANCE5 + if(useKhrMaintenance5) + { + // If VkBufferCreateInfo::pNext chain contains VkBufferUsageFlags2CreateInfoKHR, + // take usage from it and ignore VkBufferCreateInfo::usage, per specification + // of the VK_KHR_maintenance5 extension. + const VkBufferUsageFlags2CreateInfoKHR* const usageFlags2 = + VmaPnextChainFind(&createInfo, VK_STRUCTURE_TYPE_BUFFER_USAGE_FLAGS_2_CREATE_INFO_KHR); + if(usageFlags2) + { + this->Value = usageFlags2->usage; + return; + } + } +#endif + + this->Value = (BaseType)createInfo.usage; +} + +VmaBufferImageUsage::VmaBufferImageUsage(const VkImageCreateInfo &createInfo) +{ + // Maybe in the future there will be VK_KHR_maintenanceN extension with structure + // VkImageUsageFlags2CreateInfoKHR, like the one for buffers... + + this->Value = (BaseType)createInfo.usage; +} + +// This is the main algorithm that guides the selection of a memory type best for an allocation - +// converts usage to required/preferred/not preferred flags. 
+static bool FindMemoryPreferences( + bool isIntegratedGPU, + const VmaAllocationCreateInfo& allocCreateInfo, + VmaBufferImageUsage bufImgUsage, + VkMemoryPropertyFlags& outRequiredFlags, + VkMemoryPropertyFlags& outPreferredFlags, + VkMemoryPropertyFlags& outNotPreferredFlags) +{ + outRequiredFlags = allocCreateInfo.requiredFlags; + outPreferredFlags = allocCreateInfo.preferredFlags; + outNotPreferredFlags = 0; + + switch(allocCreateInfo.usage) + { + case VMA_MEMORY_USAGE_UNKNOWN: + break; + case VMA_MEMORY_USAGE_GPU_ONLY: + if(!isIntegratedGPU || (outPreferredFlags & VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT) == 0) + { + outPreferredFlags |= VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT; + } + break; + case VMA_MEMORY_USAGE_CPU_ONLY: + outRequiredFlags |= VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT | VK_MEMORY_PROPERTY_HOST_COHERENT_BIT; + break; + case VMA_MEMORY_USAGE_CPU_TO_GPU: + outRequiredFlags |= VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT; + if(!isIntegratedGPU || (outPreferredFlags & VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT) == 0) + { + outPreferredFlags |= VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT; + } + break; + case VMA_MEMORY_USAGE_GPU_TO_CPU: + outRequiredFlags |= VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT; + outPreferredFlags |= VK_MEMORY_PROPERTY_HOST_CACHED_BIT; + break; + case VMA_MEMORY_USAGE_CPU_COPY: + outNotPreferredFlags |= VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT; + break; + case VMA_MEMORY_USAGE_GPU_LAZILY_ALLOCATED: + outRequiredFlags |= VK_MEMORY_PROPERTY_LAZILY_ALLOCATED_BIT; + break; + case VMA_MEMORY_USAGE_AUTO: + case VMA_MEMORY_USAGE_AUTO_PREFER_DEVICE: + case VMA_MEMORY_USAGE_AUTO_PREFER_HOST: + { + if(bufImgUsage == VmaBufferImageUsage::UNKNOWN) + { + VMA_ASSERT(0 && "VMA_MEMORY_USAGE_AUTO* values can only be used with functions like vmaCreateBuffer, vmaCreateImage so that the details of the created resource are known." + " Maybe you use VkBufferUsageFlags2CreateInfoKHR but forgot to use VMA_ALLOCATOR_CREATE_KHR_MAINTENANCE5_BIT?" ); + return false; + } + + const bool deviceAccess = bufImgUsage.ContainsDeviceAccess(); + const bool hostAccessSequentialWrite = (allocCreateInfo.flags & VMA_ALLOCATION_CREATE_HOST_ACCESS_SEQUENTIAL_WRITE_BIT) != 0; + const bool hostAccessRandom = (allocCreateInfo.flags & VMA_ALLOCATION_CREATE_HOST_ACCESS_RANDOM_BIT) != 0; + const bool hostAccessAllowTransferInstead = (allocCreateInfo.flags & VMA_ALLOCATION_CREATE_HOST_ACCESS_ALLOW_TRANSFER_INSTEAD_BIT) != 0; + const bool preferDevice = allocCreateInfo.usage == VMA_MEMORY_USAGE_AUTO_PREFER_DEVICE; + const bool preferHost = allocCreateInfo.usage == VMA_MEMORY_USAGE_AUTO_PREFER_HOST; + + // CPU random access - e.g. a buffer written to or transferred from GPU to read back on CPU. + if(hostAccessRandom) + { + // Prefer cached. Cannot require it, because some platforms don't have it (e.g. Raspberry Pi - see #362)! + outPreferredFlags |= VK_MEMORY_PROPERTY_HOST_CACHED_BIT; + + if (!isIntegratedGPU && deviceAccess && hostAccessAllowTransferInstead && !preferHost) + { + // Nice if it will end up in HOST_VISIBLE, but more importantly prefer DEVICE_LOCAL. + // Omitting HOST_VISIBLE here is intentional. + // In case there is DEVICE_LOCAL | HOST_VISIBLE | HOST_CACHED, it will pick that one. + // Otherwise, this will give same weight to DEVICE_LOCAL as HOST_VISIBLE | HOST_CACHED and select the former if occurs first on the list. + outPreferredFlags |= VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT; + } + else + { + // Always CPU memory. 
+                outRequiredFlags |= VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT;
+            }
+        }
+        // CPU sequential write - may be CPU or host-visible GPU memory, uncached and write-combined.
+        else if(hostAccessSequentialWrite)
+        {
+            // Want uncached and write-combined.
+            outNotPreferredFlags |= VK_MEMORY_PROPERTY_HOST_CACHED_BIT;
+
+            if(!isIntegratedGPU && deviceAccess && hostAccessAllowTransferInstead && !preferHost)
+            {
+                outPreferredFlags |= VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT | VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT;
+            }
+            else
+            {
+                outRequiredFlags |= VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT;
+                // Direct GPU access, CPU sequential write (e.g. a dynamic uniform buffer updated every frame)
+                if(deviceAccess)
+                {
+                    // Could go to CPU memory or GPU BAR/unified. Up to the user to decide. If no preference, choose GPU memory.
+                    if(preferHost)
+                        outNotPreferredFlags |= VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT;
+                    else
+                        outPreferredFlags |= VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT;
+                }
+                // GPU no direct access, CPU sequential write (e.g. an upload buffer to be transferred to the GPU)
+                else
+                {
+                    // Could go to CPU memory or GPU BAR/unified. Up to the user to decide. If no preference, choose CPU memory.
+                    if(preferDevice)
+                        outPreferredFlags |= VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT;
+                    else
+                        outNotPreferredFlags |= VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT;
+                }
+            }
+        }
+        // No CPU access
+        else
+        {
+            // if(deviceAccess)
+            //
+            // GPU access, no CPU access (e.g. a color attachment image) - prefer GPU memory,
+            // unless there is a clear preference from the user not to do so.
+            //
+            // else:
+            //
+            // No direct GPU access, no CPU access, just transfers.
+            // It may be staging copy intended for e.g. preserving image for next frame (then better GPU memory) or
+            // a "swap file" copy to free some GPU memory (then better CPU memory).
+            // Up to the user to decide. If no preference, assume the former and choose GPU memory.
+
+            if(preferHost)
+                outNotPreferredFlags |= VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT;
+            else
+                outPreferredFlags |= VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT;
+        }
+        break;
+    }
+    default:
+        VMA_ASSERT(0);
+    }
+
+    // Avoid DEVICE_COHERENT unless explicitly requested.
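+    // (The *_AMD_COPY constants mirror VK_MEMORY_PROPERTY_DEVICE_COHERENT_BIT_AMD
+    // and VK_MEMORY_PROPERTY_DEVICE_UNCACHED_BIT_AMD from the
+    // VK_AMD_device_coherent_memory extension, memory that is mainly useful for
+    // debugging and tends to be slow, so by default the selection steers away from it.)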
+    if(((allocCreateInfo.requiredFlags | allocCreateInfo.preferredFlags) &
+        (VK_MEMORY_PROPERTY_DEVICE_COHERENT_BIT_AMD_COPY | VK_MEMORY_PROPERTY_DEVICE_UNCACHED_BIT_AMD_COPY)) == 0)
+    {
+        outNotPreferredFlags |= VK_MEMORY_PROPERTY_DEVICE_UNCACHED_BIT_AMD_COPY;
+    }
+
+    return true;
+}
+
+////////////////////////////////////////////////////////////////////////////////
+// Memory allocation
+
+static void* VmaMalloc(const VkAllocationCallbacks* pAllocationCallbacks, size_t size, size_t alignment)
+{
+    void* result = VMA_NULL;
+    if ((pAllocationCallbacks != VMA_NULL) &&
+        (pAllocationCallbacks->pfnAllocation != VMA_NULL))
+    {
+        result = (*pAllocationCallbacks->pfnAllocation)(
+            pAllocationCallbacks->pUserData,
+            size,
+            alignment,
+            VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
+    }
+    else
+    {
+        result = VMA_SYSTEM_ALIGNED_MALLOC(size, alignment);
+    }
+    VMA_ASSERT(result != VMA_NULL && "CPU memory allocation failed.");
+    return result;
+}
+
+static void VmaFree(const VkAllocationCallbacks* pAllocationCallbacks, void* ptr)
+{
+    if ((pAllocationCallbacks != VMA_NULL) &&
+        (pAllocationCallbacks->pfnFree != VMA_NULL))
+    {
+        (*pAllocationCallbacks->pfnFree)(pAllocationCallbacks->pUserData, ptr);
+    }
+    else
+    {
+        VMA_SYSTEM_ALIGNED_FREE(ptr);
+    }
+}
+
+template<typename T>
+static T* VmaAllocate(const VkAllocationCallbacks* pAllocationCallbacks)
+{
+    return (T*)VmaMalloc(pAllocationCallbacks, sizeof(T), VMA_ALIGN_OF(T));
+}
+
+template<typename T>
+static T* VmaAllocateArray(const VkAllocationCallbacks* pAllocationCallbacks, size_t count)
+{
+    return (T*)VmaMalloc(pAllocationCallbacks, sizeof(T) * count, VMA_ALIGN_OF(T));
+}
+
+#define vma_new(allocator, type) new(VmaAllocate<type>(allocator))(type)
+
+#define vma_new_array(allocator, type, count) new(VmaAllocateArray<type>((allocator), (count)))(type)
+
+template<typename T>
+static void vma_delete(const VkAllocationCallbacks* pAllocationCallbacks, T* ptr)
+{
+    ptr->~T();
+    VmaFree(pAllocationCallbacks, ptr);
+}
+
+template<typename T>
+static void vma_delete_array(const VkAllocationCallbacks* pAllocationCallbacks, T* ptr, size_t count)
+{
+    if (ptr != VMA_NULL)
+    {
+        for (size_t i = count; i--; )
+        {
+            ptr[i].~T();
+        }
+        VmaFree(pAllocationCallbacks, ptr);
+    }
+}
+
+static char* VmaCreateStringCopy(const VkAllocationCallbacks* allocs, const char* srcStr)
+{
+    if (srcStr != VMA_NULL)
+    {
+        const size_t len = strlen(srcStr);
+        char* const result = vma_new_array(allocs, char, len + 1);
+        memcpy(result, srcStr, len + 1);
+        return result;
+    }
+    return VMA_NULL;
+}
+
+#if VMA_STATS_STRING_ENABLED
+static char* VmaCreateStringCopy(const VkAllocationCallbacks* allocs, const char* srcStr, size_t strLen)
+{
+    if (srcStr != VMA_NULL)
+    {
+        char* const result = vma_new_array(allocs, char, strLen + 1);
+        memcpy(result, srcStr, strLen);
+        result[strLen] = '\0';
+        return result;
+    }
+    return VMA_NULL;
+}
+#endif // VMA_STATS_STRING_ENABLED
+
+static void VmaFreeString(const VkAllocationCallbacks* allocs, char* str)
+{
+    if (str != VMA_NULL)
+    {
+        const size_t len = strlen(str);
+        vma_delete_array(allocs, str, len + 1);
+    }
+}
+
+template<typename CmpLess, typename VectorT>
+size_t VmaVectorInsertSorted(VectorT& vector, const typename VectorT::value_type& value)
+{
+    const size_t indexToInsert = VmaBinaryFindFirstNotLess(
+        vector.data(),
+        vector.data() + vector.size(),
+        value,
+        CmpLess()) - vector.data();
+    VmaVectorInsert(vector, indexToInsert, value);
+    return indexToInsert;
+}
+
+template<typename CmpLess, typename VectorT>
+bool VmaVectorRemoveSorted(VectorT& vector, const typename VectorT::value_type& value)
+{
+    CmpLess comparator;
+    typename VectorT::iterator it = VmaBinaryFindFirstNotLess(
+
vector.begin(), + vector.end(), + value, + comparator); + if ((it != vector.end()) && !comparator(*it, value) && !comparator(value, *it)) + { + size_t indexToRemove = it - vector.begin(); + VmaVectorRemove(vector, indexToRemove); + return true; + } + return false; +} +#endif // _VMA_FUNCTIONS + +#ifndef _VMA_STATISTICS_FUNCTIONS + +static void VmaClearStatistics(VmaStatistics& outStats) +{ + outStats.blockCount = 0; + outStats.allocationCount = 0; + outStats.blockBytes = 0; + outStats.allocationBytes = 0; +} + +static void VmaAddStatistics(VmaStatistics& inoutStats, const VmaStatistics& src) +{ + inoutStats.blockCount += src.blockCount; + inoutStats.allocationCount += src.allocationCount; + inoutStats.blockBytes += src.blockBytes; + inoutStats.allocationBytes += src.allocationBytes; +} + +static void VmaClearDetailedStatistics(VmaDetailedStatistics& outStats) +{ + VmaClearStatistics(outStats.statistics); + outStats.unusedRangeCount = 0; + outStats.allocationSizeMin = VK_WHOLE_SIZE; + outStats.allocationSizeMax = 0; + outStats.unusedRangeSizeMin = VK_WHOLE_SIZE; + outStats.unusedRangeSizeMax = 0; +} + +static void VmaAddDetailedStatisticsAllocation(VmaDetailedStatistics& inoutStats, VkDeviceSize size) +{ + inoutStats.statistics.allocationCount++; + inoutStats.statistics.allocationBytes += size; + inoutStats.allocationSizeMin = VMA_MIN(inoutStats.allocationSizeMin, size); + inoutStats.allocationSizeMax = VMA_MAX(inoutStats.allocationSizeMax, size); +} + +static void VmaAddDetailedStatisticsUnusedRange(VmaDetailedStatistics& inoutStats, VkDeviceSize size) +{ + inoutStats.unusedRangeCount++; + inoutStats.unusedRangeSizeMin = VMA_MIN(inoutStats.unusedRangeSizeMin, size); + inoutStats.unusedRangeSizeMax = VMA_MAX(inoutStats.unusedRangeSizeMax, size); +} + +static void VmaAddDetailedStatistics(VmaDetailedStatistics& inoutStats, const VmaDetailedStatistics& src) +{ + VmaAddStatistics(inoutStats.statistics, src.statistics); + inoutStats.unusedRangeCount += src.unusedRangeCount; + inoutStats.allocationSizeMin = VMA_MIN(inoutStats.allocationSizeMin, src.allocationSizeMin); + inoutStats.allocationSizeMax = VMA_MAX(inoutStats.allocationSizeMax, src.allocationSizeMax); + inoutStats.unusedRangeSizeMin = VMA_MIN(inoutStats.unusedRangeSizeMin, src.unusedRangeSizeMin); + inoutStats.unusedRangeSizeMax = VMA_MAX(inoutStats.unusedRangeSizeMax, src.unusedRangeSizeMax); +} + +#endif // _VMA_STATISTICS_FUNCTIONS + +#ifndef _VMA_MUTEX_LOCK +// Helper RAII class to lock a mutex in constructor and unlock it in destructor (at the end of scope). +struct VmaMutexLock +{ + VMA_CLASS_NO_COPY_NO_MOVE(VmaMutexLock) +public: + VmaMutexLock(VMA_MUTEX& mutex, bool useMutex = true) : + m_pMutex(useMutex ? &mutex : VMA_NULL) + { + if (m_pMutex) { m_pMutex->Lock(); } + } + ~VmaMutexLock() { if (m_pMutex) { m_pMutex->Unlock(); } } + +private: + VMA_MUTEX* m_pMutex; +}; + +// Helper RAII class to lock a RW mutex in constructor and unlock it in destructor (at the end of scope), for reading. +struct VmaMutexLockRead +{ + VMA_CLASS_NO_COPY_NO_MOVE(VmaMutexLockRead) +public: + VmaMutexLockRead(VMA_RW_MUTEX& mutex, bool useMutex) : + m_pMutex(useMutex ? &mutex : VMA_NULL) + { + if (m_pMutex) { m_pMutex->LockRead(); } + } + ~VmaMutexLockRead() { if (m_pMutex) { m_pMutex->UnlockRead(); } } + +private: + VMA_RW_MUTEX* m_pMutex; +}; + +// Helper RAII class to lock a RW mutex in constructor and unlock it in destructor (at the end of scope), for writing. 
+struct VmaMutexLockWrite
+{
+    VMA_CLASS_NO_COPY_NO_MOVE(VmaMutexLockWrite)
+public:
+    VmaMutexLockWrite(VMA_RW_MUTEX& mutex, bool useMutex)
+        : m_pMutex(useMutex ? &mutex : VMA_NULL)
+    {
+        if (m_pMutex) { m_pMutex->LockWrite(); }
+    }
+    ~VmaMutexLockWrite() { if (m_pMutex) { m_pMutex->UnlockWrite(); } }
+
+private:
+    VMA_RW_MUTEX* m_pMutex;
+};
+
+#if VMA_DEBUG_GLOBAL_MUTEX
+    static VMA_MUTEX gDebugGlobalMutex;
+    #define VMA_DEBUG_GLOBAL_MUTEX_LOCK VmaMutexLock debugGlobalMutexLock(gDebugGlobalMutex, true);
+#else
+    #define VMA_DEBUG_GLOBAL_MUTEX_LOCK
+#endif
+#endif // _VMA_MUTEX_LOCK
+
+#ifndef _VMA_ATOMIC_TRANSACTIONAL_INCREMENT
+// An object that increments given atomic but decrements it back in the destructor unless Commit() is called.
+template<typename AtomicT>
+struct AtomicTransactionalIncrement
+{
+public:
+    using T = decltype(AtomicT().load());
+
+    ~AtomicTransactionalIncrement()
+    {
+        if(m_Atomic)
+            --(*m_Atomic);
+    }
+
+    void Commit() { m_Atomic = VMA_NULL; }
+    T Increment(AtomicT* atomic)
+    {
+        m_Atomic = atomic;
+        return m_Atomic->fetch_add(1);
+    }
+
+private:
+    AtomicT* m_Atomic = VMA_NULL;
+};
+#endif // _VMA_ATOMIC_TRANSACTIONAL_INCREMENT
+
+#ifndef _VMA_STL_ALLOCATOR
+// STL-compatible allocator.
+template<typename T>
+struct VmaStlAllocator
+{
+    const VkAllocationCallbacks* const m_pCallbacks;
+    typedef T value_type;
+
+    VmaStlAllocator(const VkAllocationCallbacks* pCallbacks) : m_pCallbacks(pCallbacks) {}
+    template<typename U>
+    VmaStlAllocator(const VmaStlAllocator<U>& src) : m_pCallbacks(src.m_pCallbacks) {}
+    VmaStlAllocator(const VmaStlAllocator&) = default;
+    VmaStlAllocator& operator=(const VmaStlAllocator&) = delete;
+
+    T* allocate(size_t n) { return VmaAllocateArray<T>(m_pCallbacks, n); }
+    void deallocate(T* p, size_t n) { VmaFree(m_pCallbacks, p); }
+
+    template<typename U>
+    bool operator==(const VmaStlAllocator<U>& rhs) const
+    {
+        return m_pCallbacks == rhs.m_pCallbacks;
+    }
+    template<typename U>
+    bool operator!=(const VmaStlAllocator<U>& rhs) const
+    {
+        return m_pCallbacks != rhs.m_pCallbacks;
+    }
+};
+#endif // _VMA_STL_ALLOCATOR
+
+#ifndef _VMA_VECTOR
+/* Class with interface compatible with subset of std::vector.
+T must be POD because constructors and destructors are not called and memcpy is
+used for these objects. */
+template<typename T, typename AllocatorT>
+class VmaVector
+{
+public:
+    typedef T value_type;
+    typedef T* iterator;
+    typedef const T* const_iterator;
+
+    VmaVector(const AllocatorT& allocator);
+    VmaVector(size_t count, const AllocatorT& allocator);
+    // This version of the constructor is here for compatibility with pre-C++14 std::vector.
+    // value is unused.
+    VmaVector(size_t count, const T& value, const AllocatorT& allocator) : VmaVector(count, allocator) {}
+    VmaVector(const VmaVector& src);
+    VmaVector& operator=(const VmaVector& rhs);
+    ~VmaVector() { VmaFree(m_Allocator.m_pCallbacks, m_pArray); }
+
+    bool empty() const { return m_Count == 0; }
+    size_t size() const { return m_Count; }
+    T* data() { return m_pArray; }
+    T& front() { VMA_HEAVY_ASSERT(m_Count > 0); return m_pArray[0]; }
+    T& back() { VMA_HEAVY_ASSERT(m_Count > 0); return m_pArray[m_Count - 1]; }
+    const T* data() const { return m_pArray; }
+    const T& front() const { VMA_HEAVY_ASSERT(m_Count > 0); return m_pArray[0]; }
+    const T& back() const { VMA_HEAVY_ASSERT(m_Count > 0); return m_pArray[m_Count - 1]; }
+
+    iterator begin() { return m_pArray; }
+    iterator end() { return m_pArray + m_Count; }
+    const_iterator cbegin() const { return m_pArray; }
+    const_iterator cend() const { return m_pArray + m_Count; }
+    const_iterator begin() const { return cbegin(); }
+    const_iterator end() const { return cend(); }
+
+    void pop_front() { VMA_HEAVY_ASSERT(m_Count > 0); remove(0); }
+    void pop_back() { VMA_HEAVY_ASSERT(m_Count > 0); resize(size() - 1); }
+    void push_front(const T& src) { insert(0, src); }
+
+    void push_back(const T& src);
+    void reserve(size_t newCapacity, bool freeMemory = false);
+    void resize(size_t newCount);
+    void clear() { resize(0); }
+    void shrink_to_fit();
+    void insert(size_t index, const T& src);
+    void remove(size_t index);
+
+    T& operator[](size_t index) { VMA_HEAVY_ASSERT(index < m_Count); return m_pArray[index]; }
+    const T& operator[](size_t index) const { VMA_HEAVY_ASSERT(index < m_Count); return m_pArray[index]; }
+
+private:
+    AllocatorT m_Allocator;
+    T* m_pArray;
+    size_t m_Count;
+    size_t m_Capacity;
+};
+
+#ifndef _VMA_VECTOR_FUNCTIONS
+template<typename T, typename AllocatorT>
+VmaVector<T, AllocatorT>::VmaVector(const AllocatorT& allocator)
+    : m_Allocator(allocator),
+    m_pArray(VMA_NULL),
+    m_Count(0),
+    m_Capacity(0) {}
+
+template<typename T, typename AllocatorT>
+VmaVector<T, AllocatorT>::VmaVector(size_t count, const AllocatorT& allocator)
+    : m_Allocator(allocator),
+    m_pArray(count ? (T*)VmaAllocateArray<T>(allocator.m_pCallbacks, count) : VMA_NULL),
+    m_Count(count),
+    m_Capacity(count) {}
+
+template<typename T, typename AllocatorT>
+VmaVector<T, AllocatorT>::VmaVector(const VmaVector& src)
+    : m_Allocator(src.m_Allocator),
+    m_pArray(src.m_Count ? (T*)VmaAllocateArray<T>(src.m_Allocator.m_pCallbacks, src.m_Count) : VMA_NULL),
+    m_Count(src.m_Count),
+    m_Capacity(src.m_Count)
+{
+    if (m_Count != 0)
+    {
+        memcpy(m_pArray, src.m_pArray, m_Count * sizeof(T));
+    }
+}
+
+template<typename T, typename AllocatorT>
+VmaVector<T, AllocatorT>& VmaVector<T, AllocatorT>::operator=(const VmaVector& rhs)
+{
+    if (&rhs != this)
+    {
+        resize(rhs.m_Count);
+        if (m_Count != 0)
+        {
+            memcpy(m_pArray, rhs.m_pArray, m_Count * sizeof(T));
+        }
+    }
+    return *this;
+}
+
+template<typename T, typename AllocatorT>
+void VmaVector<T, AllocatorT>::push_back(const T& src)
+{
+    const size_t newIndex = size();
+    resize(newIndex + 1);
+    m_pArray[newIndex] = src;
+}
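+// A minimal usage sketch of VmaVector (illustrative only, kept out of the build
+// with #if 0; note that T must stay POD-like as documented above):
+#if 0
+static void VmaVectorExample(const VkAllocationCallbacks* pCallbacks)
+{
+    const VmaStlAllocator<uint32_t> alloc(pCallbacks);
+    VmaVector<uint32_t, VmaStlAllocator<uint32_t>> v(alloc);
+    v.push_back(1); // {1}
+    v.insert(0, 2); // {2, 1} - elements are shifted with memmove, not copy-constructed
+    v.remove(1);    // {2}
+}
+#endif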
+template<typename T, typename AllocatorT>
+void VmaVector<T, AllocatorT>::reserve(size_t newCapacity, bool freeMemory)
+{
+    newCapacity = VMA_MAX(newCapacity, m_Count);
+
+    if ((newCapacity < m_Capacity) && !freeMemory)
+    {
+        newCapacity = m_Capacity;
+    }
+
+    if (newCapacity != m_Capacity)
+    {
+        T* const newArray = newCapacity ? VmaAllocateArray<T>(m_Allocator.m_pCallbacks, newCapacity) : VMA_NULL;
+        if (m_Count != 0)
+        {
+            memcpy(newArray, m_pArray, m_Count * sizeof(T));
+        }
+        VmaFree(m_Allocator.m_pCallbacks, m_pArray);
+        m_Capacity = newCapacity;
+        m_pArray = newArray;
+    }
+}
+
+template<typename T, typename AllocatorT>
+void VmaVector<T, AllocatorT>::resize(size_t newCount)
+{
+    size_t newCapacity = m_Capacity;
+    if (newCount > m_Capacity)
+    {
+        newCapacity = VMA_MAX(newCount, VMA_MAX(m_Capacity * 3 / 2, (size_t)8));
+    }
+
+    if (newCapacity != m_Capacity)
+    {
+        T* const newArray = newCapacity ? VmaAllocateArray<T>(m_Allocator.m_pCallbacks, newCapacity) : VMA_NULL;
+        const size_t elementsToCopy = VMA_MIN(m_Count, newCount);
+        if (elementsToCopy != 0)
+        {
+            memcpy(newArray, m_pArray, elementsToCopy * sizeof(T));
+        }
+        VmaFree(m_Allocator.m_pCallbacks, m_pArray);
+        m_Capacity = newCapacity;
+        m_pArray = newArray;
+    }
+
+    m_Count = newCount;
+}
+
+template<typename T, typename AllocatorT>
+void VmaVector<T, AllocatorT>::shrink_to_fit()
+{
+    if (m_Capacity > m_Count)
+    {
+        T* newArray = VMA_NULL;
+        if (m_Count > 0)
+        {
+            newArray = VmaAllocateArray<T>(m_Allocator.m_pCallbacks, m_Count);
+            memcpy(newArray, m_pArray, m_Count * sizeof(T));
+        }
+        VmaFree(m_Allocator.m_pCallbacks, m_pArray);
+        m_Capacity = m_Count;
+        m_pArray = newArray;
+    }
+}
+
+template<typename T, typename AllocatorT>
+void VmaVector<T, AllocatorT>::insert(size_t index, const T& src)
+{
+    VMA_HEAVY_ASSERT(index <= m_Count);
+    const size_t oldCount = size();
+    resize(oldCount + 1);
+    if (index < oldCount)
+    {
+        memmove(m_pArray + (index + 1), m_pArray + index, (oldCount - index) * sizeof(T));
+    }
+    m_pArray[index] = src;
+}
+
+template<typename T, typename AllocatorT>
+void VmaVector<T, AllocatorT>::remove(size_t index)
+{
+    VMA_HEAVY_ASSERT(index < m_Count);
+    const size_t oldCount = size();
+    if (index < oldCount - 1)
+    {
+        memmove(m_pArray + index, m_pArray + (index + 1), (oldCount - index - 1) * sizeof(T));
+    }
+    resize(oldCount - 1);
+}
+#endif // _VMA_VECTOR_FUNCTIONS
+
+template<typename T, typename allocatorT>
+static void VmaVectorInsert(VmaVector<T, allocatorT>& vec, size_t index, const T& item)
+{
+    vec.insert(index, item);
+}
+
+template<typename T, typename allocatorT>
+static void VmaVectorRemove(VmaVector<T, allocatorT>& vec, size_t index)
+{
+    vec.remove(index);
+}
+#endif // _VMA_VECTOR
+
+#ifndef _VMA_SMALL_VECTOR
+/*
+This is a vector (a variable-sized array), optimized for the case when the array is small.
+
+It contains some number of elements in-place, which allows it to avoid heap allocation
+when the actual number of elements is below that threshold. This allows normal "small"
+cases to be fast without losing generality for large inputs.
+*/
+template<typename T, typename AllocatorT, size_t N>
+class VmaSmallVector
+{
+public:
+    typedef T value_type;
+    typedef T* iterator;
+
+    VmaSmallVector(const AllocatorT& allocator);
+    VmaSmallVector(size_t count, const AllocatorT& allocator);
+    template<typename SrcT, typename SrcAllocatorT, size_t SrcN>
+    VmaSmallVector(const VmaSmallVector<SrcT, SrcAllocatorT, SrcN>&) = delete;
+    template<typename SrcT, typename SrcAllocatorT, size_t SrcN>
+    VmaSmallVector& operator=(const VmaSmallVector<SrcT, SrcAllocatorT, SrcN>&) = delete;
+    ~VmaSmallVector() = default;
+
+    bool empty() const { return m_Count == 0; }
+    size_t size() const { return m_Count; }
+    T* data() { return m_Count > N ? m_DynamicArray.data() : m_StaticArray; }
+    T& front() { VMA_HEAVY_ASSERT(m_Count > 0); return data()[0]; }
+    T& back() { VMA_HEAVY_ASSERT(m_Count > 0); return data()[m_Count - 1]; }
+    const T* data() const { return m_Count > N ? m_DynamicArray.data() : m_StaticArray; }
+    const T& front() const { VMA_HEAVY_ASSERT(m_Count > 0); return data()[0]; }
+    const T& back() const { VMA_HEAVY_ASSERT(m_Count > 0); return data()[m_Count - 1]; }
+
+    iterator begin() { return data(); }
+    iterator end() { return data() + m_Count; }
+
+    void pop_front() { VMA_HEAVY_ASSERT(m_Count > 0); remove(0); }
+    void pop_back() { VMA_HEAVY_ASSERT(m_Count > 0); resize(size() - 1); }
+    void push_front(const T& src) { insert(0, src); }
+
+    void push_back(const T& src);
+    void resize(size_t newCount, bool freeMemory = false);
+    void clear(bool freeMemory = false);
+    void insert(size_t index, const T& src);
+    void remove(size_t index);
+
+    T& operator[](size_t index) { VMA_HEAVY_ASSERT(index < m_Count); return data()[index]; }
+    const T& operator[](size_t index) const { VMA_HEAVY_ASSERT(index < m_Count); return data()[index]; }
+
+private:
+    size_t m_Count;
+    T m_StaticArray[N]; // Used when m_Size <= N
+    VmaVector<T, AllocatorT> m_DynamicArray; // Used when m_Size > N
+};
+
+#ifndef _VMA_SMALL_VECTOR_FUNCTIONS
+template<typename T, typename AllocatorT, size_t N>
+VmaSmallVector<T, AllocatorT, N>::VmaSmallVector(const AllocatorT& allocator)
+    : m_Count(0),
+    m_DynamicArray(allocator) {}
+
+template<typename T, typename AllocatorT, size_t N>
+VmaSmallVector<T, AllocatorT, N>::VmaSmallVector(size_t count, const AllocatorT& allocator)
+    : m_Count(count),
+    m_DynamicArray(count > N ? count : 0, allocator) {}
+
+template<typename T, typename AllocatorT, size_t N>
+void VmaSmallVector<T, AllocatorT, N>::push_back(const T& src)
+{
+    const size_t newIndex = size();
+    resize(newIndex + 1);
+    data()[newIndex] = src;
+}
+
+template<typename T, typename AllocatorT, size_t N>
+void VmaSmallVector<T, AllocatorT, N>::resize(size_t newCount, bool freeMemory)
+{
+    if (newCount > N && m_Count > N)
+    {
+        // Any direction, staying in m_DynamicArray
+        m_DynamicArray.resize(newCount);
+        if (freeMemory)
+        {
+            m_DynamicArray.shrink_to_fit();
+        }
+    }
+    else if (newCount > N && m_Count <= N)
+    {
+        // Growing, moving from m_StaticArray to m_DynamicArray
+        m_DynamicArray.resize(newCount);
+        if (m_Count > 0)
+        {
+            memcpy(m_DynamicArray.data(), m_StaticArray, m_Count * sizeof(T));
+        }
+    }
+    else if (newCount <= N && m_Count > N)
+    {
+        // Shrinking, moving from m_DynamicArray to m_StaticArray
+        if (newCount > 0)
+        {
+            memcpy(m_StaticArray, m_DynamicArray.data(), newCount * sizeof(T));
+        }
+        m_DynamicArray.resize(0);
+        if (freeMemory)
+        {
+            m_DynamicArray.shrink_to_fit();
+        }
+    }
+    else
+    {
+        // Any direction, staying in m_StaticArray - nothing to do here
+    }
+    m_Count = newCount;
+}
+
+template<typename T, typename AllocatorT, size_t N>
+void VmaSmallVector<T, AllocatorT, N>::clear(bool freeMemory)
+{
+    m_DynamicArray.clear();
+    if (freeMemory)
+    {
+        m_DynamicArray.shrink_to_fit();
+    }
+    m_Count = 0;
+}
+
+template<typename T, typename AllocatorT, size_t N>
+void VmaSmallVector<T, AllocatorT, N>::insert(size_t index, const T& src)
+{
+    VMA_HEAVY_ASSERT(index <= m_Count);
+    const size_t oldCount = size();
+    resize(oldCount + 1);
+    T* const dataPtr = data();
+    if (index < oldCount)
+    {
+        // I know, this could be more optimal for case where memmove can be memcpy directly from m_StaticArray to m_DynamicArray.
+        memmove(dataPtr + (index + 1), dataPtr + index, (oldCount - index) * sizeof(T));
+    }
+    dataPtr[index] = src;
+}
+
+template<typename T, typename AllocatorT, size_t N>
+void VmaSmallVector<T, AllocatorT, N>::remove(size_t index)
+{
+    VMA_HEAVY_ASSERT(index < m_Count);
+    const size_t oldCount = size();
+    if (index < oldCount - 1)
+    {
+        // I know, this could be more optimal for case where memmove can be memcpy directly from m_DynamicArray to m_StaticArray.
+        T* const dataPtr = data();
+        memmove(dataPtr + index, dataPtr + (index + 1), (oldCount - index - 1) * sizeof(T));
+    }
+    resize(oldCount - 1);
+}
+#endif // _VMA_SMALL_VECTOR_FUNCTIONS
+#endif // _VMA_SMALL_VECTOR
+
+#ifndef _VMA_POOL_ALLOCATOR
+/*
+Allocator for objects of type T using a list of arrays (pools) to speed up
+allocation. Number of elements that can be allocated is not bounded because
+allocator can create multiple blocks.
+*/
+template<typename T>
+class VmaPoolAllocator
+{
+    VMA_CLASS_NO_COPY_NO_MOVE(VmaPoolAllocator)
+public:
+    VmaPoolAllocator(const VkAllocationCallbacks* pAllocationCallbacks, uint32_t firstBlockCapacity);
+    ~VmaPoolAllocator();
+    template<typename... Types> T* Alloc(Types&&... args);
+    void Free(T* ptr);
+
+private:
+    union Item
+    {
+        uint32_t NextFreeIndex;
+        alignas(T) char Value[sizeof(T)];
+    };
+    struct ItemBlock
+    {
+        Item* pItems;
+        uint32_t Capacity;
+        uint32_t FirstFreeIndex;
+    };
+
+    const VkAllocationCallbacks* m_pAllocationCallbacks;
+    const uint32_t m_FirstBlockCapacity;
+    VmaVector<ItemBlock, VmaStlAllocator<ItemBlock>> m_ItemBlocks;
+
+    ItemBlock& CreateNewBlock();
+};
+
+#ifndef _VMA_POOL_ALLOCATOR_FUNCTIONS
+template<typename T>
+VmaPoolAllocator<T>::VmaPoolAllocator(const VkAllocationCallbacks* pAllocationCallbacks, uint32_t firstBlockCapacity)
+    : m_pAllocationCallbacks(pAllocationCallbacks),
+    m_FirstBlockCapacity(firstBlockCapacity),
+    m_ItemBlocks(VmaStlAllocator<ItemBlock>(pAllocationCallbacks))
+{
+    VMA_ASSERT(m_FirstBlockCapacity > 1);
+}
+
+template<typename T>
+VmaPoolAllocator<T>::~VmaPoolAllocator()
+{
+    for (size_t i = m_ItemBlocks.size(); i--;)
+        vma_delete_array(m_pAllocationCallbacks, m_ItemBlocks[i].pItems, m_ItemBlocks[i].Capacity);
+    m_ItemBlocks.clear();
+}
+
+template<typename T>
+template<typename... Types> T* VmaPoolAllocator<T>::Alloc(Types&&... args)
+{
+    for (size_t i = m_ItemBlocks.size(); i--; )
+    {
+        ItemBlock& block = m_ItemBlocks[i];
+        // This block has some free items: Use first one.
+        if (block.FirstFreeIndex != UINT32_MAX)
+        {
+            Item* const pItem = &block.pItems[block.FirstFreeIndex];
+            block.FirstFreeIndex = pItem->NextFreeIndex;
+            T* result = (T*)&pItem->Value;
+            new(result)T(std::forward<Types>(args)...); // Explicit constructor call.
+            return result;
+        }
+    }
+
+    // No block has free item: Create new one and use it.
+    ItemBlock& newBlock = CreateNewBlock();
+    Item* const pItem = &newBlock.pItems[0];
+    newBlock.FirstFreeIndex = pItem->NextFreeIndex;
+    T* result = (T*)&pItem->Value;
+    new(result) T(std::forward<Types>(args)...); // Explicit constructor call.
+    return result;
+}
+
+template<typename T>
+void VmaPoolAllocator<T>::Free(T* ptr)
+{
+    // Search all memory blocks to find ptr.
+    for (size_t i = m_ItemBlocks.size(); i--; )
+    {
+        ItemBlock& block = m_ItemBlocks[i];
+
+        // Casting to union.
+        Item* pItemPtr;
+        memcpy(&pItemPtr, &ptr, sizeof(pItemPtr));
+
+        // Check if pItemPtr is in address range of this block.
+        if ((pItemPtr >= block.pItems) && (pItemPtr < block.pItems + block.Capacity))
+        {
+            ptr->~T(); // Explicit destructor call.
+            const uint32_t index = static_cast<uint32_t>(pItemPtr - block.pItems);
+            pItemPtr->NextFreeIndex = block.FirstFreeIndex;
+            block.FirstFreeIndex = index;
+            return;
+        }
+    }
+    VMA_ASSERT(0 && "Pointer doesn't belong to this memory pool.");
+}
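+// A minimal usage sketch of VmaPoolAllocator (illustrative only, kept out of
+// the build with #if 0; the Node type and callbacks here are hypothetical):
+#if 0
+struct Node { int value; explicit Node(int v) : value(v) {} };
+
+static void VmaPoolAllocatorExample(const VkAllocationCallbacks* pCallbacks)
+{
+    VmaPoolAllocator<Node> pool(pCallbacks, 32); // first block holds 32 items
+    Node* a = pool.Alloc(1); // arguments are forwarded to Node's constructor
+    Node* b = pool.Alloc(2); // reuses a free slot, no separate heap allocation
+    pool.Free(a);            // runs ~Node() and returns the slot to the free list
+    pool.Free(b);
+}
+#endif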
+template<typename T>
+typename VmaPoolAllocator<T>::ItemBlock& VmaPoolAllocator<T>::CreateNewBlock()
+{
+    const uint32_t newBlockCapacity = m_ItemBlocks.empty() ?
+        m_FirstBlockCapacity : m_ItemBlocks.back().Capacity * 3 / 2;
+
+    const ItemBlock newBlock =
+    {
+        vma_new_array(m_pAllocationCallbacks, Item, newBlockCapacity),
+        newBlockCapacity,
+        0
+    };
+
+    m_ItemBlocks.push_back(newBlock);
+
+    // Setup singly-linked list of all free items in this block.
+    for (uint32_t i = 0; i < newBlockCapacity - 1; ++i)
+        newBlock.pItems[i].NextFreeIndex = i + 1;
+    newBlock.pItems[newBlockCapacity - 1].NextFreeIndex = UINT32_MAX;
+    return m_ItemBlocks.back();
+}
+#endif // _VMA_POOL_ALLOCATOR_FUNCTIONS
+#endif // _VMA_POOL_ALLOCATOR
+
+#ifndef _VMA_RAW_LIST
+template<typename T>
+struct VmaListItem
+{
+    VmaListItem* pPrev;
+    VmaListItem* pNext;
+    T Value;
+};
+
+// Doubly linked list.
+template<typename T>
+class VmaRawList
+{
+    VMA_CLASS_NO_COPY_NO_MOVE(VmaRawList)
+public:
+    typedef VmaListItem<T> ItemType;
+
+    VmaRawList(const VkAllocationCallbacks* pAllocationCallbacks);
+    // Intentionally not calling Clear, because that would be unnecessary
+    // computations to return all items to m_ItemAllocator as free.
+    ~VmaRawList() = default;
+
+    size_t GetCount() const { return m_Count; }
+    bool IsEmpty() const { return m_Count == 0; }
+
+    ItemType* Front() { return m_pFront; }
+    ItemType* Back() { return m_pBack; }
+    const ItemType* Front() const { return m_pFront; }
+    const ItemType* Back() const { return m_pBack; }
+
+    ItemType* PushFront();
+    ItemType* PushBack();
+    ItemType* PushFront(const T& value);
+    ItemType* PushBack(const T& value);
+    void PopFront();
+    void PopBack();
+
+    // Item can be null - it means PushBack.
+    ItemType* InsertBefore(ItemType* pItem);
+    // Item can be null - it means PushFront.
+    ItemType* InsertAfter(ItemType* pItem);
+    ItemType* InsertBefore(ItemType* pItem, const T& value);
+    ItemType* InsertAfter(ItemType* pItem, const T& value);
+
+    void Clear();
+    void Remove(ItemType* pItem);
+
+private:
+    const VkAllocationCallbacks* const m_pAllocationCallbacks;
+    VmaPoolAllocator<ItemType> m_ItemAllocator;
+    ItemType* m_pFront;
+    ItemType* m_pBack;
+    size_t m_Count;
+};
+
+#ifndef _VMA_RAW_LIST_FUNCTIONS
+template<typename T>
+VmaRawList<T>::VmaRawList(const VkAllocationCallbacks* pAllocationCallbacks)
+    : m_pAllocationCallbacks(pAllocationCallbacks),
+    m_ItemAllocator(pAllocationCallbacks, 128),
+    m_pFront(VMA_NULL),
+    m_pBack(VMA_NULL),
+    m_Count(0) {}
+
+template<typename T>
+VmaListItem<T>* VmaRawList<T>::PushFront()
+{
+    ItemType* const pNewItem = m_ItemAllocator.Alloc();
+    pNewItem->pPrev = VMA_NULL;
+    if (IsEmpty())
+    {
+        pNewItem->pNext = VMA_NULL;
+        m_pFront = pNewItem;
+        m_pBack = pNewItem;
+        m_Count = 1;
+    }
+    else
+    {
+        pNewItem->pNext = m_pFront;
+        m_pFront->pPrev = pNewItem;
+        m_pFront = pNewItem;
+        ++m_Count;
+    }
+    return pNewItem;
+}
+
+template<typename T>
+VmaListItem<T>* VmaRawList<T>::PushBack()
+{
+    ItemType* const pNewItem = m_ItemAllocator.Alloc();
+    pNewItem->pNext = VMA_NULL;
+    if(IsEmpty())
+    {
+        pNewItem->pPrev = VMA_NULL;
+        m_pFront = pNewItem;
+        m_pBack = pNewItem;
+        m_Count = 1;
+    }
+    else
+    {
+        pNewItem->pPrev = m_pBack;
+        m_pBack->pNext = pNewItem;
+        m_pBack = pNewItem;
+        ++m_Count;
+    }
+    return pNewItem;
+}
+
+template<typename T>
+VmaListItem<T>* VmaRawList<T>::PushFront(const T& value)
+{
+    ItemType* const pNewItem = PushFront();
+    pNewItem->Value = value;
+    return pNewItem;
+}
+
+template<typename T>
+VmaListItem<T>* VmaRawList<T>::PushBack(const T& value)
+{
+    ItemType* const pNewItem = PushBack();
+    pNewItem->Value = value;
+    return pNewItem;
+}
+
+template<typename T>
+void VmaRawList<T>::PopFront()
+{
+    VMA_HEAVY_ASSERT(m_Count > 0);
+    ItemType* const pFrontItem = m_pFront;
+    ItemType* const pNextItem = pFrontItem->pNext;
+    if (pNextItem !=
VMA_NULL) + { + pNextItem->pPrev = VMA_NULL; + } + m_pFront = pNextItem; + m_ItemAllocator.Free(pFrontItem); + --m_Count; +} + +template +void VmaRawList::PopBack() +{ + VMA_HEAVY_ASSERT(m_Count > 0); + ItemType* const pBackItem = m_pBack; + ItemType* const pPrevItem = pBackItem->pPrev; + if(pPrevItem != VMA_NULL) + { + pPrevItem->pNext = VMA_NULL; + } + m_pBack = pPrevItem; + m_ItemAllocator.Free(pBackItem); + --m_Count; +} + +template +void VmaRawList::Clear() +{ + if (IsEmpty() == false) + { + ItemType* pItem = m_pBack; + while (pItem != VMA_NULL) + { + ItemType* const pPrevItem = pItem->pPrev; + m_ItemAllocator.Free(pItem); + pItem = pPrevItem; + } + m_pFront = VMA_NULL; + m_pBack = VMA_NULL; + m_Count = 0; + } +} + +template +void VmaRawList::Remove(ItemType* pItem) +{ + VMA_HEAVY_ASSERT(pItem != VMA_NULL); + VMA_HEAVY_ASSERT(m_Count > 0); + + if(pItem->pPrev != VMA_NULL) + { + pItem->pPrev->pNext = pItem->pNext; + } + else + { + VMA_HEAVY_ASSERT(m_pFront == pItem); + m_pFront = pItem->pNext; + } + + if(pItem->pNext != VMA_NULL) + { + pItem->pNext->pPrev = pItem->pPrev; + } + else + { + VMA_HEAVY_ASSERT(m_pBack == pItem); + m_pBack = pItem->pPrev; + } + + m_ItemAllocator.Free(pItem); + --m_Count; +} + +template +VmaListItem* VmaRawList::InsertBefore(ItemType* pItem) +{ + if(pItem != VMA_NULL) + { + ItemType* const prevItem = pItem->pPrev; + ItemType* const newItem = m_ItemAllocator.Alloc(); + newItem->pPrev = prevItem; + newItem->pNext = pItem; + pItem->pPrev = newItem; + if(prevItem != VMA_NULL) + { + prevItem->pNext = newItem; + } + else + { + VMA_HEAVY_ASSERT(m_pFront == pItem); + m_pFront = newItem; + } + ++m_Count; + return newItem; + } + else + return PushBack(); +} + +template +VmaListItem* VmaRawList::InsertAfter(ItemType* pItem) +{ + if(pItem != VMA_NULL) + { + ItemType* const nextItem = pItem->pNext; + ItemType* const newItem = m_ItemAllocator.Alloc(); + newItem->pNext = nextItem; + newItem->pPrev = pItem; + pItem->pNext = newItem; + if(nextItem != VMA_NULL) + { + nextItem->pPrev = newItem; + } + else + { + VMA_HEAVY_ASSERT(m_pBack == pItem); + m_pBack = newItem; + } + ++m_Count; + return newItem; + } + else + return PushFront(); +} + +template +VmaListItem* VmaRawList::InsertBefore(ItemType* pItem, const T& value) +{ + ItemType* const newItem = InsertBefore(pItem); + newItem->Value = value; + return newItem; +} + +template +VmaListItem* VmaRawList::InsertAfter(ItemType* pItem, const T& value) +{ + ItemType* const newItem = InsertAfter(pItem); + newItem->Value = value; + return newItem; +} +#endif // _VMA_RAW_LIST_FUNCTIONS +#endif // _VMA_RAW_LIST + +#ifndef _VMA_LIST +template +class VmaList +{ + VMA_CLASS_NO_COPY_NO_MOVE(VmaList) +public: + class reverse_iterator; + class const_iterator; + class const_reverse_iterator; + + class iterator + { + friend class const_iterator; + friend class VmaList; + public: + iterator() : m_pList(VMA_NULL), m_pItem(VMA_NULL) {} + iterator(const reverse_iterator& src) : m_pList(src.m_pList), m_pItem(src.m_pItem) {} + + T& operator*() const { VMA_HEAVY_ASSERT(m_pItem != VMA_NULL); return m_pItem->Value; } + T* operator->() const { VMA_HEAVY_ASSERT(m_pItem != VMA_NULL); return &m_pItem->Value; } + + bool operator==(const iterator& rhs) const { VMA_HEAVY_ASSERT(m_pList == rhs.m_pList); return m_pItem == rhs.m_pItem; } + bool operator!=(const iterator& rhs) const { VMA_HEAVY_ASSERT(m_pList == rhs.m_pList); return m_pItem != rhs.m_pItem; } + + iterator operator++(int) { iterator result = *this; ++*this; return result; } + iterator operator--(int) 
{ iterator result = *this; --*this; return result; } + + iterator& operator++() { VMA_HEAVY_ASSERT(m_pItem != VMA_NULL); m_pItem = m_pItem->pNext; return *this; } + iterator& operator--(); + + private: + VmaRawList* m_pList; + VmaListItem* m_pItem; + + iterator(VmaRawList* pList, VmaListItem* pItem) : m_pList(pList), m_pItem(pItem) {} + }; + class reverse_iterator + { + friend class const_reverse_iterator; + friend class VmaList; + public: + reverse_iterator() : m_pList(VMA_NULL), m_pItem(VMA_NULL) {} + reverse_iterator(const iterator& src) : m_pList(src.m_pList), m_pItem(src.m_pItem) {} + + T& operator*() const { VMA_HEAVY_ASSERT(m_pItem != VMA_NULL); return m_pItem->Value; } + T* operator->() const { VMA_HEAVY_ASSERT(m_pItem != VMA_NULL); return &m_pItem->Value; } + + bool operator==(const reverse_iterator& rhs) const { VMA_HEAVY_ASSERT(m_pList == rhs.m_pList); return m_pItem == rhs.m_pItem; } + bool operator!=(const reverse_iterator& rhs) const { VMA_HEAVY_ASSERT(m_pList == rhs.m_pList); return m_pItem != rhs.m_pItem; } + + reverse_iterator operator++(int) { reverse_iterator result = *this; ++* this; return result; } + reverse_iterator operator--(int) { reverse_iterator result = *this; --* this; return result; } + + reverse_iterator& operator++() { VMA_HEAVY_ASSERT(m_pItem != VMA_NULL); m_pItem = m_pItem->pPrev; return *this; } + reverse_iterator& operator--(); + + private: + VmaRawList* m_pList; + VmaListItem* m_pItem; + + reverse_iterator(VmaRawList* pList, VmaListItem* pItem) : m_pList(pList), m_pItem(pItem) {} + }; + class const_iterator + { + friend class VmaList; + public: + const_iterator() : m_pList(VMA_NULL), m_pItem(VMA_NULL) {} + const_iterator(const iterator& src) : m_pList(src.m_pList), m_pItem(src.m_pItem) {} + const_iterator(const reverse_iterator& src) : m_pList(src.m_pList), m_pItem(src.m_pItem) {} + + iterator drop_const() { return { const_cast*>(m_pList), const_cast*>(m_pItem) }; } + + const T& operator*() const { VMA_HEAVY_ASSERT(m_pItem != VMA_NULL); return m_pItem->Value; } + const T* operator->() const { VMA_HEAVY_ASSERT(m_pItem != VMA_NULL); return &m_pItem->Value; } + + bool operator==(const const_iterator& rhs) const { VMA_HEAVY_ASSERT(m_pList == rhs.m_pList); return m_pItem == rhs.m_pItem; } + bool operator!=(const const_iterator& rhs) const { VMA_HEAVY_ASSERT(m_pList == rhs.m_pList); return m_pItem != rhs.m_pItem; } + + const_iterator operator++(int) { const_iterator result = *this; ++* this; return result; } + const_iterator operator--(int) { const_iterator result = *this; --* this; return result; } + + const_iterator& operator++() { VMA_HEAVY_ASSERT(m_pItem != VMA_NULL); m_pItem = m_pItem->pNext; return *this; } + const_iterator& operator--(); + + private: + const VmaRawList* m_pList; + const VmaListItem* m_pItem; + + const_iterator(const VmaRawList* pList, const VmaListItem* pItem) : m_pList(pList), m_pItem(pItem) {} + }; + class const_reverse_iterator + { + friend class VmaList; + public: + const_reverse_iterator() : m_pList(VMA_NULL), m_pItem(VMA_NULL) {} + const_reverse_iterator(const reverse_iterator& src) : m_pList(src.m_pList), m_pItem(src.m_pItem) {} + const_reverse_iterator(const iterator& src) : m_pList(src.m_pList), m_pItem(src.m_pItem) {} + + reverse_iterator drop_const() { return { const_cast*>(m_pList), const_cast*>(m_pItem) }; } + + const T& operator*() const { VMA_HEAVY_ASSERT(m_pItem != VMA_NULL); return m_pItem->Value; } + const T* operator->() const { VMA_HEAVY_ASSERT(m_pItem != VMA_NULL); return &m_pItem->Value; } + + bool 
operator==(const const_reverse_iterator& rhs) const { VMA_HEAVY_ASSERT(m_pList == rhs.m_pList); return m_pItem == rhs.m_pItem; }
+        bool operator!=(const const_reverse_iterator& rhs) const { VMA_HEAVY_ASSERT(m_pList == rhs.m_pList); return m_pItem != rhs.m_pItem; }
+
+        const_reverse_iterator operator++(int) { const_reverse_iterator result = *this; ++*this; return result; }
+        const_reverse_iterator operator--(int) { const_reverse_iterator result = *this; --*this; return result; }
+
+        const_reverse_iterator& operator++() { VMA_HEAVY_ASSERT(m_pItem != VMA_NULL); m_pItem = m_pItem->pPrev; return *this; }
+        const_reverse_iterator& operator--();
+
+    private:
+        const VmaRawList<T>* m_pList;
+        const VmaListItem<T>* m_pItem;
+
+        const_reverse_iterator(const VmaRawList<T>* pList, const VmaListItem<T>* pItem) : m_pList(pList), m_pItem(pItem) {}
+    };
+
+    VmaList(const AllocatorT& allocator) : m_RawList(allocator.m_pCallbacks) {}
+
+    bool empty() const { return m_RawList.IsEmpty(); }
+    size_t size() const { return m_RawList.GetCount(); }
+
+    iterator begin() { return iterator(&m_RawList, m_RawList.Front()); }
+    iterator end() { return iterator(&m_RawList, VMA_NULL); }
+
+    const_iterator cbegin() const { return const_iterator(&m_RawList, m_RawList.Front()); }
+    const_iterator cend() const { return const_iterator(&m_RawList, VMA_NULL); }
+
+    const_iterator begin() const { return cbegin(); }
+    const_iterator end() const { return cend(); }
+
+    reverse_iterator rbegin() { return reverse_iterator(&m_RawList, m_RawList.Back()); }
+    reverse_iterator rend() { return reverse_iterator(&m_RawList, VMA_NULL); }
+
+    const_reverse_iterator crbegin() const { return const_reverse_iterator(&m_RawList, m_RawList.Back()); }
+    const_reverse_iterator crend() const { return const_reverse_iterator(&m_RawList, VMA_NULL); }
+
+    const_reverse_iterator rbegin() const { return crbegin(); }
+    const_reverse_iterator rend() const { return crend(); }
+
+    void push_back(const T& value) { m_RawList.PushBack(value); }
+    iterator insert(iterator it, const T& value) { return iterator(&m_RawList, m_RawList.InsertBefore(it.m_pItem, value)); }
+
+    void clear() { m_RawList.Clear(); }
+    void erase(iterator it) { m_RawList.Remove(it.m_pItem); }
+
+private:
+    VmaRawList<T> m_RawList;
+};
+
+#ifndef _VMA_LIST_FUNCTIONS
+template<typename T, typename AllocatorT>
+typename VmaList<T, AllocatorT>::iterator& VmaList<T, AllocatorT>::iterator::operator--()
+{
+    if (m_pItem != VMA_NULL)
+    {
+        m_pItem = m_pItem->pPrev;
+    }
+    else
+    {
+        VMA_HEAVY_ASSERT(!m_pList->IsEmpty());
+        m_pItem = m_pList->Back();
+    }
+    return *this;
+}
+
+template<typename T, typename AllocatorT>
+typename VmaList<T, AllocatorT>::reverse_iterator& VmaList<T, AllocatorT>::reverse_iterator::operator--()
+{
+    if (m_pItem != VMA_NULL)
+    {
+        m_pItem = m_pItem->pNext;
+    }
+    else
+    {
+        VMA_HEAVY_ASSERT(!m_pList->IsEmpty());
+        m_pItem = m_pList->Front();
+    }
+    return *this;
+}
+
+template<typename T, typename AllocatorT>
+typename VmaList<T, AllocatorT>::const_iterator& VmaList<T, AllocatorT>::const_iterator::operator--()
+{
+    if (m_pItem != VMA_NULL)
+    {
+        m_pItem = m_pItem->pPrev;
+    }
+    else
+    {
+        VMA_HEAVY_ASSERT(!m_pList->IsEmpty());
+        m_pItem = m_pList->Back();
+    }
+    return *this;
+}
+
+template<typename T, typename AllocatorT>
+typename VmaList<T, AllocatorT>::const_reverse_iterator& VmaList<T, AllocatorT>::const_reverse_iterator::operator--()
+{
+    if (m_pItem != VMA_NULL)
+    {
+        m_pItem = m_pItem->pNext;
+    }
+    else
+    {
+        VMA_HEAVY_ASSERT(!m_pList->IsEmpty());
+        // Symmetric with reverse_iterator::operator--() above: stepping back
+        // from rend() must land on the first element of the list.
+        m_pItem = m_pList->Front();
+    }
+    return *this;
+}
+#endif // _VMA_LIST_FUNCTIONS
+#endif // _VMA_LIST
+
+#ifndef _VMA_INTRUSIVE_LINKED_LIST
+/*
+Expected interface of ItemTypeTraits:
+struct MyItemTypeTraits
+{
+    typedef MyItem ItemType;
+    static ItemType* GetPrev(const ItemType* item) {
return item->myPrevPtr; } + static ItemType* GetNext(const ItemType* item) { return item->myNextPtr; } + static ItemType*& AccessPrev(ItemType* item) { return item->myPrevPtr; } + static ItemType*& AccessNext(ItemType* item) { return item->myNextPtr; } +}; +*/ +template +class VmaIntrusiveLinkedList +{ +public: + typedef typename ItemTypeTraits::ItemType ItemType; + static ItemType* GetPrev(const ItemType* item) { return ItemTypeTraits::GetPrev(item); } + static ItemType* GetNext(const ItemType* item) { return ItemTypeTraits::GetNext(item); } + + // Movable, not copyable. + VmaIntrusiveLinkedList() = default; + VmaIntrusiveLinkedList(VmaIntrusiveLinkedList && src); + VmaIntrusiveLinkedList(const VmaIntrusiveLinkedList&) = delete; + VmaIntrusiveLinkedList& operator=(VmaIntrusiveLinkedList&& src); + VmaIntrusiveLinkedList& operator=(const VmaIntrusiveLinkedList&) = delete; + ~VmaIntrusiveLinkedList() { VMA_HEAVY_ASSERT(IsEmpty()); } + + size_t GetCount() const { return m_Count; } + bool IsEmpty() const { return m_Count == 0; } + ItemType* Front() { return m_Front; } + ItemType* Back() { return m_Back; } + const ItemType* Front() const { return m_Front; } + const ItemType* Back() const { return m_Back; } + + void PushBack(ItemType* item); + void PushFront(ItemType* item); + ItemType* PopBack(); + ItemType* PopFront(); + + // MyItem can be null - it means PushBack. + void InsertBefore(ItemType* existingItem, ItemType* newItem); + // MyItem can be null - it means PushFront. + void InsertAfter(ItemType* existingItem, ItemType* newItem); + void Remove(ItemType* item); + void RemoveAll(); + +private: + ItemType* m_Front = VMA_NULL; + ItemType* m_Back = VMA_NULL; + size_t m_Count = 0; +}; + +#ifndef _VMA_INTRUSIVE_LINKED_LIST_FUNCTIONS +template +VmaIntrusiveLinkedList::VmaIntrusiveLinkedList(VmaIntrusiveLinkedList&& src) + : m_Front(src.m_Front), m_Back(src.m_Back), m_Count(src.m_Count) +{ + src.m_Front = src.m_Back = VMA_NULL; + src.m_Count = 0; +} + +template +VmaIntrusiveLinkedList& VmaIntrusiveLinkedList::operator=(VmaIntrusiveLinkedList&& src) +{ + if (&src != this) + { + VMA_HEAVY_ASSERT(IsEmpty()); + m_Front = src.m_Front; + m_Back = src.m_Back; + m_Count = src.m_Count; + src.m_Front = src.m_Back = VMA_NULL; + src.m_Count = 0; + } + return *this; +} + +template +void VmaIntrusiveLinkedList::PushBack(ItemType* item) +{ + VMA_HEAVY_ASSERT(ItemTypeTraits::GetPrev(item) == VMA_NULL && ItemTypeTraits::GetNext(item) == VMA_NULL); + if (IsEmpty()) + { + m_Front = item; + m_Back = item; + m_Count = 1; + } + else + { + ItemTypeTraits::AccessPrev(item) = m_Back; + ItemTypeTraits::AccessNext(m_Back) = item; + m_Back = item; + ++m_Count; + } +} + +template +void VmaIntrusiveLinkedList::PushFront(ItemType* item) +{ + VMA_HEAVY_ASSERT(ItemTypeTraits::GetPrev(item) == VMA_NULL && ItemTypeTraits::GetNext(item) == VMA_NULL); + if (IsEmpty()) + { + m_Front = item; + m_Back = item; + m_Count = 1; + } + else + { + ItemTypeTraits::AccessNext(item) = m_Front; + ItemTypeTraits::AccessPrev(m_Front) = item; + m_Front = item; + ++m_Count; + } +} + +template +typename VmaIntrusiveLinkedList::ItemType* VmaIntrusiveLinkedList::PopBack() +{ + VMA_HEAVY_ASSERT(m_Count > 0); + ItemType* const backItem = m_Back; + ItemType* const prevItem = ItemTypeTraits::GetPrev(backItem); + if (prevItem != VMA_NULL) + { + ItemTypeTraits::AccessNext(prevItem) = VMA_NULL; + } + m_Back = prevItem; + --m_Count; + ItemTypeTraits::AccessPrev(backItem) = VMA_NULL; + ItemTypeTraits::AccessNext(backItem) = VMA_NULL; + return backItem; +} + 
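+// A concrete ItemTypeTraits implementation matching the interface documented
+// above (illustrative only, kept out of the build with #if 0; MyItem and
+// MyItemTraits are hypothetical names):
+#if 0
+struct MyItem
+{
+    MyItem* prev = VMA_NULL;
+    MyItem* next = VMA_NULL;
+};
+struct MyItemTraits
+{
+    typedef MyItem ItemType;
+    static ItemType* GetPrev(const ItemType* item) { return item->prev; }
+    static ItemType* GetNext(const ItemType* item) { return item->next; }
+    static ItemType*& AccessPrev(ItemType* item) { return item->prev; }
+    static ItemType*& AccessNext(ItemType* item) { return item->next; }
+};
+
+static void VmaIntrusiveLinkedListExample()
+{
+    MyItem a, b;
+    VmaIntrusiveLinkedList<MyItemTraits> list;
+    list.PushBack(&a);        // the list links nodes through a's own pointers
+    list.InsertAfter(&a, &b); // no per-node heap allocation is involved
+    list.RemoveAll();         // clears the links; items themselves are not freed
+}
+#endif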
+template +typename VmaIntrusiveLinkedList::ItemType* VmaIntrusiveLinkedList::PopFront() +{ + VMA_HEAVY_ASSERT(m_Count > 0); + ItemType* const frontItem = m_Front; + ItemType* const nextItem = ItemTypeTraits::GetNext(frontItem); + if (nextItem != VMA_NULL) + { + ItemTypeTraits::AccessPrev(nextItem) = VMA_NULL; + } + m_Front = nextItem; + --m_Count; + ItemTypeTraits::AccessPrev(frontItem) = VMA_NULL; + ItemTypeTraits::AccessNext(frontItem) = VMA_NULL; + return frontItem; +} + +template +void VmaIntrusiveLinkedList::InsertBefore(ItemType* existingItem, ItemType* newItem) +{ + VMA_HEAVY_ASSERT(newItem != VMA_NULL && ItemTypeTraits::GetPrev(newItem) == VMA_NULL && ItemTypeTraits::GetNext(newItem) == VMA_NULL); + if (existingItem != VMA_NULL) + { + ItemType* const prevItem = ItemTypeTraits::GetPrev(existingItem); + ItemTypeTraits::AccessPrev(newItem) = prevItem; + ItemTypeTraits::AccessNext(newItem) = existingItem; + ItemTypeTraits::AccessPrev(existingItem) = newItem; + if (prevItem != VMA_NULL) + { + ItemTypeTraits::AccessNext(prevItem) = newItem; + } + else + { + VMA_HEAVY_ASSERT(m_Front == existingItem); + m_Front = newItem; + } + ++m_Count; + } + else + PushBack(newItem); +} + +template +void VmaIntrusiveLinkedList::InsertAfter(ItemType* existingItem, ItemType* newItem) +{ + VMA_HEAVY_ASSERT(newItem != VMA_NULL && ItemTypeTraits::GetPrev(newItem) == VMA_NULL && ItemTypeTraits::GetNext(newItem) == VMA_NULL); + if (existingItem != VMA_NULL) + { + ItemType* const nextItem = ItemTypeTraits::GetNext(existingItem); + ItemTypeTraits::AccessNext(newItem) = nextItem; + ItemTypeTraits::AccessPrev(newItem) = existingItem; + ItemTypeTraits::AccessNext(existingItem) = newItem; + if (nextItem != VMA_NULL) + { + ItemTypeTraits::AccessPrev(nextItem) = newItem; + } + else + { + VMA_HEAVY_ASSERT(m_Back == existingItem); + m_Back = newItem; + } + ++m_Count; + } + else + return PushFront(newItem); +} + +template +void VmaIntrusiveLinkedList::Remove(ItemType* item) +{ + VMA_HEAVY_ASSERT(item != VMA_NULL && m_Count > 0); + if (ItemTypeTraits::GetPrev(item) != VMA_NULL) + { + ItemTypeTraits::AccessNext(ItemTypeTraits::AccessPrev(item)) = ItemTypeTraits::GetNext(item); + } + else + { + VMA_HEAVY_ASSERT(m_Front == item); + m_Front = ItemTypeTraits::GetNext(item); + } + + if (ItemTypeTraits::GetNext(item) != VMA_NULL) + { + ItemTypeTraits::AccessPrev(ItemTypeTraits::AccessNext(item)) = ItemTypeTraits::GetPrev(item); + } + else + { + VMA_HEAVY_ASSERT(m_Back == item); + m_Back = ItemTypeTraits::GetPrev(item); + } + ItemTypeTraits::AccessPrev(item) = VMA_NULL; + ItemTypeTraits::AccessNext(item) = VMA_NULL; + --m_Count; +} + +template +void VmaIntrusiveLinkedList::RemoveAll() +{ + if (!IsEmpty()) + { + ItemType* item = m_Back; + while (item != VMA_NULL) + { + ItemType* const prevItem = ItemTypeTraits::AccessPrev(item); + ItemTypeTraits::AccessPrev(item) = VMA_NULL; + ItemTypeTraits::AccessNext(item) = VMA_NULL; + item = prevItem; + } + m_Front = VMA_NULL; + m_Back = VMA_NULL; + m_Count = 0; + } +} +#endif // _VMA_INTRUSIVE_LINKED_LIST_FUNCTIONS +#endif // _VMA_INTRUSIVE_LINKED_LIST + +#if !defined(_VMA_STRING_BUILDER) && VMA_STATS_STRING_ENABLED +class VmaStringBuilder +{ +public: + VmaStringBuilder(const VkAllocationCallbacks* allocationCallbacks) : m_Data(VmaStlAllocator(allocationCallbacks)) {} + ~VmaStringBuilder() = default; + + size_t GetLength() const { return m_Data.size(); } + const char* GetData() const { return m_Data.data(); } + void AddNewLine() { Add('\n'); } + void Add(char ch) { m_Data.push_back(ch); } + 
+    void Add(const char* pStr);
+    void AddNumber(uint32_t num);
+    void AddNumber(uint64_t num);
+    void AddPointer(const void* ptr);
+
+private:
+    VmaVector<char, VmaStlAllocator<char>> m_Data;
+};
+
+#ifndef _VMA_STRING_BUILDER_FUNCTIONS
+void VmaStringBuilder::Add(const char* pStr)
+{
+    const size_t strLen = strlen(pStr);
+    if (strLen > 0)
+    {
+        const size_t oldCount = m_Data.size();
+        m_Data.resize(oldCount + strLen);
+        memcpy(m_Data.data() + oldCount, pStr, strLen);
+    }
+}
+
+void VmaStringBuilder::AddNumber(uint32_t num)
+{
+    char buf[11];
+    buf[10] = '\0';
+    char* p = &buf[10];
+    do
+    {
+        *--p = '0' + (char)(num % 10);
+        num /= 10;
+    } while (num);
+    Add(p);
+}
+
+void VmaStringBuilder::AddNumber(uint64_t num)
+{
+    char buf[21];
+    buf[20] = '\0';
+    char* p = &buf[20];
+    do
+    {
+        *--p = '0' + (char)(num % 10);
+        num /= 10;
+    } while (num);
+    Add(p);
+}
+
+void VmaStringBuilder::AddPointer(const void* ptr)
+{
+    char buf[21];
+    VmaPtrToStr(buf, sizeof(buf), ptr);
+    Add(buf);
+}
+#endif //_VMA_STRING_BUILDER_FUNCTIONS
+#endif // _VMA_STRING_BUILDER
+
+#if !defined(_VMA_JSON_WRITER) && VMA_STATS_STRING_ENABLED
+/*
+Allows you to conveniently build a correct JSON document to be written to the
+VmaStringBuilder passed to the constructor.
+*/
+class VmaJsonWriter
+{
+    VMA_CLASS_NO_COPY_NO_MOVE(VmaJsonWriter)
+public:
+    // sb - string builder to write the document to. Must remain alive for the whole lifetime of this object.
+    VmaJsonWriter(const VkAllocationCallbacks* pAllocationCallbacks, VmaStringBuilder& sb);
+    ~VmaJsonWriter();
+
+    // Begins object by writing "{".
+    // Inside an object, you must call pairs of WriteString and a value, e.g.:
+    // j.BeginObject(true); j.WriteString("A"); j.WriteNumber(1); j.WriteString("B"); j.WriteNumber(2); j.EndObject();
+    // Will write: { "A": 1, "B": 2 }
+    void BeginObject(bool singleLine = false);
+    // Ends object by writing "}".
+    void EndObject();
+
+    // Begins array by writing "[".
+    // Inside an array, you can write a sequence of any values.
+    void BeginArray(bool singleLine = false);
+    // Ends array by writing "]".
+    void EndArray();
+
+    // Writes a string value inside "".
+    // pStr can contain any ANSI characters, including '"', new line etc. - they will be properly escaped.
+    void WriteString(const char* pStr);
+
+    // Begins writing a string value.
+    // Call BeginString, ContinueString, ContinueString, ..., EndString instead of
+    // WriteString to conveniently build the string content incrementally, made of
+    // parts including numbers.
+    void BeginString(const char* pStr = VMA_NULL);
+    // Posts next part of an open string.
+    void ContinueString(const char* pStr);
+    // Posts next part of an open string. The number is converted to decimal characters.
+    void ContinueString(uint32_t n);
+    void ContinueString(uint64_t n);
+    // Posts next part of an open string. Pointer value is converted to characters
+    // using "%p" formatting - shown as hexadecimal number, e.g.: 000000081276Ad00
+    void ContinueString_Pointer(const void* ptr);
+    // Ends writing a string value by writing '"'.
+    void EndString(const char* pStr = VMA_NULL);
+
+    // Writes a number value.
+    void WriteNumber(uint32_t n);
+    void WriteNumber(uint64_t n);
+    // Writes a boolean value - false or true.
+    void WriteBool(bool b);
+    // Writes a null value.
+ void WriteNull(); + +private: + enum COLLECTION_TYPE + { + COLLECTION_TYPE_OBJECT, + COLLECTION_TYPE_ARRAY, + }; + struct StackItem + { + COLLECTION_TYPE type; + uint32_t valueCount; + bool singleLineMode; + }; + + static const char* const INDENT; + + VmaStringBuilder& m_SB; + VmaVector< StackItem, VmaStlAllocator > m_Stack; + bool m_InsideString; + + void BeginValue(bool isString); + void WriteIndent(bool oneLess = false); +}; +const char* const VmaJsonWriter::INDENT = " "; + +#ifndef _VMA_JSON_WRITER_FUNCTIONS +VmaJsonWriter::VmaJsonWriter(const VkAllocationCallbacks* pAllocationCallbacks, VmaStringBuilder& sb) + : m_SB(sb), + m_Stack(VmaStlAllocator(pAllocationCallbacks)), + m_InsideString(false) {} + +VmaJsonWriter::~VmaJsonWriter() +{ + VMA_ASSERT(!m_InsideString); + VMA_ASSERT(m_Stack.empty()); +} + +void VmaJsonWriter::BeginObject(bool singleLine) +{ + VMA_ASSERT(!m_InsideString); + + BeginValue(false); + m_SB.Add('{'); + + StackItem item; + item.type = COLLECTION_TYPE_OBJECT; + item.valueCount = 0; + item.singleLineMode = singleLine; + m_Stack.push_back(item); +} + +void VmaJsonWriter::EndObject() +{ + VMA_ASSERT(!m_InsideString); + + WriteIndent(true); + m_SB.Add('}'); + + VMA_ASSERT(!m_Stack.empty() && m_Stack.back().type == COLLECTION_TYPE_OBJECT); + m_Stack.pop_back(); +} + +void VmaJsonWriter::BeginArray(bool singleLine) +{ + VMA_ASSERT(!m_InsideString); + + BeginValue(false); + m_SB.Add('['); + + StackItem item; + item.type = COLLECTION_TYPE_ARRAY; + item.valueCount = 0; + item.singleLineMode = singleLine; + m_Stack.push_back(item); +} + +void VmaJsonWriter::EndArray() +{ + VMA_ASSERT(!m_InsideString); + + WriteIndent(true); + m_SB.Add(']'); + + VMA_ASSERT(!m_Stack.empty() && m_Stack.back().type == COLLECTION_TYPE_ARRAY); + m_Stack.pop_back(); +} + +void VmaJsonWriter::WriteString(const char* pStr) +{ + BeginString(pStr); + EndString(); +} + +void VmaJsonWriter::BeginString(const char* pStr) +{ + VMA_ASSERT(!m_InsideString); + + BeginValue(true); + m_SB.Add('"'); + m_InsideString = true; + if (pStr != VMA_NULL && pStr[0] != '\0') + { + ContinueString(pStr); + } +} + +void VmaJsonWriter::ContinueString(const char* pStr) +{ + VMA_ASSERT(m_InsideString); + + const size_t strLen = strlen(pStr); + for (size_t i = 0; i < strLen; ++i) + { + char ch = pStr[i]; + if (ch == '\\') + { + m_SB.Add("\\\\"); + } + else if (ch == '"') + { + m_SB.Add("\\\""); + } + else if ((uint8_t)ch >= 32) + { + m_SB.Add(ch); + } + else switch (ch) + { + case '\b': + m_SB.Add("\\b"); + break; + case '\f': + m_SB.Add("\\f"); + break; + case '\n': + m_SB.Add("\\n"); + break; + case '\r': + m_SB.Add("\\r"); + break; + case '\t': + m_SB.Add("\\t"); + break; + default: + VMA_ASSERT(0 && "Character not currently supported."); + } + } +} + +void VmaJsonWriter::ContinueString(uint32_t n) +{ + VMA_ASSERT(m_InsideString); + m_SB.AddNumber(n); +} + +void VmaJsonWriter::ContinueString(uint64_t n) +{ + VMA_ASSERT(m_InsideString); + m_SB.AddNumber(n); +} + +void VmaJsonWriter::ContinueString_Pointer(const void* ptr) +{ + VMA_ASSERT(m_InsideString); + m_SB.AddPointer(ptr); +} + +void VmaJsonWriter::EndString(const char* pStr) +{ + VMA_ASSERT(m_InsideString); + if (pStr != VMA_NULL && pStr[0] != '\0') + { + ContinueString(pStr); + } + m_SB.Add('"'); + m_InsideString = false; +} + +void VmaJsonWriter::WriteNumber(uint32_t n) +{ + VMA_ASSERT(!m_InsideString); + BeginValue(false); + m_SB.AddNumber(n); +} + +void VmaJsonWriter::WriteNumber(uint64_t n) +{ + VMA_ASSERT(!m_InsideString); + BeginValue(false); + 
m_SB.AddNumber(n); +} + +void VmaJsonWriter::WriteBool(bool b) +{ + VMA_ASSERT(!m_InsideString); + BeginValue(false); + m_SB.Add(b ? "true" : "false"); +} + +void VmaJsonWriter::WriteNull() +{ + VMA_ASSERT(!m_InsideString); + BeginValue(false); + m_SB.Add("null"); +} + +void VmaJsonWriter::BeginValue(bool isString) +{ + if (!m_Stack.empty()) + { + StackItem& currItem = m_Stack.back(); + if (currItem.type == COLLECTION_TYPE_OBJECT && + currItem.valueCount % 2 == 0) + { + VMA_ASSERT(isString); + } + + if (currItem.type == COLLECTION_TYPE_OBJECT && + currItem.valueCount % 2 != 0) + { + m_SB.Add(": "); + } + else if (currItem.valueCount > 0) + { + m_SB.Add(", "); + WriteIndent(); + } + else + { + WriteIndent(); + } + ++currItem.valueCount; + } +} + +void VmaJsonWriter::WriteIndent(bool oneLess) +{ + if (!m_Stack.empty() && !m_Stack.back().singleLineMode) + { + m_SB.AddNewLine(); + + size_t count = m_Stack.size(); + if (count > 0 && oneLess) + { + --count; + } + for (size_t i = 0; i < count; ++i) + { + m_SB.Add(INDENT); + } + } +} +#endif // _VMA_JSON_WRITER_FUNCTIONS + +static void VmaPrintDetailedStatistics(VmaJsonWriter& json, const VmaDetailedStatistics& stat) +{ + json.BeginObject(); + + json.WriteString("BlockCount"); + json.WriteNumber(stat.statistics.blockCount); + json.WriteString("BlockBytes"); + json.WriteNumber(stat.statistics.blockBytes); + json.WriteString("AllocationCount"); + json.WriteNumber(stat.statistics.allocationCount); + json.WriteString("AllocationBytes"); + json.WriteNumber(stat.statistics.allocationBytes); + json.WriteString("UnusedRangeCount"); + json.WriteNumber(stat.unusedRangeCount); + + if (stat.statistics.allocationCount > 1) + { + json.WriteString("AllocationSizeMin"); + json.WriteNumber(stat.allocationSizeMin); + json.WriteString("AllocationSizeMax"); + json.WriteNumber(stat.allocationSizeMax); + } + if (stat.unusedRangeCount > 1) + { + json.WriteString("UnusedRangeSizeMin"); + json.WriteNumber(stat.unusedRangeSizeMin); + json.WriteString("UnusedRangeSizeMax"); + json.WriteNumber(stat.unusedRangeSizeMax); + } + json.EndObject(); +} +#endif // _VMA_JSON_WRITER + +#ifndef _VMA_MAPPING_HYSTERESIS + +class VmaMappingHysteresis +{ + VMA_CLASS_NO_COPY_NO_MOVE(VmaMappingHysteresis) +public: + VmaMappingHysteresis() = default; + + uint32_t GetExtraMapping() const { return m_ExtraMapping; } + + // Call when Map was called. + // Returns true if switched to extra +1 mapping reference count. + bool PostMap() + { +#if VMA_MAPPING_HYSTERESIS_ENABLED + if(m_ExtraMapping == 0) + { + ++m_MajorCounter; + if(m_MajorCounter >= COUNTER_MIN_EXTRA_MAPPING) + { + m_ExtraMapping = 1; + m_MajorCounter = 0; + m_MinorCounter = 0; + return true; + } + } + else // m_ExtraMapping == 1 + PostMinorCounter(); +#endif // #if VMA_MAPPING_HYSTERESIS_ENABLED + return false; + } + + // Call when Unmap was called. + void PostUnmap() + { +#if VMA_MAPPING_HYSTERESIS_ENABLED + if(m_ExtraMapping == 0) + ++m_MajorCounter; + else // m_ExtraMapping == 1 + PostMinorCounter(); +#endif // #if VMA_MAPPING_HYSTERESIS_ENABLED + } + + // Call when allocation was made from the memory block. + void PostAlloc() + { +#if VMA_MAPPING_HYSTERESIS_ENABLED + if(m_ExtraMapping == 1) + ++m_MajorCounter; + else // m_ExtraMapping == 0 + PostMinorCounter(); +#endif // #if VMA_MAPPING_HYSTERESIS_ENABLED + } + + // Call when allocation was freed from the memory block. + // Returns true if switched to extra -1 mapping reference count. 
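+    // (While the extra mapping is held, allocations and frees advance the major
+    // counter and Map/Unmap calls advance the minor one, so the cached +1 mapping
+    // is only dropped once allocation traffic clearly outweighs mapping traffic;
+    // blocks that are mapped frequently stay mapped.)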
+ bool PostFree() + { +#if VMA_MAPPING_HYSTERESIS_ENABLED + if(m_ExtraMapping == 1) + { + ++m_MajorCounter; + if(m_MajorCounter >= COUNTER_MIN_EXTRA_MAPPING && + m_MajorCounter > m_MinorCounter + 1) + { + m_ExtraMapping = 0; + m_MajorCounter = 0; + m_MinorCounter = 0; + return true; + } + } + else // m_ExtraMapping == 0 + PostMinorCounter(); +#endif // #if VMA_MAPPING_HYSTERESIS_ENABLED + return false; + } + +private: + static const int32_t COUNTER_MIN_EXTRA_MAPPING = 7; + + uint32_t m_MinorCounter = 0; + uint32_t m_MajorCounter = 0; + uint32_t m_ExtraMapping = 0; // 0 or 1. + + void PostMinorCounter() + { + if(m_MinorCounter < m_MajorCounter) + { + ++m_MinorCounter; + } + else if(m_MajorCounter > 0) + { + --m_MajorCounter; + --m_MinorCounter; + } + } +}; + +#endif // _VMA_MAPPING_HYSTERESIS + +#if VMA_EXTERNAL_MEMORY_WIN32 +class VmaWin32Handle +{ +public: + VmaWin32Handle() noexcept : m_hHandle(VMA_NULL) { } + explicit VmaWin32Handle(HANDLE hHandle) noexcept : m_hHandle(hHandle) { } + ~VmaWin32Handle() noexcept { if (m_hHandle != VMA_NULL) { ::CloseHandle(m_hHandle); } } + VMA_CLASS_NO_COPY_NO_MOVE(VmaWin32Handle) + +public: + // Strengthened + VkResult GetHandle(VkDevice device, VkDeviceMemory memory, PFN_vkGetMemoryWin32HandleKHR pvkGetMemoryWin32HandleKHR, HANDLE hTargetProcess, bool useMutex, HANDLE* pHandle) noexcept + { + *pHandle = VMA_NULL; + // Try to get handle first. + if (m_hHandle != VMA_NULL) + { + *pHandle = Duplicate(hTargetProcess); + return VK_SUCCESS; + } + + VkResult res = VK_SUCCESS; + // If failed, try to create it. + { + VmaMutexLockWrite lock(m_Mutex, useMutex); + if (m_hHandle == VMA_NULL) + { + res = Create(device, memory, pvkGetMemoryWin32HandleKHR, &m_hHandle); + } + } + + *pHandle = Duplicate(hTargetProcess); + return res; + } + + operator bool() const noexcept { return m_hHandle != VMA_NULL; } +private: + // Not atomic + static VkResult Create(VkDevice device, VkDeviceMemory memory, PFN_vkGetMemoryWin32HandleKHR pvkGetMemoryWin32HandleKHR, HANDLE* pHandle) noexcept + { + VkResult res = VK_ERROR_FEATURE_NOT_PRESENT; + if (pvkGetMemoryWin32HandleKHR != VMA_NULL) + { + VkMemoryGetWin32HandleInfoKHR handleInfo{ }; + handleInfo.sType = VK_STRUCTURE_TYPE_MEMORY_GET_WIN32_HANDLE_INFO_KHR; + handleInfo.memory = memory; + handleInfo.handleType = VK_EXTERNAL_MEMORY_HANDLE_TYPE_OPAQUE_WIN32_BIT_KHR; + res = pvkGetMemoryWin32HandleKHR(device, &handleInfo, pHandle); + } + return res; + } + HANDLE Duplicate(HANDLE hTargetProcess = VMA_NULL) const noexcept + { + if (!m_hHandle) + return m_hHandle; + + HANDLE hCurrentProcess = ::GetCurrentProcess(); + HANDLE hDupHandle = VMA_NULL; + if (!::DuplicateHandle(hCurrentProcess, m_hHandle, hTargetProcess ? hTargetProcess : hCurrentProcess, &hDupHandle, 0, FALSE, DUPLICATE_SAME_ACCESS)) + { + VMA_ASSERT(0 && "Failed to duplicate handle."); + } + return hDupHandle; + } +private: + HANDLE m_hHandle; + VMA_RW_MUTEX m_Mutex; // Protects access m_Handle +}; +#else +class VmaWin32Handle +{ + // ABI compatibility + void* placeholder = VMA_NULL; + VMA_RW_MUTEX placeholder2; +}; +#endif // VMA_EXTERNAL_MEMORY_WIN32 + + +#ifndef _VMA_DEVICE_MEMORY_BLOCK +/* +Represents a single block of device memory (`VkDeviceMemory`) with all the +data about its regions (aka suballocations, #VmaAllocation), assigned and free. + +Thread-safety: +- Access to m_pMetadata must be externally synchronized. +- Map, Unmap, Bind* are synchronized internally. 
+*/ +class VmaDeviceMemoryBlock +{ + VMA_CLASS_NO_COPY_NO_MOVE(VmaDeviceMemoryBlock) +public: + VmaBlockMetadata* m_pMetadata; + + VmaDeviceMemoryBlock(VmaAllocator hAllocator); + ~VmaDeviceMemoryBlock(); + + // Always call after construction. + void Init( + VmaAllocator hAllocator, + VmaPool hParentPool, + uint32_t newMemoryTypeIndex, + VkDeviceMemory newMemory, + VkDeviceSize newSize, + uint32_t id, + uint32_t algorithm, + VkDeviceSize bufferImageGranularity); + // Always call before destruction. + void Destroy(VmaAllocator allocator); + + VmaPool GetParentPool() const { return m_hParentPool; } + VkDeviceMemory GetDeviceMemory() const { return m_hMemory; } + uint32_t GetMemoryTypeIndex() const { return m_MemoryTypeIndex; } + uint32_t GetId() const { return m_Id; } + void* GetMappedData() const { return m_pMappedData; } + uint32_t GetMapRefCount() const { return m_MapCount; } + + // Call when allocation/free was made from m_pMetadata. + // Used for m_MappingHysteresis. + void PostAlloc(VmaAllocator hAllocator); + void PostFree(VmaAllocator hAllocator); + + // Validates all data structures inside this object. If not valid, returns false. + bool Validate() const; + VkResult CheckCorruption(VmaAllocator hAllocator); + + // ppData can be null. + VkResult Map(VmaAllocator hAllocator, uint32_t count, void** ppData); + void Unmap(VmaAllocator hAllocator, uint32_t count); + + VkResult WriteMagicValueAfterAllocation(VmaAllocator hAllocator, VkDeviceSize allocOffset, VkDeviceSize allocSize); + VkResult ValidateMagicValueAfterAllocation(VmaAllocator hAllocator, VkDeviceSize allocOffset, VkDeviceSize allocSize); + + VkResult BindBufferMemory( + const VmaAllocator hAllocator, + const VmaAllocation hAllocation, + VkDeviceSize allocationLocalOffset, + VkBuffer hBuffer, + const void* pNext); + VkResult BindImageMemory( + const VmaAllocator hAllocator, + const VmaAllocation hAllocation, + VkDeviceSize allocationLocalOffset, + VkImage hImage, + const void* pNext); +#if VMA_EXTERNAL_MEMORY_WIN32 + VkResult CreateWin32Handle( + const VmaAllocator hAllocator, + PFN_vkGetMemoryWin32HandleKHR pvkGetMemoryWin32HandleKHR, + HANDLE hTargetProcess, + HANDLE* pHandle)noexcept; +#endif // VMA_EXTERNAL_MEMORY_WIN32 +private: + VmaPool m_hParentPool; // VK_NULL_HANDLE if not belongs to custom pool. + uint32_t m_MemoryTypeIndex; + uint32_t m_Id; + VkDeviceMemory m_hMemory; + + /* + Protects access to m_hMemory so it is not used by multiple threads simultaneously, e.g. vkMapMemory, vkBindBufferMemory. + Also protects m_MapCount, m_pMappedData. + Allocations, deallocations, any change in m_pMetadata is protected by parent's VmaBlockVector::m_Mutex. + */ + VMA_MUTEX m_MapAndBindMutex; + VmaMappingHysteresis m_MappingHysteresis; + uint32_t m_MapCount; + void* m_pMappedData; + + VmaWin32Handle m_Handle; +}; +#endif // _VMA_DEVICE_MEMORY_BLOCK + +#ifndef _VMA_ALLOCATION_T +struct VmaAllocationExtraData +{ + void* m_pMappedData = VMA_NULL; // Not null means memory is mapped. + VmaWin32Handle m_Handle; +}; + +struct VmaAllocation_T +{ + friend struct VmaDedicatedAllocationListItemTraits; + + enum FLAGS + { + FLAG_PERSISTENT_MAP = 0x01, + FLAG_MAPPING_ALLOWED = 0x02, + }; + +public: + enum ALLOCATION_TYPE + { + ALLOCATION_TYPE_NONE, + ALLOCATION_TYPE_BLOCK, + ALLOCATION_TYPE_DEDICATED, + }; + + // This struct is allocated using VmaPoolAllocator. 
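+    // E.g. an allocation created with VMA_ALLOCATION_CREATE_MAPPED_BIT ends up
+    // with m_Flags == (FLAG_PERSISTENT_MAP | FLAG_MAPPING_ALLOWED).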
+ VmaAllocation_T(bool mappingAllowed); + ~VmaAllocation_T(); + + void InitBlockAllocation( + VmaDeviceMemoryBlock* block, + VmaAllocHandle allocHandle, + VkDeviceSize alignment, + VkDeviceSize size, + uint32_t memoryTypeIndex, + VmaSuballocationType suballocationType, + bool mapped); + // pMappedData not null means allocation is created with MAPPED flag. + void InitDedicatedAllocation( + VmaAllocator allocator, + VmaPool hParentPool, + uint32_t memoryTypeIndex, + VkDeviceMemory hMemory, + VmaSuballocationType suballocationType, + void* pMappedData, + VkDeviceSize size); + void Destroy(VmaAllocator allocator); + + ALLOCATION_TYPE GetType() const { return (ALLOCATION_TYPE)m_Type; } + VkDeviceSize GetAlignment() const { return m_Alignment; } + VkDeviceSize GetSize() const { return m_Size; } + void* GetUserData() const { return m_pUserData; } + const char* GetName() const { return m_pName; } + VmaSuballocationType GetSuballocationType() const { return (VmaSuballocationType)m_SuballocationType; } + + VmaDeviceMemoryBlock* GetBlock() const { VMA_ASSERT(m_Type == ALLOCATION_TYPE_BLOCK); return m_BlockAllocation.m_Block; } + uint32_t GetMemoryTypeIndex() const { return m_MemoryTypeIndex; } + bool IsPersistentMap() const { return (m_Flags & FLAG_PERSISTENT_MAP) != 0; } + bool IsMappingAllowed() const { return (m_Flags & FLAG_MAPPING_ALLOWED) != 0; } + + void SetUserData(VmaAllocator hAllocator, void* pUserData) { m_pUserData = pUserData; } + void SetName(VmaAllocator hAllocator, const char* pName); + void FreeName(VmaAllocator hAllocator); + uint8_t SwapBlockAllocation(VmaAllocator hAllocator, VmaAllocation allocation); + VmaAllocHandle GetAllocHandle() const; + VkDeviceSize GetOffset() const; + VmaPool GetParentPool() const; + VkDeviceMemory GetMemory() const; + void* GetMappedData() const; + + void BlockAllocMap(); + void BlockAllocUnmap(); + VkResult DedicatedAllocMap(VmaAllocator hAllocator, void** ppData); + void DedicatedAllocUnmap(VmaAllocator hAllocator); + +#if VMA_STATS_STRING_ENABLED + VmaBufferImageUsage GetBufferImageUsage() const { return m_BufferImageUsage; } + void InitBufferUsage(const VkBufferCreateInfo &createInfo, bool useKhrMaintenance5) + { + VMA_ASSERT(m_BufferImageUsage == VmaBufferImageUsage::UNKNOWN); + m_BufferImageUsage = VmaBufferImageUsage(createInfo, useKhrMaintenance5); + } + void InitImageUsage(const VkImageCreateInfo &createInfo) + { + VMA_ASSERT(m_BufferImageUsage == VmaBufferImageUsage::UNKNOWN); + m_BufferImageUsage = VmaBufferImageUsage(createInfo); + } + void PrintParameters(class VmaJsonWriter& json) const; +#endif + +#if VMA_EXTERNAL_MEMORY_WIN32 + VkResult GetWin32Handle(VmaAllocator hAllocator, HANDLE hTargetProcess, HANDLE* hHandle) noexcept; +#endif // VMA_EXTERNAL_MEMORY_WIN32 + +private: + // Allocation out of VmaDeviceMemoryBlock. + struct BlockAllocation + { + VmaDeviceMemoryBlock* m_Block; + VmaAllocHandle m_AllocHandle; + }; + // Allocation for an object that has its own private VkDeviceMemory. + struct DedicatedAllocation + { + VmaPool m_hParentPool; // VK_NULL_HANDLE if not belongs to custom pool. + VkDeviceMemory m_hMemory; + VmaAllocationExtraData* m_ExtraData; + VmaAllocation_T* m_Prev; + VmaAllocation_T* m_Next; + }; + union + { + // Allocation out of VmaDeviceMemoryBlock. + BlockAllocation m_BlockAllocation; + // Allocation for an object that has its own private VkDeviceMemory. 
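+        // (Which union member below is active is tracked by m_Type; reading
+        //  the other member is invalid.)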
+ DedicatedAllocation m_DedicatedAllocation; + }; + + VkDeviceSize m_Alignment; + VkDeviceSize m_Size; + void* m_pUserData; + char* m_pName; + uint32_t m_MemoryTypeIndex; + uint8_t m_Type; // ALLOCATION_TYPE + uint8_t m_SuballocationType; // VmaSuballocationType + // Reference counter for vmaMapMemory()/vmaUnmapMemory(). + uint8_t m_MapCount; + uint8_t m_Flags; // enum FLAGS +#if VMA_STATS_STRING_ENABLED + VmaBufferImageUsage m_BufferImageUsage; // 0 if unknown. +#endif + + void EnsureExtraData(VmaAllocator hAllocator); +}; +#endif // _VMA_ALLOCATION_T + +#ifndef _VMA_DEDICATED_ALLOCATION_LIST_ITEM_TRAITS +struct VmaDedicatedAllocationListItemTraits +{ + typedef VmaAllocation_T ItemType; + + static ItemType* GetPrev(const ItemType* item) + { + VMA_HEAVY_ASSERT(item->GetType() == VmaAllocation_T::ALLOCATION_TYPE_DEDICATED); + return item->m_DedicatedAllocation.m_Prev; + } + static ItemType* GetNext(const ItemType* item) + { + VMA_HEAVY_ASSERT(item->GetType() == VmaAllocation_T::ALLOCATION_TYPE_DEDICATED); + return item->m_DedicatedAllocation.m_Next; + } + static ItemType*& AccessPrev(ItemType* item) + { + VMA_HEAVY_ASSERT(item->GetType() == VmaAllocation_T::ALLOCATION_TYPE_DEDICATED); + return item->m_DedicatedAllocation.m_Prev; + } + static ItemType*& AccessNext(ItemType* item) + { + VMA_HEAVY_ASSERT(item->GetType() == VmaAllocation_T::ALLOCATION_TYPE_DEDICATED); + return item->m_DedicatedAllocation.m_Next; + } +}; +#endif // _VMA_DEDICATED_ALLOCATION_LIST_ITEM_TRAITS + +#ifndef _VMA_DEDICATED_ALLOCATION_LIST +/* +Stores linked list of VmaAllocation_T objects. +Thread-safe, synchronized internally. +*/ +class VmaDedicatedAllocationList +{ + VMA_CLASS_NO_COPY_NO_MOVE(VmaDedicatedAllocationList) +public: + VmaDedicatedAllocationList() {} + ~VmaDedicatedAllocationList(); + + void Init(bool useMutex) { m_UseMutex = useMutex; } + bool Validate(); + + void AddDetailedStatistics(VmaDetailedStatistics& inoutStats); + void AddStatistics(VmaStatistics& inoutStats); +#if VMA_STATS_STRING_ENABLED + // Writes JSON array with the list of allocations. 
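+    // (Output is e.g. [ {"Type": "BUFFER", "Size": 65536, ...}, ... ] - one
+    //  object per registered allocation, via VmaAllocation_T::PrintParameters;
+    //  the field names here are illustrative.)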
+    void BuildStatsString(VmaJsonWriter& json);
+#endif
+
+    bool IsEmpty();
+    void Register(VmaAllocation alloc);
+    void Unregister(VmaAllocation alloc);
+
+private:
+    typedef VmaIntrusiveLinkedList<VmaDedicatedAllocationListItemTraits> DedicatedAllocationLinkedList;
+
+    bool m_UseMutex = true;
+    VMA_RW_MUTEX m_Mutex;
+    DedicatedAllocationLinkedList m_AllocationList;
+};
+
+#ifndef _VMA_DEDICATED_ALLOCATION_LIST_FUNCTIONS
+
+VmaDedicatedAllocationList::~VmaDedicatedAllocationList()
+{
+    VMA_HEAVY_ASSERT(Validate());
+
+    if (!m_AllocationList.IsEmpty())
+    {
+        VMA_ASSERT_LEAK(false && "Unfreed dedicated allocations found!");
+    }
+}
+
+bool VmaDedicatedAllocationList::Validate()
+{
+    const size_t declaredCount = m_AllocationList.GetCount();
+    size_t actualCount = 0;
+    VmaMutexLockRead lock(m_Mutex, m_UseMutex);
+    for (VmaAllocation alloc = m_AllocationList.Front();
+        alloc != VMA_NULL; alloc = m_AllocationList.GetNext(alloc))
+    {
+        ++actualCount;
+    }
+    VMA_VALIDATE(actualCount == declaredCount);
+
+    return true;
+}
+
+void VmaDedicatedAllocationList::AddDetailedStatistics(VmaDetailedStatistics& inoutStats)
+{
+    for(auto* item = m_AllocationList.Front(); item != VMA_NULL; item = DedicatedAllocationLinkedList::GetNext(item))
+    {
+        const VkDeviceSize size = item->GetSize();
+        inoutStats.statistics.blockCount++;
+        inoutStats.statistics.blockBytes += size;
+        VmaAddDetailedStatisticsAllocation(inoutStats, item->GetSize());
+    }
+}
+
+void VmaDedicatedAllocationList::AddStatistics(VmaStatistics& inoutStats)
+{
+    VmaMutexLockRead lock(m_Mutex, m_UseMutex);
+
+    const uint32_t allocCount = (uint32_t)m_AllocationList.GetCount();
+    inoutStats.blockCount += allocCount;
+    inoutStats.allocationCount += allocCount;
+
+    for(auto* item = m_AllocationList.Front(); item != VMA_NULL; item = DedicatedAllocationLinkedList::GetNext(item))
+    {
+        const VkDeviceSize size = item->GetSize();
+        inoutStats.blockBytes += size;
+        inoutStats.allocationBytes += size;
+    }
+}
+
+#if VMA_STATS_STRING_ENABLED
+void VmaDedicatedAllocationList::BuildStatsString(VmaJsonWriter& json)
+{
+    VmaMutexLockRead lock(m_Mutex, m_UseMutex);
+    json.BeginArray();
+    for (VmaAllocation alloc = m_AllocationList.Front();
+        alloc != VMA_NULL; alloc = m_AllocationList.GetNext(alloc))
+    {
+        json.BeginObject(true);
+        alloc->PrintParameters(json);
+        json.EndObject();
+    }
+    json.EndArray();
+}
+#endif // VMA_STATS_STRING_ENABLED
+
+bool VmaDedicatedAllocationList::IsEmpty()
+{
+    VmaMutexLockRead lock(m_Mutex, m_UseMutex);
+    return m_AllocationList.IsEmpty();
+}
+
+void VmaDedicatedAllocationList::Register(VmaAllocation alloc)
+{
+    VmaMutexLockWrite lock(m_Mutex, m_UseMutex);
+    m_AllocationList.PushBack(alloc);
+}
+
+void VmaDedicatedAllocationList::Unregister(VmaAllocation alloc)
+{
+    VmaMutexLockWrite lock(m_Mutex, m_UseMutex);
+    m_AllocationList.Remove(alloc);
+}
+#endif // _VMA_DEDICATED_ALLOCATION_LIST_FUNCTIONS
+#endif // _VMA_DEDICATED_ALLOCATION_LIST
+
+#ifndef _VMA_SUBALLOCATION
+/*
+Represents a region of VmaDeviceMemoryBlock that is either assigned and returned as
+allocated memory block or free.
+*/
+struct VmaSuballocation
+{
+    VkDeviceSize offset;
+    VkDeviceSize size;
+    void* userData;
+    VmaSuballocationType type;
+};
+
+// Comparator for offsets.
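+// Both orderings below are used with VmaBinaryFindSorted() to locate a
+// suballocation by offset in a sorted vector, e.g. (sketch):
+//
+//     VmaSuballocation ref;
+//     ref.offset = offsetToFind; // other members intentionally uninitialized
+//     auto it = VmaBinaryFindSorted(vec.begin(), vec.end(), ref,
+//         VmaSuballocationOffsetLess());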
+struct VmaSuballocationOffsetLess +{ + bool operator()(const VmaSuballocation& lhs, const VmaSuballocation& rhs) const + { + return lhs.offset < rhs.offset; + } +}; + +struct VmaSuballocationOffsetGreater +{ + bool operator()(const VmaSuballocation& lhs, const VmaSuballocation& rhs) const + { + return lhs.offset > rhs.offset; + } +}; + +struct VmaSuballocationItemSizeLess +{ + bool operator()(const VmaSuballocationList::iterator lhs, + const VmaSuballocationList::iterator rhs) const + { + return lhs->size < rhs->size; + } + + bool operator()(const VmaSuballocationList::iterator lhs, + VkDeviceSize rhsSize) const + { + return lhs->size < rhsSize; + } +}; +#endif // _VMA_SUBALLOCATION + +#ifndef _VMA_ALLOCATION_REQUEST +/* +Parameters of planned allocation inside a VmaDeviceMemoryBlock. +item points to a FREE suballocation. +*/ +struct VmaAllocationRequest +{ + VmaAllocHandle allocHandle; + VkDeviceSize size; + VmaSuballocationList::iterator item; + void* customData; + uint64_t algorithmData; + VmaAllocationRequestType type; +}; +#endif // _VMA_ALLOCATION_REQUEST + +#ifndef _VMA_BLOCK_METADATA +/* +Data structure used for bookkeeping of allocations and unused ranges of memory +in a single VkDeviceMemory block. +*/ +class VmaBlockMetadata +{ + VMA_CLASS_NO_COPY_NO_MOVE(VmaBlockMetadata) +public: + // pAllocationCallbacks, if not null, must be owned externally - alive and unchanged for the whole lifetime of this object. + VmaBlockMetadata(const VkAllocationCallbacks* pAllocationCallbacks, + VkDeviceSize bufferImageGranularity, bool isVirtual); + virtual ~VmaBlockMetadata() = default; + + virtual void Init(VkDeviceSize size) { m_Size = size; } + bool IsVirtual() const { return m_IsVirtual; } + VkDeviceSize GetSize() const { return m_Size; } + + // Validates all data structures inside this object. If not valid, returns false. + virtual bool Validate() const = 0; + virtual size_t GetAllocationCount() const = 0; + virtual size_t GetFreeRegionsCount() const = 0; + virtual VkDeviceSize GetSumFreeSize() const = 0; + // Returns true if this block is empty - contains only single free suballocation. + virtual bool IsEmpty() const = 0; + virtual void GetAllocationInfo(VmaAllocHandle allocHandle, VmaVirtualAllocationInfo& outInfo) = 0; + virtual VkDeviceSize GetAllocationOffset(VmaAllocHandle allocHandle) const = 0; + virtual void* GetAllocationUserData(VmaAllocHandle allocHandle) const = 0; + + virtual VmaAllocHandle GetAllocationListBegin() const = 0; + virtual VmaAllocHandle GetNextAllocation(VmaAllocHandle prevAlloc) const = 0; + virtual VkDeviceSize GetNextFreeRegionSize(VmaAllocHandle alloc) const = 0; + + // Shouldn't modify blockCount. + virtual void AddDetailedStatistics(VmaDetailedStatistics& inoutStats) const = 0; + virtual void AddStatistics(VmaStatistics& inoutStats) const = 0; + +#if VMA_STATS_STRING_ENABLED + virtual void PrintDetailedMap(class VmaJsonWriter& json) const = 0; +#endif + + // Tries to find a place for suballocation with given parameters inside this block. + // If succeeded, fills pAllocationRequest and returns true. + // If failed, returns false. + virtual bool CreateAllocationRequest( + VkDeviceSize allocSize, + VkDeviceSize allocAlignment, + bool upperAddress, + VmaSuballocationType allocType, + // Always one of VMA_ALLOCATION_CREATE_STRATEGY_* or VMA_ALLOCATION_INTERNAL_STRATEGY_* flags. + uint32_t strategy, + VmaAllocationRequest* pAllocationRequest) = 0; + + virtual VkResult CheckCorruption(const void* pBlockData) = 0; + + // Makes actual allocation based on request. 
Request must already be checked and valid.
+    virtual void Alloc(
+        const VmaAllocationRequest& request,
+        VmaSuballocationType type,
+        void* userData) = 0;
+
+    // Frees suballocation assigned to given memory region.
+    virtual void Free(VmaAllocHandle allocHandle) = 0;
+
+    // Frees all allocations.
+    // Careful! Don't call it if there are VmaAllocation objects owned by userData of cleared allocations!
+    virtual void Clear() = 0;
+
+    virtual void SetAllocationUserData(VmaAllocHandle allocHandle, void* userData) = 0;
+    virtual void DebugLogAllAllocations() const = 0;
+
+protected:
+    const VkAllocationCallbacks* GetAllocationCallbacks() const { return m_pAllocationCallbacks; }
+    VkDeviceSize GetBufferImageGranularity() const { return m_BufferImageGranularity; }
+    VkDeviceSize GetDebugMargin() const { return VkDeviceSize(IsVirtual() ? 0 : VMA_DEBUG_MARGIN); }
+
+    void DebugLogAllocation(VkDeviceSize offset, VkDeviceSize size, void* userData) const;
+#if VMA_STATS_STRING_ENABLED
+    // mapRefCount == UINT32_MAX means unspecified.
+    void PrintDetailedMap_Begin(class VmaJsonWriter& json,
+        VkDeviceSize unusedBytes,
+        size_t allocationCount,
+        size_t unusedRangeCount) const;
+    void PrintDetailedMap_Allocation(class VmaJsonWriter& json,
+        VkDeviceSize offset, VkDeviceSize size, void* userData) const;
+    void PrintDetailedMap_UnusedRange(class VmaJsonWriter& json,
+        VkDeviceSize offset,
+        VkDeviceSize size) const;
+    void PrintDetailedMap_End(class VmaJsonWriter& json) const;
+#endif
+
+private:
+    VkDeviceSize m_Size;
+    const VkAllocationCallbacks* m_pAllocationCallbacks;
+    const VkDeviceSize m_BufferImageGranularity;
+    const bool m_IsVirtual;
+};
+
+#ifndef _VMA_BLOCK_METADATA_FUNCTIONS
+VmaBlockMetadata::VmaBlockMetadata(const VkAllocationCallbacks* pAllocationCallbacks,
+    VkDeviceSize bufferImageGranularity, bool isVirtual)
+    : m_Size(0),
+    m_pAllocationCallbacks(pAllocationCallbacks),
+    m_BufferImageGranularity(bufferImageGranularity),
+    m_IsVirtual(isVirtual) {}
+
+void VmaBlockMetadata::DebugLogAllocation(VkDeviceSize offset, VkDeviceSize size, void* userData) const
+{
+    if (IsVirtual())
+    {
+        VMA_LEAK_LOG_FORMAT("UNFREED VIRTUAL ALLOCATION; Offset: %" PRIu64 "; Size: %" PRIu64 "; UserData: %p", offset, size, userData);
+    }
+    else
+    {
+        VMA_ASSERT(userData != VMA_NULL);
+        VmaAllocation allocation = reinterpret_cast<VmaAllocation>(userData);
+
+        userData = allocation->GetUserData();
+        const char* name = allocation->GetName();
+
+#if VMA_STATS_STRING_ENABLED
+        VMA_LEAK_LOG_FORMAT("UNFREED ALLOCATION; Offset: %" PRIu64 "; Size: %" PRIu64 "; UserData: %p; Name: %s; Type: %s; Usage: %" PRIu64,
+            offset, size, userData, name ? name : "vma_empty",
+            VMA_SUBALLOCATION_TYPE_NAMES[allocation->GetSuballocationType()],
+            (uint64_t)allocation->GetBufferImageUsage().Value);
+#else
+        VMA_LEAK_LOG_FORMAT("UNFREED ALLOCATION; Offset: %" PRIu64 "; Size: %" PRIu64 "; UserData: %p; Name: %s; Type: %u",
+            offset, size, userData, name ?
name : "vma_empty", + (unsigned)allocation->GetSuballocationType()); +#endif // VMA_STATS_STRING_ENABLED + } + +} + +#if VMA_STATS_STRING_ENABLED +void VmaBlockMetadata::PrintDetailedMap_Begin(class VmaJsonWriter& json, + VkDeviceSize unusedBytes, size_t allocationCount, size_t unusedRangeCount) const +{ + json.WriteString("TotalBytes"); + json.WriteNumber(GetSize()); + + json.WriteString("UnusedBytes"); + json.WriteNumber(unusedBytes); + + json.WriteString("Allocations"); + json.WriteNumber((uint64_t)allocationCount); + + json.WriteString("UnusedRanges"); + json.WriteNumber((uint64_t)unusedRangeCount); + + json.WriteString("Suballocations"); + json.BeginArray(); +} + +void VmaBlockMetadata::PrintDetailedMap_Allocation(class VmaJsonWriter& json, + VkDeviceSize offset, VkDeviceSize size, void* userData) const +{ + json.BeginObject(true); + + json.WriteString("Offset"); + json.WriteNumber(offset); + + if (IsVirtual()) + { + json.WriteString("Size"); + json.WriteNumber(size); + if (userData) + { + json.WriteString("CustomData"); + json.BeginString(); + json.ContinueString_Pointer(userData); + json.EndString(); + } + } + else + { + ((VmaAllocation)userData)->PrintParameters(json); + } + + json.EndObject(); +} + +void VmaBlockMetadata::PrintDetailedMap_UnusedRange(class VmaJsonWriter& json, + VkDeviceSize offset, VkDeviceSize size) const +{ + json.BeginObject(true); + + json.WriteString("Offset"); + json.WriteNumber(offset); + + json.WriteString("Type"); + json.WriteString(VMA_SUBALLOCATION_TYPE_NAMES[VMA_SUBALLOCATION_TYPE_FREE]); + + json.WriteString("Size"); + json.WriteNumber(size); + + json.EndObject(); +} + +void VmaBlockMetadata::PrintDetailedMap_End(class VmaJsonWriter& json) const +{ + json.EndArray(); +} +#endif // VMA_STATS_STRING_ENABLED +#endif // _VMA_BLOCK_METADATA_FUNCTIONS +#endif // _VMA_BLOCK_METADATA + +#ifndef _VMA_BLOCK_BUFFER_IMAGE_GRANULARITY +// Before deleting object of this class remember to call 'Destroy()' +class VmaBlockBufferImageGranularity final +{ +public: + struct ValidationContext + { + const VkAllocationCallbacks* allocCallbacks; + uint16_t* pageAllocs; + }; + + VmaBlockBufferImageGranularity(VkDeviceSize bufferImageGranularity); + ~VmaBlockBufferImageGranularity(); + + bool IsEnabled() const { return m_BufferImageGranularity > MAX_LOW_BUFFER_IMAGE_GRANULARITY; } + + void Init(const VkAllocationCallbacks* pAllocationCallbacks, VkDeviceSize size); + // Before destroying object you must call free it's memory + void Destroy(const VkAllocationCallbacks* pAllocationCallbacks); + + void RoundupAllocRequest(VmaSuballocationType allocType, + VkDeviceSize& inOutAllocSize, + VkDeviceSize& inOutAllocAlignment) const; + + bool CheckConflictAndAlignUp(VkDeviceSize& inOutAllocOffset, + VkDeviceSize allocSize, + VkDeviceSize blockOffset, + VkDeviceSize blockSize, + VmaSuballocationType allocType) const; + + void AllocPages(uint8_t allocType, VkDeviceSize offset, VkDeviceSize size); + void FreePages(VkDeviceSize offset, VkDeviceSize size); + void Clear(); + + ValidationContext StartValidation(const VkAllocationCallbacks* pAllocationCallbacks, + bool isVirutal) const; + bool Validate(ValidationContext& ctx, VkDeviceSize offset, VkDeviceSize size) const; + bool FinishValidation(ValidationContext& ctx) const; + +private: + static const uint16_t MAX_LOW_BUFFER_IMAGE_GRANULARITY = 256; + + struct RegionInfo + { + uint8_t allocType; + uint16_t allocCount; + }; + + VkDeviceSize m_BufferImageGranularity; + uint32_t m_RegionCount; + RegionInfo* m_RegionInfo; + + uint32_t 
GetStartPage(VkDeviceSize offset) const { return OffsetToPageIndex(offset & ~(m_BufferImageGranularity - 1)); }
+    uint32_t GetEndPage(VkDeviceSize offset, VkDeviceSize size) const { return OffsetToPageIndex((offset + size - 1) & ~(m_BufferImageGranularity - 1)); }
+
+    uint32_t OffsetToPageIndex(VkDeviceSize offset) const;
+    void AllocPage(RegionInfo& page, uint8_t allocType);
+};
+
+#ifndef _VMA_BLOCK_BUFFER_IMAGE_GRANULARITY_FUNCTIONS
+VmaBlockBufferImageGranularity::VmaBlockBufferImageGranularity(VkDeviceSize bufferImageGranularity)
+    : m_BufferImageGranularity(bufferImageGranularity),
+    m_RegionCount(0),
+    m_RegionInfo(VMA_NULL) {}
+
+VmaBlockBufferImageGranularity::~VmaBlockBufferImageGranularity()
+{
+    VMA_ASSERT(m_RegionInfo == VMA_NULL && "Free not called before destroying object!");
+}
+
+void VmaBlockBufferImageGranularity::Init(const VkAllocationCallbacks* pAllocationCallbacks, VkDeviceSize size)
+{
+    if (IsEnabled())
+    {
+        m_RegionCount = static_cast<uint32_t>(VmaDivideRoundingUp(size, m_BufferImageGranularity));
+        m_RegionInfo = vma_new_array(pAllocationCallbacks, RegionInfo, m_RegionCount);
+        memset(m_RegionInfo, 0, m_RegionCount * sizeof(RegionInfo));
+    }
+}
+
+void VmaBlockBufferImageGranularity::Destroy(const VkAllocationCallbacks* pAllocationCallbacks)
+{
+    if (m_RegionInfo)
+    {
+        vma_delete_array(pAllocationCallbacks, m_RegionInfo, m_RegionCount);
+        m_RegionInfo = VMA_NULL;
+    }
+}
+
+void VmaBlockBufferImageGranularity::RoundupAllocRequest(VmaSuballocationType allocType,
+    VkDeviceSize& inOutAllocSize,
+    VkDeviceSize& inOutAllocAlignment) const
+{
+    if (m_BufferImageGranularity > 1 &&
+        m_BufferImageGranularity <= MAX_LOW_BUFFER_IMAGE_GRANULARITY)
+    {
+        if (allocType == VMA_SUBALLOCATION_TYPE_UNKNOWN ||
+            allocType == VMA_SUBALLOCATION_TYPE_IMAGE_UNKNOWN ||
+            allocType == VMA_SUBALLOCATION_TYPE_IMAGE_OPTIMAL)
+        {
+            inOutAllocAlignment = VMA_MAX(inOutAllocAlignment, m_BufferImageGranularity);
+            inOutAllocSize = VmaAlignUp(inOutAllocSize, m_BufferImageGranularity);
+        }
+    }
+}
+
+bool VmaBlockBufferImageGranularity::CheckConflictAndAlignUp(VkDeviceSize& inOutAllocOffset,
+    VkDeviceSize allocSize,
+    VkDeviceSize blockOffset,
+    VkDeviceSize blockSize,
+    VmaSuballocationType allocType) const
+{
+    if (IsEnabled())
+    {
+        uint32_t startPage = GetStartPage(inOutAllocOffset);
+        if (m_RegionInfo[startPage].allocCount > 0 &&
+            VmaIsBufferImageGranularityConflict(static_cast<VmaSuballocationType>(m_RegionInfo[startPage].allocType), allocType))
+        {
+            inOutAllocOffset = VmaAlignUp(inOutAllocOffset, m_BufferImageGranularity);
+            if (blockSize < allocSize + inOutAllocOffset - blockOffset)
+                return true;
+            ++startPage;
+        }
+        uint32_t endPage = GetEndPage(inOutAllocOffset, allocSize);
+        if (endPage != startPage &&
+            m_RegionInfo[endPage].allocCount > 0 &&
+            VmaIsBufferImageGranularityConflict(static_cast<VmaSuballocationType>(m_RegionInfo[endPage].allocType), allocType))
+        {
+            return true;
+        }
+    }
+    return false;
+}
+
+void VmaBlockBufferImageGranularity::AllocPages(uint8_t allocType, VkDeviceSize offset, VkDeviceSize size)
+{
+    if (IsEnabled())
+    {
+        uint32_t startPage = GetStartPage(offset);
+        AllocPage(m_RegionInfo[startPage], allocType);
+
+        uint32_t endPage = GetEndPage(offset, size);
+        if (startPage != endPage)
+            AllocPage(m_RegionInfo[endPage], allocType);
+    }
+}
+
+void VmaBlockBufferImageGranularity::FreePages(VkDeviceSize offset, VkDeviceSize size)
+{
+    if (IsEnabled())
+    {
+        uint32_t startPage = GetStartPage(offset);
+        --m_RegionInfo[startPage].allocCount;
+        if (m_RegionInfo[startPage].allocCount == 0)
+            
m_RegionInfo[startPage].allocType = VMA_SUBALLOCATION_TYPE_FREE;
+        uint32_t endPage = GetEndPage(offset, size);
+        if (startPage != endPage)
+        {
+            --m_RegionInfo[endPage].allocCount;
+            if (m_RegionInfo[endPage].allocCount == 0)
+                m_RegionInfo[endPage].allocType = VMA_SUBALLOCATION_TYPE_FREE;
+        }
+    }
+}
+
+void VmaBlockBufferImageGranularity::Clear()
+{
+    if (m_RegionInfo)
+        memset(m_RegionInfo, 0, m_RegionCount * sizeof(RegionInfo));
+}
+
+VmaBlockBufferImageGranularity::ValidationContext VmaBlockBufferImageGranularity::StartValidation(
+    const VkAllocationCallbacks* pAllocationCallbacks, bool isVirutal) const
+{
+    ValidationContext ctx{ pAllocationCallbacks, VMA_NULL };
+    if (!isVirutal && IsEnabled())
+    {
+        ctx.pageAllocs = vma_new_array(pAllocationCallbacks, uint16_t, m_RegionCount);
+        memset(ctx.pageAllocs, 0, m_RegionCount * sizeof(uint16_t));
+    }
+    return ctx;
+}
+
+bool VmaBlockBufferImageGranularity::Validate(ValidationContext& ctx,
+    VkDeviceSize offset, VkDeviceSize size) const
+{
+    if (IsEnabled())
+    {
+        uint32_t start = GetStartPage(offset);
+        ++ctx.pageAllocs[start];
+        VMA_VALIDATE(m_RegionInfo[start].allocCount > 0);
+
+        uint32_t end = GetEndPage(offset, size);
+        if (start != end)
+        {
+            ++ctx.pageAllocs[end];
+            VMA_VALIDATE(m_RegionInfo[end].allocCount > 0);
+        }
+    }
+    return true;
+}
+
+bool VmaBlockBufferImageGranularity::FinishValidation(ValidationContext& ctx) const
+{
+    // Check proper page structure
+    if (IsEnabled())
+    {
+        VMA_ASSERT(ctx.pageAllocs != VMA_NULL && "Validation context not initialized!");
+
+        for (uint32_t page = 0; page < m_RegionCount; ++page)
+        {
+            VMA_VALIDATE(ctx.pageAllocs[page] == m_RegionInfo[page].allocCount);
+        }
+        vma_delete_array(ctx.allocCallbacks, ctx.pageAllocs, m_RegionCount);
+        ctx.pageAllocs = VMA_NULL;
+    }
+    return true;
+}
+
+uint32_t VmaBlockBufferImageGranularity::OffsetToPageIndex(VkDeviceSize offset) const
+{
+    return static_cast<uint32_t>(offset >> VMA_BITSCAN_MSB(m_BufferImageGranularity));
+}
+
+void VmaBlockBufferImageGranularity::AllocPage(RegionInfo& page, uint8_t allocType)
+{
+    // When current alloc type is free then it can be overridden by new type
+    if (page.allocCount == 0 || (page.allocCount > 0 && page.allocType == VMA_SUBALLOCATION_TYPE_FREE))
+        page.allocType = allocType;
+
+    ++page.allocCount;
+}
+#endif // _VMA_BLOCK_BUFFER_IMAGE_GRANULARITY_FUNCTIONS
+#endif // _VMA_BLOCK_BUFFER_IMAGE_GRANULARITY
+
+#ifndef _VMA_BLOCK_METADATA_LINEAR
+/*
+Allocations and their references in internal data structure look like this:
+
+if(m_2ndVectorMode == SECOND_VECTOR_EMPTY):
+
+        0 +-------+
+          |       |
+          |       |
+          |       |
+          +-------+
+          | Alloc |  1st[m_1stNullItemsBeginCount]
+          +-------+
+          | Alloc |  1st[m_1stNullItemsBeginCount + 1]
+          +-------+
+          | ...   |
+          +-------+
+          | Alloc |  1st[1st.size() - 1]
+          +-------+
+          |       |
+          |       |
+          |       |
+GetSize() +-------+
+
+if(m_2ndVectorMode == SECOND_VECTOR_RING_BUFFER):
+
+        0 +-------+
+          | Alloc |  2nd[0]
+          +-------+
+          | Alloc |  2nd[1]
+          +-------+
+          | ...   |
+          +-------+
+          | Alloc |  2nd[2nd.size() - 1]
+          +-------+
+          |       |
+          |       |
+          |       |
+          +-------+
+          | Alloc |  1st[m_1stNullItemsBeginCount]
+          +-------+
+          | Alloc |  1st[m_1stNullItemsBeginCount + 1]
+          +-------+
+          | ...   |
+          +-------+
+          | Alloc |  1st[1st.size() - 1]
+          +-------+
+          |       |
+GetSize() +-------+
+
+if(m_2ndVectorMode == SECOND_VECTOR_DOUBLE_STACK):
+
+        0 +-------+
+          |       |
+          |       |
+          |       |
+          +-------+
+          | Alloc |  1st[m_1stNullItemsBeginCount]
+          +-------+
+          | Alloc |  1st[m_1stNullItemsBeginCount + 1]
+          +-------+
+          | ...
|
+          +-------+
+          | Alloc |  1st[1st.size() - 1]
+          +-------+
+          |       |
+          |       |
+          |       |
+          +-------+
+          | Alloc |  2nd[2nd.size() - 1]
+          +-------+
+          | ...   |
+          +-------+
+          | Alloc |  2nd[1]
+          +-------+
+          | Alloc |  2nd[0]
+GetSize() +-------+
+
+*/
+class VmaBlockMetadata_Linear : public VmaBlockMetadata
+{
+    VMA_CLASS_NO_COPY_NO_MOVE(VmaBlockMetadata_Linear)
+public:
+    VmaBlockMetadata_Linear(const VkAllocationCallbacks* pAllocationCallbacks,
+        VkDeviceSize bufferImageGranularity, bool isVirtual);
+    virtual ~VmaBlockMetadata_Linear() = default;
+
+    VkDeviceSize GetSumFreeSize() const override { return m_SumFreeSize; }
+    bool IsEmpty() const override { return GetAllocationCount() == 0; }
+    VkDeviceSize GetAllocationOffset(VmaAllocHandle allocHandle) const override { return (VkDeviceSize)allocHandle - 1; }
+
+    void Init(VkDeviceSize size) override;
+    bool Validate() const override;
+    size_t GetAllocationCount() const override;
+    size_t GetFreeRegionsCount() const override;
+
+    void AddDetailedStatistics(VmaDetailedStatistics& inoutStats) const override;
+    void AddStatistics(VmaStatistics& inoutStats) const override;
+
+#if VMA_STATS_STRING_ENABLED
+    void PrintDetailedMap(class VmaJsonWriter& json) const override;
+#endif
+
+    bool CreateAllocationRequest(
+        VkDeviceSize allocSize,
+        VkDeviceSize allocAlignment,
+        bool upperAddress,
+        VmaSuballocationType allocType,
+        uint32_t strategy,
+        VmaAllocationRequest* pAllocationRequest) override;
+
+    VkResult CheckCorruption(const void* pBlockData) override;
+
+    void Alloc(
+        const VmaAllocationRequest& request,
+        VmaSuballocationType type,
+        void* userData) override;
+
+    void Free(VmaAllocHandle allocHandle) override;
+    void GetAllocationInfo(VmaAllocHandle allocHandle, VmaVirtualAllocationInfo& outInfo) override;
+    void* GetAllocationUserData(VmaAllocHandle allocHandle) const override;
+    VmaAllocHandle GetAllocationListBegin() const override;
+    VmaAllocHandle GetNextAllocation(VmaAllocHandle prevAlloc) const override;
+    VkDeviceSize GetNextFreeRegionSize(VmaAllocHandle alloc) const override;
+    void Clear() override;
+    void SetAllocationUserData(VmaAllocHandle allocHandle, void* userData) override;
+    void DebugLogAllAllocations() const override;
+
+private:
+    /*
+    There are two suballocation vectors, used in ping-pong way.
+    The one with index m_1stVectorIndex is called 1st.
+    The one with index (m_1stVectorIndex ^ 1) is called 2nd.
+    2nd can be non-empty only when 1st is not empty.
+    When 2nd is not empty, m_2ndVectorMode indicates its mode of operation.
+    */
+    typedef VmaVector<VmaSuballocation, VmaStlAllocator<VmaSuballocation>> SuballocationVectorType;
+
+    enum SECOND_VECTOR_MODE
+    {
+        SECOND_VECTOR_EMPTY,
+        /*
+        Suballocations in 2nd vector are created later than the ones in 1st, but they
+        all have smaller offset.
+        */
+        SECOND_VECTOR_RING_BUFFER,
+        /*
+        Suballocations in 2nd vector are upper side of double stack.
+        They all have offsets higher than those in 1st vector.
+        Top of this stack means smaller offsets, but higher indices in this vector.
+        */
+        SECOND_VECTOR_DOUBLE_STACK,
+    };
+
+    VkDeviceSize m_SumFreeSize;
+    SuballocationVectorType m_Suballocations0, m_Suballocations1;
+    uint32_t m_1stVectorIndex;
+    SECOND_VECTOR_MODE m_2ndVectorMode;
+    // Number of items in 1st vector with hAllocation = null at the beginning.
+    size_t m_1stNullItemsBeginCount;
+    // Number of other items in 1st vector with hAllocation = null somewhere in the middle.
+    size_t m_1stNullItemsMiddleCount;
+    // Number of items in 2nd vector with hAllocation = null.
+    size_t m_2ndNullItemsCount;
+
+    SuballocationVectorType& AccessSuballocations1st() { return m_1stVectorIndex ? m_Suballocations1 : m_Suballocations0; }
+    SuballocationVectorType& AccessSuballocations2nd() { return m_1stVectorIndex ? m_Suballocations0 : m_Suballocations1; }
+    const SuballocationVectorType& AccessSuballocations1st() const { return m_1stVectorIndex ? m_Suballocations1 : m_Suballocations0; }
+    const SuballocationVectorType& AccessSuballocations2nd() const { return m_1stVectorIndex ? m_Suballocations0 : m_Suballocations1; }
+
+    VmaSuballocation& FindSuballocation(VkDeviceSize offset) const;
+    bool ShouldCompact1st() const;
+    void CleanupAfterFree();
+
+    bool CreateAllocationRequest_LowerAddress(
+        VkDeviceSize allocSize,
+        VkDeviceSize allocAlignment,
+        VmaSuballocationType allocType,
+        uint32_t strategy,
+        VmaAllocationRequest* pAllocationRequest);
+    bool CreateAllocationRequest_UpperAddress(
+        VkDeviceSize allocSize,
+        VkDeviceSize allocAlignment,
+        VmaSuballocationType allocType,
+        uint32_t strategy,
+        VmaAllocationRequest* pAllocationRequest);
+};
+
+#ifndef _VMA_BLOCK_METADATA_LINEAR_FUNCTIONS
+VmaBlockMetadata_Linear::VmaBlockMetadata_Linear(const VkAllocationCallbacks* pAllocationCallbacks,
+    VkDeviceSize bufferImageGranularity, bool isVirtual)
+    : VmaBlockMetadata(pAllocationCallbacks, bufferImageGranularity, isVirtual),
+    m_SumFreeSize(0),
+    m_Suballocations0(VmaStlAllocator<VmaSuballocation>(pAllocationCallbacks)),
+    m_Suballocations1(VmaStlAllocator<VmaSuballocation>(pAllocationCallbacks)),
+    m_1stVectorIndex(0),
+    m_2ndVectorMode(SECOND_VECTOR_EMPTY),
+    m_1stNullItemsBeginCount(0),
+    m_1stNullItemsMiddleCount(0),
+    m_2ndNullItemsCount(0) {}
+
+void VmaBlockMetadata_Linear::Init(VkDeviceSize size)
+{
+    VmaBlockMetadata::Init(size);
+    m_SumFreeSize = size;
+}
+
+bool VmaBlockMetadata_Linear::Validate() const
+{
+    const SuballocationVectorType& suballocations1st = AccessSuballocations1st();
+    const SuballocationVectorType& suballocations2nd = AccessSuballocations2nd();
+
+    VMA_VALIDATE(suballocations2nd.empty() == (m_2ndVectorMode == SECOND_VECTOR_EMPTY));
+    VMA_VALIDATE(!suballocations1st.empty() ||
+        suballocations2nd.empty() ||
+        m_2ndVectorMode != SECOND_VECTOR_RING_BUFFER);
+
+    if (!suballocations1st.empty())
+    {
+        // Null item at the beginning should be accounted into m_1stNullItemsBeginCount.
+        VMA_VALIDATE(suballocations1st[m_1stNullItemsBeginCount].type != VMA_SUBALLOCATION_TYPE_FREE);
+        // Null item at the end should be just pop_back().
+        VMA_VALIDATE(suballocations1st.back().type != VMA_SUBALLOCATION_TYPE_FREE);
+    }
+    if (!suballocations2nd.empty())
+    {
+        // Null item at the end should be just pop_back().
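+        // (CleanupAfterFree() pops trailing free items right after each
+        //  Free(), which is what keeps this invariant true.)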
+ VMA_VALIDATE(suballocations2nd.back().type != VMA_SUBALLOCATION_TYPE_FREE); + } + + VMA_VALIDATE(m_1stNullItemsBeginCount + m_1stNullItemsMiddleCount <= suballocations1st.size()); + VMA_VALIDATE(m_2ndNullItemsCount <= suballocations2nd.size()); + + VkDeviceSize sumUsedSize = 0; + const size_t suballoc1stCount = suballocations1st.size(); + const VkDeviceSize debugMargin = GetDebugMargin(); + VkDeviceSize offset = 0; + + if (m_2ndVectorMode == SECOND_VECTOR_RING_BUFFER) + { + const size_t suballoc2ndCount = suballocations2nd.size(); + size_t nullItem2ndCount = 0; + for (size_t i = 0; i < suballoc2ndCount; ++i) + { + const VmaSuballocation& suballoc = suballocations2nd[i]; + const bool currFree = (suballoc.type == VMA_SUBALLOCATION_TYPE_FREE); + + VmaAllocation const alloc = (VmaAllocation)suballoc.userData; + if (!IsVirtual()) + { + VMA_VALIDATE(currFree == (alloc == VK_NULL_HANDLE)); + } + VMA_VALIDATE(suballoc.offset >= offset); + + if (!currFree) + { + if (!IsVirtual()) + { + VMA_VALIDATE((VkDeviceSize)alloc->GetAllocHandle() == suballoc.offset + 1); + VMA_VALIDATE(alloc->GetSize() == suballoc.size); + } + sumUsedSize += suballoc.size; + } + else + { + ++nullItem2ndCount; + } + + offset = suballoc.offset + suballoc.size + debugMargin; + } + + VMA_VALIDATE(nullItem2ndCount == m_2ndNullItemsCount); + } + + for (size_t i = 0; i < m_1stNullItemsBeginCount; ++i) + { + const VmaSuballocation& suballoc = suballocations1st[i]; + VMA_VALIDATE(suballoc.type == VMA_SUBALLOCATION_TYPE_FREE && + suballoc.userData == VMA_NULL); + } + + size_t nullItem1stCount = m_1stNullItemsBeginCount; + + for (size_t i = m_1stNullItemsBeginCount; i < suballoc1stCount; ++i) + { + const VmaSuballocation& suballoc = suballocations1st[i]; + const bool currFree = (suballoc.type == VMA_SUBALLOCATION_TYPE_FREE); + + VmaAllocation const alloc = (VmaAllocation)suballoc.userData; + if (!IsVirtual()) + { + VMA_VALIDATE(currFree == (alloc == VK_NULL_HANDLE)); + } + VMA_VALIDATE(suballoc.offset >= offset); + VMA_VALIDATE(i >= m_1stNullItemsBeginCount || currFree); + + if (!currFree) + { + if (!IsVirtual()) + { + VMA_VALIDATE((VkDeviceSize)alloc->GetAllocHandle() == suballoc.offset + 1); + VMA_VALIDATE(alloc->GetSize() == suballoc.size); + } + sumUsedSize += suballoc.size; + } + else + { + ++nullItem1stCount; + } + + offset = suballoc.offset + suballoc.size + debugMargin; + } + VMA_VALIDATE(nullItem1stCount == m_1stNullItemsBeginCount + m_1stNullItemsMiddleCount); + + if (m_2ndVectorMode == SECOND_VECTOR_DOUBLE_STACK) + { + const size_t suballoc2ndCount = suballocations2nd.size(); + size_t nullItem2ndCount = 0; + for (size_t i = suballoc2ndCount; i--; ) + { + const VmaSuballocation& suballoc = suballocations2nd[i]; + const bool currFree = (suballoc.type == VMA_SUBALLOCATION_TYPE_FREE); + + VmaAllocation const alloc = (VmaAllocation)suballoc.userData; + if (!IsVirtual()) + { + VMA_VALIDATE(currFree == (alloc == VK_NULL_HANDLE)); + } + VMA_VALIDATE(suballoc.offset >= offset); + + if (!currFree) + { + if (!IsVirtual()) + { + VMA_VALIDATE((VkDeviceSize)alloc->GetAllocHandle() == suballoc.offset + 1); + VMA_VALIDATE(alloc->GetSize() == suballoc.size); + } + sumUsedSize += suballoc.size; + } + else + { + ++nullItem2ndCount; + } + + offset = suballoc.offset + suballoc.size + debugMargin; + } + + VMA_VALIDATE(nullItem2ndCount == m_2ndNullItemsCount); + } + + VMA_VALIDATE(offset <= GetSize()); + VMA_VALIDATE(m_SumFreeSize == GetSize() - sumUsedSize); + + return true; +} + +size_t VmaBlockMetadata_Linear::GetAllocationCount() const +{ + 
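// Worked example: 1st = { null, null, A, null, B } with
+    // m_1stNullItemsBeginCount == 2 and m_1stNullItemsMiddleCount == 1,
+    // and 2nd = { C } with m_2ndNullItemsCount == 0:
+    //     5 - 2 - 1 + 1 - 0 = 3 live allocations.
+    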
return AccessSuballocations1st().size() - m_1stNullItemsBeginCount - m_1stNullItemsMiddleCount + + AccessSuballocations2nd().size() - m_2ndNullItemsCount; +} + +size_t VmaBlockMetadata_Linear::GetFreeRegionsCount() const +{ + // Function only used for defragmentation, which is disabled for this algorithm + VMA_ASSERT(0); + return SIZE_MAX; +} + +void VmaBlockMetadata_Linear::AddDetailedStatistics(VmaDetailedStatistics& inoutStats) const +{ + const VkDeviceSize size = GetSize(); + const SuballocationVectorType& suballocations1st = AccessSuballocations1st(); + const SuballocationVectorType& suballocations2nd = AccessSuballocations2nd(); + const size_t suballoc1stCount = suballocations1st.size(); + const size_t suballoc2ndCount = suballocations2nd.size(); + + inoutStats.statistics.blockCount++; + inoutStats.statistics.blockBytes += size; + + VkDeviceSize lastOffset = 0; + + if (m_2ndVectorMode == SECOND_VECTOR_RING_BUFFER) + { + const VkDeviceSize freeSpace2ndTo1stEnd = suballocations1st[m_1stNullItemsBeginCount].offset; + size_t nextAlloc2ndIndex = 0; + while (lastOffset < freeSpace2ndTo1stEnd) + { + // Find next non-null allocation or move nextAllocIndex to the end. + while (nextAlloc2ndIndex < suballoc2ndCount && + suballocations2nd[nextAlloc2ndIndex].userData == VMA_NULL) + { + ++nextAlloc2ndIndex; + } + + // Found non-null allocation. + if (nextAlloc2ndIndex < suballoc2ndCount) + { + const VmaSuballocation& suballoc = suballocations2nd[nextAlloc2ndIndex]; + + // 1. Process free space before this allocation. + if (lastOffset < suballoc.offset) + { + // There is free space from lastOffset to suballoc.offset. + const VkDeviceSize unusedRangeSize = suballoc.offset - lastOffset; + VmaAddDetailedStatisticsUnusedRange(inoutStats, unusedRangeSize); + } + + // 2. Process this allocation. + // There is allocation with suballoc.offset, suballoc.size. + VmaAddDetailedStatisticsAllocation(inoutStats, suballoc.size); + + // 3. Prepare for next iteration. + lastOffset = suballoc.offset + suballoc.size; + ++nextAlloc2ndIndex; + } + // We are at the end. + else + { + // There is free space from lastOffset to freeSpace2ndTo1stEnd. + if (lastOffset < freeSpace2ndTo1stEnd) + { + const VkDeviceSize unusedRangeSize = freeSpace2ndTo1stEnd - lastOffset; + VmaAddDetailedStatisticsUnusedRange(inoutStats, unusedRangeSize); + } + + // End of loop. + lastOffset = freeSpace2ndTo1stEnd; + } + } + } + + size_t nextAlloc1stIndex = m_1stNullItemsBeginCount; + const VkDeviceSize freeSpace1stTo2ndEnd = + m_2ndVectorMode == SECOND_VECTOR_DOUBLE_STACK ? suballocations2nd.back().offset : size; + while (lastOffset < freeSpace1stTo2ndEnd) + { + // Find next non-null allocation or move nextAllocIndex to the end. + while (nextAlloc1stIndex < suballoc1stCount && + suballocations1st[nextAlloc1stIndex].userData == VMA_NULL) + { + ++nextAlloc1stIndex; + } + + // Found non-null allocation. + if (nextAlloc1stIndex < suballoc1stCount) + { + const VmaSuballocation& suballoc = suballocations1st[nextAlloc1stIndex]; + + // 1. Process free space before this allocation. + if (lastOffset < suballoc.offset) + { + // There is free space from lastOffset to suballoc.offset. + const VkDeviceSize unusedRangeSize = suballoc.offset - lastOffset; + VmaAddDetailedStatisticsUnusedRange(inoutStats, unusedRangeSize); + } + + // 2. Process this allocation. + // There is allocation with suballoc.offset, suballoc.size. + VmaAddDetailedStatisticsAllocation(inoutStats, suballoc.size); + + // 3. Prepare for next iteration. 
+ lastOffset = suballoc.offset + suballoc.size; + ++nextAlloc1stIndex; + } + // We are at the end. + else + { + // There is free space from lastOffset to freeSpace1stTo2ndEnd. + if (lastOffset < freeSpace1stTo2ndEnd) + { + const VkDeviceSize unusedRangeSize = freeSpace1stTo2ndEnd - lastOffset; + VmaAddDetailedStatisticsUnusedRange(inoutStats, unusedRangeSize); + } + + // End of loop. + lastOffset = freeSpace1stTo2ndEnd; + } + } + + if (m_2ndVectorMode == SECOND_VECTOR_DOUBLE_STACK) + { + size_t nextAlloc2ndIndex = suballocations2nd.size() - 1; + while (lastOffset < size) + { + // Find next non-null allocation or move nextAllocIndex to the end. + while (nextAlloc2ndIndex != SIZE_MAX && + suballocations2nd[nextAlloc2ndIndex].userData == VMA_NULL) + { + --nextAlloc2ndIndex; + } + + // Found non-null allocation. + if (nextAlloc2ndIndex != SIZE_MAX) + { + const VmaSuballocation& suballoc = suballocations2nd[nextAlloc2ndIndex]; + + // 1. Process free space before this allocation. + if (lastOffset < suballoc.offset) + { + // There is free space from lastOffset to suballoc.offset. + const VkDeviceSize unusedRangeSize = suballoc.offset - lastOffset; + VmaAddDetailedStatisticsUnusedRange(inoutStats, unusedRangeSize); + } + + // 2. Process this allocation. + // There is allocation with suballoc.offset, suballoc.size. + VmaAddDetailedStatisticsAllocation(inoutStats, suballoc.size); + + // 3. Prepare for next iteration. + lastOffset = suballoc.offset + suballoc.size; + --nextAlloc2ndIndex; + } + // We are at the end. + else + { + // There is free space from lastOffset to size. + if (lastOffset < size) + { + const VkDeviceSize unusedRangeSize = size - lastOffset; + VmaAddDetailedStatisticsUnusedRange(inoutStats, unusedRangeSize); + } + + // End of loop. + lastOffset = size; + } + } + } +} + +void VmaBlockMetadata_Linear::AddStatistics(VmaStatistics& inoutStats) const +{ + const SuballocationVectorType& suballocations1st = AccessSuballocations1st(); + const SuballocationVectorType& suballocations2nd = AccessSuballocations2nd(); + const VkDeviceSize size = GetSize(); + const size_t suballoc1stCount = suballocations1st.size(); + const size_t suballoc2ndCount = suballocations2nd.size(); + + inoutStats.blockCount++; + inoutStats.blockBytes += size; + inoutStats.allocationBytes += size - m_SumFreeSize; + + VkDeviceSize lastOffset = 0; + + if (m_2ndVectorMode == SECOND_VECTOR_RING_BUFFER) + { + const VkDeviceSize freeSpace2ndTo1stEnd = suballocations1st[m_1stNullItemsBeginCount].offset; + size_t nextAlloc2ndIndex = m_1stNullItemsBeginCount; + while (lastOffset < freeSpace2ndTo1stEnd) + { + // Find next non-null allocation or move nextAlloc2ndIndex to the end. + while (nextAlloc2ndIndex < suballoc2ndCount && + suballocations2nd[nextAlloc2ndIndex].userData == VMA_NULL) + { + ++nextAlloc2ndIndex; + } + + // Found non-null allocation. + if (nextAlloc2ndIndex < suballoc2ndCount) + { + const VmaSuballocation& suballoc = suballocations2nd[nextAlloc2ndIndex]; + + // Process this allocation. + // There is allocation with suballoc.offset, suballoc.size. + ++inoutStats.allocationCount; + + // Prepare for next iteration. + lastOffset = suballoc.offset + suballoc.size; + ++nextAlloc2ndIndex; + } + // We are at the end. + else + { + // End of loop. + lastOffset = freeSpace2ndTo1stEnd; + } + } + } + + size_t nextAlloc1stIndex = m_1stNullItemsBeginCount; + const VkDeviceSize freeSpace1stTo2ndEnd = + m_2ndVectorMode == SECOND_VECTOR_DOUBLE_STACK ? 
suballocations2nd.back().offset : size; + while (lastOffset < freeSpace1stTo2ndEnd) + { + // Find next non-null allocation or move nextAllocIndex to the end. + while (nextAlloc1stIndex < suballoc1stCount && + suballocations1st[nextAlloc1stIndex].userData == VMA_NULL) + { + ++nextAlloc1stIndex; + } + + // Found non-null allocation. + if (nextAlloc1stIndex < suballoc1stCount) + { + const VmaSuballocation& suballoc = suballocations1st[nextAlloc1stIndex]; + + // Process this allocation. + // There is allocation with suballoc.offset, suballoc.size. + ++inoutStats.allocationCount; + + // Prepare for next iteration. + lastOffset = suballoc.offset + suballoc.size; + ++nextAlloc1stIndex; + } + // We are at the end. + else + { + // End of loop. + lastOffset = freeSpace1stTo2ndEnd; + } + } + + if (m_2ndVectorMode == SECOND_VECTOR_DOUBLE_STACK) + { + size_t nextAlloc2ndIndex = suballocations2nd.size() - 1; + while (lastOffset < size) + { + // Find next non-null allocation or move nextAlloc2ndIndex to the end. + while (nextAlloc2ndIndex != SIZE_MAX && + suballocations2nd[nextAlloc2ndIndex].userData == VMA_NULL) + { + --nextAlloc2ndIndex; + } + + // Found non-null allocation. + if (nextAlloc2ndIndex != SIZE_MAX) + { + const VmaSuballocation& suballoc = suballocations2nd[nextAlloc2ndIndex]; + + // Process this allocation. + // There is allocation with suballoc.offset, suballoc.size. + ++inoutStats.allocationCount; + + // Prepare for next iteration. + lastOffset = suballoc.offset + suballoc.size; + --nextAlloc2ndIndex; + } + // We are at the end. + else + { + // End of loop. + lastOffset = size; + } + } + } +} + +#if VMA_STATS_STRING_ENABLED +void VmaBlockMetadata_Linear::PrintDetailedMap(class VmaJsonWriter& json) const +{ + const VkDeviceSize size = GetSize(); + const SuballocationVectorType& suballocations1st = AccessSuballocations1st(); + const SuballocationVectorType& suballocations2nd = AccessSuballocations2nd(); + const size_t suballoc1stCount = suballocations1st.size(); + const size_t suballoc2ndCount = suballocations2nd.size(); + + // FIRST PASS + + size_t unusedRangeCount = 0; + VkDeviceSize usedBytes = 0; + + VkDeviceSize lastOffset = 0; + + size_t alloc2ndCount = 0; + if (m_2ndVectorMode == SECOND_VECTOR_RING_BUFFER) + { + const VkDeviceSize freeSpace2ndTo1stEnd = suballocations1st[m_1stNullItemsBeginCount].offset; + size_t nextAlloc2ndIndex = 0; + while (lastOffset < freeSpace2ndTo1stEnd) + { + // Find next non-null allocation or move nextAlloc2ndIndex to the end. + while (nextAlloc2ndIndex < suballoc2ndCount && + suballocations2nd[nextAlloc2ndIndex].userData == VMA_NULL) + { + ++nextAlloc2ndIndex; + } + + // Found non-null allocation. + if (nextAlloc2ndIndex < suballoc2ndCount) + { + const VmaSuballocation& suballoc = suballocations2nd[nextAlloc2ndIndex]; + + // 1. Process free space before this allocation. + if (lastOffset < suballoc.offset) + { + // There is free space from lastOffset to suballoc.offset. + ++unusedRangeCount; + } + + // 2. Process this allocation. + // There is allocation with suballoc.offset, suballoc.size. + ++alloc2ndCount; + usedBytes += suballoc.size; + + // 3. Prepare for next iteration. + lastOffset = suballoc.offset + suballoc.size; + ++nextAlloc2ndIndex; + } + // We are at the end. + else + { + if (lastOffset < freeSpace2ndTo1stEnd) + { + // There is free space from lastOffset to freeSpace2ndTo1stEnd. + ++unusedRangeCount; + } + + // End of loop. 
+ lastOffset = freeSpace2ndTo1stEnd; + } + } + } + + size_t nextAlloc1stIndex = m_1stNullItemsBeginCount; + size_t alloc1stCount = 0; + const VkDeviceSize freeSpace1stTo2ndEnd = + m_2ndVectorMode == SECOND_VECTOR_DOUBLE_STACK ? suballocations2nd.back().offset : size; + while (lastOffset < freeSpace1stTo2ndEnd) + { + // Find next non-null allocation or move nextAllocIndex to the end. + while (nextAlloc1stIndex < suballoc1stCount && + suballocations1st[nextAlloc1stIndex].userData == VMA_NULL) + { + ++nextAlloc1stIndex; + } + + // Found non-null allocation. + if (nextAlloc1stIndex < suballoc1stCount) + { + const VmaSuballocation& suballoc = suballocations1st[nextAlloc1stIndex]; + + // 1. Process free space before this allocation. + if (lastOffset < suballoc.offset) + { + // There is free space from lastOffset to suballoc.offset. + ++unusedRangeCount; + } + + // 2. Process this allocation. + // There is allocation with suballoc.offset, suballoc.size. + ++alloc1stCount; + usedBytes += suballoc.size; + + // 3. Prepare for next iteration. + lastOffset = suballoc.offset + suballoc.size; + ++nextAlloc1stIndex; + } + // We are at the end. + else + { + if (lastOffset < freeSpace1stTo2ndEnd) + { + // There is free space from lastOffset to freeSpace1stTo2ndEnd. + ++unusedRangeCount; + } + + // End of loop. + lastOffset = freeSpace1stTo2ndEnd; + } + } + + if (m_2ndVectorMode == SECOND_VECTOR_DOUBLE_STACK) + { + size_t nextAlloc2ndIndex = suballocations2nd.size() - 1; + while (lastOffset < size) + { + // Find next non-null allocation or move nextAlloc2ndIndex to the end. + while (nextAlloc2ndIndex != SIZE_MAX && + suballocations2nd[nextAlloc2ndIndex].userData == VMA_NULL) + { + --nextAlloc2ndIndex; + } + + // Found non-null allocation. + if (nextAlloc2ndIndex != SIZE_MAX) + { + const VmaSuballocation& suballoc = suballocations2nd[nextAlloc2ndIndex]; + + // 1. Process free space before this allocation. + if (lastOffset < suballoc.offset) + { + // There is free space from lastOffset to suballoc.offset. + ++unusedRangeCount; + } + + // 2. Process this allocation. + // There is allocation with suballoc.offset, suballoc.size. + ++alloc2ndCount; + usedBytes += suballoc.size; + + // 3. Prepare for next iteration. + lastOffset = suballoc.offset + suballoc.size; + --nextAlloc2ndIndex; + } + // We are at the end. + else + { + if (lastOffset < size) + { + // There is free space from lastOffset to size. + ++unusedRangeCount; + } + + // End of loop. + lastOffset = size; + } + } + } + + const VkDeviceSize unusedBytes = size - usedBytes; + PrintDetailedMap_Begin(json, unusedBytes, alloc1stCount + alloc2ndCount, unusedRangeCount); + + // SECOND PASS + lastOffset = 0; + + if (m_2ndVectorMode == SECOND_VECTOR_RING_BUFFER) + { + const VkDeviceSize freeSpace2ndTo1stEnd = suballocations1st[m_1stNullItemsBeginCount].offset; + size_t nextAlloc2ndIndex = 0; + while (lastOffset < freeSpace2ndTo1stEnd) + { + // Find next non-null allocation or move nextAlloc2ndIndex to the end. + while (nextAlloc2ndIndex < suballoc2ndCount && + suballocations2nd[nextAlloc2ndIndex].userData == VMA_NULL) + { + ++nextAlloc2ndIndex; + } + + // Found non-null allocation. + if (nextAlloc2ndIndex < suballoc2ndCount) + { + const VmaSuballocation& suballoc = suballocations2nd[nextAlloc2ndIndex]; + + // 1. Process free space before this allocation. + if (lastOffset < suballoc.offset) + { + // There is free space from lastOffset to suballoc.offset. 
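+                    // (Second pass: the same walk as the first pass above,
+                    //  but each gap is now emitted as an "UnusedRange" entry.)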
+ const VkDeviceSize unusedRangeSize = suballoc.offset - lastOffset; + PrintDetailedMap_UnusedRange(json, lastOffset, unusedRangeSize); + } + + // 2. Process this allocation. + // There is allocation with suballoc.offset, suballoc.size. + PrintDetailedMap_Allocation(json, suballoc.offset, suballoc.size, suballoc.userData); + + // 3. Prepare for next iteration. + lastOffset = suballoc.offset + suballoc.size; + ++nextAlloc2ndIndex; + } + // We are at the end. + else + { + if (lastOffset < freeSpace2ndTo1stEnd) + { + // There is free space from lastOffset to freeSpace2ndTo1stEnd. + const VkDeviceSize unusedRangeSize = freeSpace2ndTo1stEnd - lastOffset; + PrintDetailedMap_UnusedRange(json, lastOffset, unusedRangeSize); + } + + // End of loop. + lastOffset = freeSpace2ndTo1stEnd; + } + } + } + + nextAlloc1stIndex = m_1stNullItemsBeginCount; + while (lastOffset < freeSpace1stTo2ndEnd) + { + // Find next non-null allocation or move nextAllocIndex to the end. + while (nextAlloc1stIndex < suballoc1stCount && + suballocations1st[nextAlloc1stIndex].userData == VMA_NULL) + { + ++nextAlloc1stIndex; + } + + // Found non-null allocation. + if (nextAlloc1stIndex < suballoc1stCount) + { + const VmaSuballocation& suballoc = suballocations1st[nextAlloc1stIndex]; + + // 1. Process free space before this allocation. + if (lastOffset < suballoc.offset) + { + // There is free space from lastOffset to suballoc.offset. + const VkDeviceSize unusedRangeSize = suballoc.offset - lastOffset; + PrintDetailedMap_UnusedRange(json, lastOffset, unusedRangeSize); + } + + // 2. Process this allocation. + // There is allocation with suballoc.offset, suballoc.size. + PrintDetailedMap_Allocation(json, suballoc.offset, suballoc.size, suballoc.userData); + + // 3. Prepare for next iteration. + lastOffset = suballoc.offset + suballoc.size; + ++nextAlloc1stIndex; + } + // We are at the end. + else + { + if (lastOffset < freeSpace1stTo2ndEnd) + { + // There is free space from lastOffset to freeSpace1stTo2ndEnd. + const VkDeviceSize unusedRangeSize = freeSpace1stTo2ndEnd - lastOffset; + PrintDetailedMap_UnusedRange(json, lastOffset, unusedRangeSize); + } + + // End of loop. + lastOffset = freeSpace1stTo2ndEnd; + } + } + + if (m_2ndVectorMode == SECOND_VECTOR_DOUBLE_STACK) + { + size_t nextAlloc2ndIndex = suballocations2nd.size() - 1; + while (lastOffset < size) + { + // Find next non-null allocation or move nextAlloc2ndIndex to the end. + while (nextAlloc2ndIndex != SIZE_MAX && + suballocations2nd[nextAlloc2ndIndex].userData == VMA_NULL) + { + --nextAlloc2ndIndex; + } + + // Found non-null allocation. + if (nextAlloc2ndIndex != SIZE_MAX) + { + const VmaSuballocation& suballoc = suballocations2nd[nextAlloc2ndIndex]; + + // 1. Process free space before this allocation. + if (lastOffset < suballoc.offset) + { + // There is free space from lastOffset to suballoc.offset. + const VkDeviceSize unusedRangeSize = suballoc.offset - lastOffset; + PrintDetailedMap_UnusedRange(json, lastOffset, unusedRangeSize); + } + + // 2. Process this allocation. + // There is allocation with suballoc.offset, suballoc.size. + PrintDetailedMap_Allocation(json, suballoc.offset, suballoc.size, suballoc.userData); + + // 3. Prepare for next iteration. + lastOffset = suballoc.offset + suballoc.size; + --nextAlloc2ndIndex; + } + // We are at the end. + else + { + if (lastOffset < size) + { + // There is free space from lastOffset to size. 
+ const VkDeviceSize unusedRangeSize = size - lastOffset; + PrintDetailedMap_UnusedRange(json, lastOffset, unusedRangeSize); + } + + // End of loop. + lastOffset = size; + } + } + } + + PrintDetailedMap_End(json); +} +#endif // VMA_STATS_STRING_ENABLED + +bool VmaBlockMetadata_Linear::CreateAllocationRequest( + VkDeviceSize allocSize, + VkDeviceSize allocAlignment, + bool upperAddress, + VmaSuballocationType allocType, + uint32_t strategy, + VmaAllocationRequest* pAllocationRequest) +{ + VMA_ASSERT(allocSize > 0); + VMA_ASSERT(allocType != VMA_SUBALLOCATION_TYPE_FREE); + VMA_ASSERT(pAllocationRequest != VMA_NULL); + VMA_HEAVY_ASSERT(Validate()); + + if(allocSize > GetSize()) + return false; + + pAllocationRequest->size = allocSize; + return upperAddress ? + CreateAllocationRequest_UpperAddress( + allocSize, allocAlignment, allocType, strategy, pAllocationRequest) : + CreateAllocationRequest_LowerAddress( + allocSize, allocAlignment, allocType, strategy, pAllocationRequest); +} + +VkResult VmaBlockMetadata_Linear::CheckCorruption(const void* pBlockData) +{ + VMA_ASSERT(!IsVirtual()); + SuballocationVectorType& suballocations1st = AccessSuballocations1st(); + for (size_t i = m_1stNullItemsBeginCount, count = suballocations1st.size(); i < count; ++i) + { + const VmaSuballocation& suballoc = suballocations1st[i]; + if (suballoc.type != VMA_SUBALLOCATION_TYPE_FREE) + { + if (!VmaValidateMagicValue(pBlockData, suballoc.offset + suballoc.size)) + { + VMA_ASSERT(0 && "MEMORY CORRUPTION DETECTED AFTER VALIDATED ALLOCATION!"); + return VK_ERROR_UNKNOWN_COPY; + } + } + } + + SuballocationVectorType& suballocations2nd = AccessSuballocations2nd(); + for (size_t i = 0, count = suballocations2nd.size(); i < count; ++i) + { + const VmaSuballocation& suballoc = suballocations2nd[i]; + if (suballoc.type != VMA_SUBALLOCATION_TYPE_FREE) + { + if (!VmaValidateMagicValue(pBlockData, suballoc.offset + suballoc.size)) + { + VMA_ASSERT(0 && "MEMORY CORRUPTION DETECTED AFTER VALIDATED ALLOCATION!"); + return VK_ERROR_UNKNOWN_COPY; + } + } + } + + return VK_SUCCESS; +} + +void VmaBlockMetadata_Linear::Alloc( + const VmaAllocationRequest& request, + VmaSuballocationType type, + void* userData) +{ + const VkDeviceSize offset = (VkDeviceSize)request.allocHandle - 1; + const VmaSuballocation newSuballoc = { offset, request.size, userData, type }; + + switch (request.type) + { + case VmaAllocationRequestType::UpperAddress: + { + VMA_ASSERT(m_2ndVectorMode != SECOND_VECTOR_RING_BUFFER && + "CRITICAL ERROR: Trying to use linear allocator as double stack while it was already used as ring buffer."); + SuballocationVectorType& suballocations2nd = AccessSuballocations2nd(); + suballocations2nd.push_back(newSuballoc); + m_2ndVectorMode = SECOND_VECTOR_DOUBLE_STACK; + } + break; + case VmaAllocationRequestType::EndOf1st: + { + SuballocationVectorType& suballocations1st = AccessSuballocations1st(); + + VMA_ASSERT(suballocations1st.empty() || + offset >= suballocations1st.back().offset + suballocations1st.back().size); + // Check if it fits before the end of the block. + VMA_ASSERT(offset + request.size <= GetSize()); + + suballocations1st.push_back(newSuballoc); + } + break; + case VmaAllocationRequestType::EndOf2nd: + { + SuballocationVectorType& suballocations1st = AccessSuballocations1st(); + // New allocation at the end of 2-part ring buffer, so before first allocation from 1st vector. 
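+            // (The assert below checks exactly that: the new suballocation
+            //  must end at or before the first used item of the 1st vector.)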
+ VMA_ASSERT(!suballocations1st.empty() && + offset + request.size <= suballocations1st[m_1stNullItemsBeginCount].offset); + SuballocationVectorType& suballocations2nd = AccessSuballocations2nd(); + + switch (m_2ndVectorMode) + { + case SECOND_VECTOR_EMPTY: + // First allocation from second part ring buffer. + VMA_ASSERT(suballocations2nd.empty()); + m_2ndVectorMode = SECOND_VECTOR_RING_BUFFER; + break; + case SECOND_VECTOR_RING_BUFFER: + // 2-part ring buffer is already started. + VMA_ASSERT(!suballocations2nd.empty()); + break; + case SECOND_VECTOR_DOUBLE_STACK: + VMA_ASSERT(0 && "CRITICAL ERROR: Trying to use linear allocator as ring buffer while it was already used as double stack."); + break; + default: + VMA_ASSERT(0); + } + + suballocations2nd.push_back(newSuballoc); + } + break; + default: + VMA_ASSERT(0 && "CRITICAL INTERNAL ERROR."); + } + + m_SumFreeSize -= newSuballoc.size; +} + +void VmaBlockMetadata_Linear::Free(VmaAllocHandle allocHandle) +{ + SuballocationVectorType& suballocations1st = AccessSuballocations1st(); + SuballocationVectorType& suballocations2nd = AccessSuballocations2nd(); + VkDeviceSize offset = (VkDeviceSize)allocHandle - 1; + + if (!suballocations1st.empty()) + { + // First allocation: Mark it as next empty at the beginning. + VmaSuballocation& firstSuballoc = suballocations1st[m_1stNullItemsBeginCount]; + if (firstSuballoc.offset == offset) + { + firstSuballoc.type = VMA_SUBALLOCATION_TYPE_FREE; + firstSuballoc.userData = VMA_NULL; + m_SumFreeSize += firstSuballoc.size; + ++m_1stNullItemsBeginCount; + CleanupAfterFree(); + return; + } + } + + // Last allocation in 2-part ring buffer or top of upper stack (same logic). + if (m_2ndVectorMode == SECOND_VECTOR_RING_BUFFER || + m_2ndVectorMode == SECOND_VECTOR_DOUBLE_STACK) + { + VmaSuballocation& lastSuballoc = suballocations2nd.back(); + if (lastSuballoc.offset == offset) + { + m_SumFreeSize += lastSuballoc.size; + suballocations2nd.pop_back(); + CleanupAfterFree(); + return; + } + } + // Last allocation in 1st vector. + else if (m_2ndVectorMode == SECOND_VECTOR_EMPTY) + { + VmaSuballocation& lastSuballoc = suballocations1st.back(); + if (lastSuballoc.offset == offset) + { + m_SumFreeSize += lastSuballoc.size; + suballocations1st.pop_back(); + CleanupAfterFree(); + return; + } + } + + VmaSuballocation refSuballoc; + refSuballoc.offset = offset; + // Rest of members stays uninitialized intentionally for better performance. + + // Item from the middle of 1st vector. + { + const SuballocationVectorType::iterator it = VmaBinaryFindSorted( + suballocations1st.begin() + m_1stNullItemsBeginCount, + suballocations1st.end(), + refSuballoc, + VmaSuballocationOffsetLess()); + if (it != suballocations1st.end()) + { + it->type = VMA_SUBALLOCATION_TYPE_FREE; + it->userData = VMA_NULL; + ++m_1stNullItemsMiddleCount; + m_SumFreeSize += it->size; + CleanupAfterFree(); + return; + } + } + + if (m_2ndVectorMode != SECOND_VECTOR_EMPTY) + { + // Item from the middle of 2nd vector. + const SuballocationVectorType::iterator it = m_2ndVectorMode == SECOND_VECTOR_RING_BUFFER ? 
+ VmaBinaryFindSorted(suballocations2nd.begin(), suballocations2nd.end(), refSuballoc, VmaSuballocationOffsetLess()) : + VmaBinaryFindSorted(suballocations2nd.begin(), suballocations2nd.end(), refSuballoc, VmaSuballocationOffsetGreater()); + if (it != suballocations2nd.end()) + { + it->type = VMA_SUBALLOCATION_TYPE_FREE; + it->userData = VMA_NULL; + ++m_2ndNullItemsCount; + m_SumFreeSize += it->size; + CleanupAfterFree(); + return; + } + } + + VMA_ASSERT(0 && "Allocation to free not found in linear allocator!"); +} + +void VmaBlockMetadata_Linear::GetAllocationInfo(VmaAllocHandle allocHandle, VmaVirtualAllocationInfo& outInfo) +{ + outInfo.offset = (VkDeviceSize)allocHandle - 1; + VmaSuballocation& suballoc = FindSuballocation(outInfo.offset); + outInfo.size = suballoc.size; + outInfo.pUserData = suballoc.userData; +} + +void* VmaBlockMetadata_Linear::GetAllocationUserData(VmaAllocHandle allocHandle) const +{ + return FindSuballocation((VkDeviceSize)allocHandle - 1).userData; +} + +VmaAllocHandle VmaBlockMetadata_Linear::GetAllocationListBegin() const +{ + // Function only used for defragmentation, which is disabled for this algorithm + VMA_ASSERT(0); + return VK_NULL_HANDLE; +} + +VmaAllocHandle VmaBlockMetadata_Linear::GetNextAllocation(VmaAllocHandle prevAlloc) const +{ + // Function only used for defragmentation, which is disabled for this algorithm + VMA_ASSERT(0); + return VK_NULL_HANDLE; +} + +VkDeviceSize VmaBlockMetadata_Linear::GetNextFreeRegionSize(VmaAllocHandle alloc) const +{ + // Function only used for defragmentation, which is disabled for this algorithm + VMA_ASSERT(0); + return 0; +} + +void VmaBlockMetadata_Linear::Clear() +{ + m_SumFreeSize = GetSize(); + m_Suballocations0.clear(); + m_Suballocations1.clear(); + // Leaving m_1stVectorIndex unchanged - it doesn't matter. + m_2ndVectorMode = SECOND_VECTOR_EMPTY; + m_1stNullItemsBeginCount = 0; + m_1stNullItemsMiddleCount = 0; + m_2ndNullItemsCount = 0; +} + +void VmaBlockMetadata_Linear::SetAllocationUserData(VmaAllocHandle allocHandle, void* userData) +{ + VmaSuballocation& suballoc = FindSuballocation((VkDeviceSize)allocHandle - 1); + suballoc.userData = userData; +} + +void VmaBlockMetadata_Linear::DebugLogAllAllocations() const +{ + const SuballocationVectorType& suballocations1st = AccessSuballocations1st(); + for (auto it = suballocations1st.begin() + m_1stNullItemsBeginCount; it != suballocations1st.end(); ++it) + if (it->type != VMA_SUBALLOCATION_TYPE_FREE) + DebugLogAllocation(it->offset, it->size, it->userData); + + const SuballocationVectorType& suballocations2nd = AccessSuballocations2nd(); + for (auto it = suballocations2nd.begin(); it != suballocations2nd.end(); ++it) + if (it->type != VMA_SUBALLOCATION_TYPE_FREE) + DebugLogAllocation(it->offset, it->size, it->userData); +} + +VmaSuballocation& VmaBlockMetadata_Linear::FindSuballocation(VkDeviceSize offset) const +{ + const SuballocationVectorType& suballocations1st = AccessSuballocations1st(); + const SuballocationVectorType& suballocations2nd = AccessSuballocations2nd(); + + VmaSuballocation refSuballoc; + refSuballoc.offset = offset; + // Rest of members stays uninitialized intentionally for better performance. + + // Item from the 1st vector. 
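+    // (Binary search by offset; the vector is sorted ascending past the leading null-item prefix.)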
+    {
+        SuballocationVectorType::const_iterator it = VmaBinaryFindSorted(
+            suballocations1st.begin() + m_1stNullItemsBeginCount,
+            suballocations1st.end(),
+            refSuballoc,
+            VmaSuballocationOffsetLess());
+        if (it != suballocations1st.end())
+        {
+            return const_cast<VmaSuballocation&>(*it);
+        }
+    }
+
+    if (m_2ndVectorMode != SECOND_VECTOR_EMPTY)
+    {
+        // Rest of members stays uninitialized intentionally for better performance.
+        SuballocationVectorType::const_iterator it = m_2ndVectorMode == SECOND_VECTOR_RING_BUFFER ?
+            VmaBinaryFindSorted(suballocations2nd.begin(), suballocations2nd.end(), refSuballoc, VmaSuballocationOffsetLess()) :
+            VmaBinaryFindSorted(suballocations2nd.begin(), suballocations2nd.end(), refSuballoc, VmaSuballocationOffsetGreater());
+        if (it != suballocations2nd.end())
+        {
+            return const_cast<VmaSuballocation&>(*it);
+        }
+    }
+
+    VMA_ASSERT(0 && "Allocation not found in linear allocator!");
+    return const_cast<VmaSuballocation&>(suballocations1st.back()); // Should never occur.
+}
+
+bool VmaBlockMetadata_Linear::ShouldCompact1st() const
+{
+    const size_t nullItemCount = m_1stNullItemsBeginCount + m_1stNullItemsMiddleCount;
+    const size_t suballocCount = AccessSuballocations1st().size();
+    return suballocCount > 32 && nullItemCount * 2 >= (suballocCount - nullItemCount) * 3;
+}
+
+void VmaBlockMetadata_Linear::CleanupAfterFree()
+{
+    SuballocationVectorType& suballocations1st = AccessSuballocations1st();
+    SuballocationVectorType& suballocations2nd = AccessSuballocations2nd();
+
+    if (IsEmpty())
+    {
+        suballocations1st.clear();
+        suballocations2nd.clear();
+        m_1stNullItemsBeginCount = 0;
+        m_1stNullItemsMiddleCount = 0;
+        m_2ndNullItemsCount = 0;
+        m_2ndVectorMode = SECOND_VECTOR_EMPTY;
+    }
+    else
+    {
+        const size_t suballoc1stCount = suballocations1st.size();
+        const size_t nullItem1stCount = m_1stNullItemsBeginCount + m_1stNullItemsMiddleCount;
+        VMA_ASSERT(nullItem1stCount <= suballoc1stCount);
+
+        // Find more null items at the beginning of 1st vector.
+        while (m_1stNullItemsBeginCount < suballoc1stCount &&
+            suballocations1st[m_1stNullItemsBeginCount].type == VMA_SUBALLOCATION_TYPE_FREE)
+        {
+            ++m_1stNullItemsBeginCount;
+            --m_1stNullItemsMiddleCount;
+        }
+
+        // Find more null items at the end of 1st vector.
+        while (m_1stNullItemsMiddleCount > 0 &&
+            suballocations1st.back().type == VMA_SUBALLOCATION_TYPE_FREE)
+        {
+            --m_1stNullItemsMiddleCount;
+            suballocations1st.pop_back();
+        }
+
+        // Find more null items at the end of 2nd vector.
+        while (m_2ndNullItemsCount > 0 &&
+            suballocations2nd.back().type == VMA_SUBALLOCATION_TYPE_FREE)
+        {
+            --m_2ndNullItemsCount;
+            suballocations2nd.pop_back();
+        }
+
+        // Find more null items at the beginning of 2nd vector.
+        while (m_2ndNullItemsCount > 0 &&
+            suballocations2nd[0].type == VMA_SUBALLOCATION_TYPE_FREE)
+        {
+            --m_2ndNullItemsCount;
+            VmaVectorRemove(suballocations2nd, 0);
+        }
+
+        if (ShouldCompact1st())
+        {
+            const size_t nonNullItemCount = suballoc1stCount - nullItem1stCount;
+            size_t srcIndex = m_1stNullItemsBeginCount;
+            for (size_t dstIndex = 0; dstIndex < nonNullItemCount; ++dstIndex)
+            {
+                while (suballocations1st[srcIndex].type == VMA_SUBALLOCATION_TYPE_FREE)
+                {
+                    ++srcIndex;
+                }
+                if (dstIndex != srcIndex)
+                {
+                    suballocations1st[dstIndex] = suballocations1st[srcIndex];
+                }
+                ++srcIndex;
+            }
+            suballocations1st.resize(nonNullItemCount);
+            m_1stNullItemsBeginCount = 0;
+            m_1stNullItemsMiddleCount = 0;
+        }
+
+        // 2nd vector became empty.
+        if (suballocations2nd.empty())
+        {
+            m_2ndVectorMode = SECOND_VECTOR_EMPTY;
+        }
+
+        // 1st vector became empty.
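+        // (Only the leading null items remain in it, i.e. it holds no live allocation.)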
+ if (suballocations1st.size() - m_1stNullItemsBeginCount == 0) + { + suballocations1st.clear(); + m_1stNullItemsBeginCount = 0; + + if (!suballocations2nd.empty() && m_2ndVectorMode == SECOND_VECTOR_RING_BUFFER) + { + // Swap 1st with 2nd. Now 2nd is empty. + m_2ndVectorMode = SECOND_VECTOR_EMPTY; + m_1stNullItemsMiddleCount = m_2ndNullItemsCount; + while (m_1stNullItemsBeginCount < suballocations2nd.size() && + suballocations2nd[m_1stNullItemsBeginCount].type == VMA_SUBALLOCATION_TYPE_FREE) + { + ++m_1stNullItemsBeginCount; + --m_1stNullItemsMiddleCount; + } + m_2ndNullItemsCount = 0; + m_1stVectorIndex ^= 1; + } + } + } + + VMA_HEAVY_ASSERT(Validate()); +} + +bool VmaBlockMetadata_Linear::CreateAllocationRequest_LowerAddress( + VkDeviceSize allocSize, + VkDeviceSize allocAlignment, + VmaSuballocationType allocType, + uint32_t strategy, + VmaAllocationRequest* pAllocationRequest) +{ + const VkDeviceSize blockSize = GetSize(); + const VkDeviceSize debugMargin = GetDebugMargin(); + const VkDeviceSize bufferImageGranularity = GetBufferImageGranularity(); + SuballocationVectorType& suballocations1st = AccessSuballocations1st(); + SuballocationVectorType& suballocations2nd = AccessSuballocations2nd(); + + if (m_2ndVectorMode == SECOND_VECTOR_EMPTY || m_2ndVectorMode == SECOND_VECTOR_DOUBLE_STACK) + { + // Try to allocate at the end of 1st vector. + + VkDeviceSize resultBaseOffset = 0; + if (!suballocations1st.empty()) + { + const VmaSuballocation& lastSuballoc = suballocations1st.back(); + resultBaseOffset = lastSuballoc.offset + lastSuballoc.size + debugMargin; + } + + // Start from offset equal to beginning of free space. + VkDeviceSize resultOffset = resultBaseOffset; + + // Apply alignment. + resultOffset = VmaAlignUp(resultOffset, allocAlignment); + + // Check previous suballocations for BufferImageGranularity conflicts. + // Make bigger alignment if necessary. + if (bufferImageGranularity > 1 && bufferImageGranularity != allocAlignment && !suballocations1st.empty()) + { + bool bufferImageGranularityConflict = false; + for (size_t prevSuballocIndex = suballocations1st.size(); prevSuballocIndex--; ) + { + const VmaSuballocation& prevSuballoc = suballocations1st[prevSuballocIndex]; + if (VmaBlocksOnSamePage(prevSuballoc.offset, prevSuballoc.size, resultOffset, bufferImageGranularity)) + { + if (VmaIsBufferImageGranularityConflict(prevSuballoc.type, allocType)) + { + bufferImageGranularityConflict = true; + break; + } + } + else + // Already on previous page. + break; + } + if (bufferImageGranularityConflict) + { + resultOffset = VmaAlignUp(resultOffset, bufferImageGranularity); + } + } + + const VkDeviceSize freeSpaceEnd = m_2ndVectorMode == SECOND_VECTOR_DOUBLE_STACK ? + suballocations2nd.back().offset : blockSize; + + // There is enough free space at the end after alignment. + if (resultOffset + allocSize + debugMargin <= freeSpaceEnd) + { + // Check next suballocations for BufferImageGranularity conflicts. + // If conflict exists, allocation cannot be made here. 
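+            // (bufferImageGranularity is the Vulkan device limit
+            // VkPhysicalDeviceLimits::bufferImageGranularity: linear and non-linear
+            // resources must not share a page of that size.)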
+ if ((allocSize % bufferImageGranularity || resultOffset % bufferImageGranularity) && m_2ndVectorMode == SECOND_VECTOR_DOUBLE_STACK) + { + for (size_t nextSuballocIndex = suballocations2nd.size(); nextSuballocIndex--; ) + { + const VmaSuballocation& nextSuballoc = suballocations2nd[nextSuballocIndex]; + if (VmaBlocksOnSamePage(resultOffset, allocSize, nextSuballoc.offset, bufferImageGranularity)) + { + if (VmaIsBufferImageGranularityConflict(allocType, nextSuballoc.type)) + { + return false; + } + } + else + { + // Already on previous page. + break; + } + } + } + + // All tests passed: Success. + pAllocationRequest->allocHandle = (VmaAllocHandle)(resultOffset + 1); + // pAllocationRequest->item, customData unused. + pAllocationRequest->type = VmaAllocationRequestType::EndOf1st; + return true; + } + } + + // Wrap-around to end of 2nd vector. Try to allocate there, watching for the + // beginning of 1st vector as the end of free space. + if (m_2ndVectorMode == SECOND_VECTOR_EMPTY || m_2ndVectorMode == SECOND_VECTOR_RING_BUFFER) + { + VMA_ASSERT(!suballocations1st.empty()); + + VkDeviceSize resultBaseOffset = 0; + if (!suballocations2nd.empty()) + { + const VmaSuballocation& lastSuballoc = suballocations2nd.back(); + resultBaseOffset = lastSuballoc.offset + lastSuballoc.size + debugMargin; + } + + // Start from offset equal to beginning of free space. + VkDeviceSize resultOffset = resultBaseOffset; + + // Apply alignment. + resultOffset = VmaAlignUp(resultOffset, allocAlignment); + + // Check previous suballocations for BufferImageGranularity conflicts. + // Make bigger alignment if necessary. + if (bufferImageGranularity > 1 && bufferImageGranularity != allocAlignment && !suballocations2nd.empty()) + { + bool bufferImageGranularityConflict = false; + for (size_t prevSuballocIndex = suballocations2nd.size(); prevSuballocIndex--; ) + { + const VmaSuballocation& prevSuballoc = suballocations2nd[prevSuballocIndex]; + if (VmaBlocksOnSamePage(prevSuballoc.offset, prevSuballoc.size, resultOffset, bufferImageGranularity)) + { + if (VmaIsBufferImageGranularityConflict(prevSuballoc.type, allocType)) + { + bufferImageGranularityConflict = true; + break; + } + } + else + // Already on previous page. + break; + } + if (bufferImageGranularityConflict) + { + resultOffset = VmaAlignUp(resultOffset, bufferImageGranularity); + } + } + + size_t index1st = m_1stNullItemsBeginCount; + + // There is enough free space at the end after alignment. + if ((index1st == suballocations1st.size() && resultOffset + allocSize + debugMargin <= blockSize) || + (index1st < suballocations1st.size() && resultOffset + allocSize + debugMargin <= suballocations1st[index1st].offset)) + { + // Check next suballocations for BufferImageGranularity conflicts. + // If conflict exists, allocation cannot be made here. + if (allocSize % bufferImageGranularity || resultOffset % bufferImageGranularity) + { + for (size_t nextSuballocIndex = index1st; + nextSuballocIndex < suballocations1st.size(); + nextSuballocIndex++) + { + const VmaSuballocation& nextSuballoc = suballocations1st[nextSuballocIndex]; + if (VmaBlocksOnSamePage(resultOffset, allocSize, nextSuballoc.offset, bufferImageGranularity)) + { + if (VmaIsBufferImageGranularityConflict(allocType, nextSuballoc.type)) + { + return false; + } + } + else + { + // Already on next page. + break; + } + } + } + + // All tests passed: Success. 
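+            // (The handle stores offset + 1 so that a valid allocation at offset 0
+            // remains distinguishable from VK_NULL_HANDLE.)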
+ pAllocationRequest->allocHandle = (VmaAllocHandle)(resultOffset + 1); + pAllocationRequest->type = VmaAllocationRequestType::EndOf2nd; + // pAllocationRequest->item, customData unused. + return true; + } + } + + return false; +} + +bool VmaBlockMetadata_Linear::CreateAllocationRequest_UpperAddress( + VkDeviceSize allocSize, + VkDeviceSize allocAlignment, + VmaSuballocationType allocType, + uint32_t strategy, + VmaAllocationRequest* pAllocationRequest) +{ + const VkDeviceSize blockSize = GetSize(); + const VkDeviceSize bufferImageGranularity = GetBufferImageGranularity(); + SuballocationVectorType& suballocations1st = AccessSuballocations1st(); + SuballocationVectorType& suballocations2nd = AccessSuballocations2nd(); + + if (m_2ndVectorMode == SECOND_VECTOR_RING_BUFFER) + { + VMA_ASSERT(0 && "Trying to use pool with linear algorithm as double stack, while it is already being used as ring buffer."); + return false; + } + + // Try to allocate before 2nd.back(), or end of block if 2nd.empty(). + if (allocSize > blockSize) + { + return false; + } + VkDeviceSize resultBaseOffset = blockSize - allocSize; + if (!suballocations2nd.empty()) + { + const VmaSuballocation& lastSuballoc = suballocations2nd.back(); + resultBaseOffset = lastSuballoc.offset - allocSize; + if (allocSize > lastSuballoc.offset) + { + return false; + } + } + + // Start from offset equal to end of free space. + VkDeviceSize resultOffset = resultBaseOffset; + + const VkDeviceSize debugMargin = GetDebugMargin(); + + // Apply debugMargin at the end. + if (debugMargin > 0) + { + if (resultOffset < debugMargin) + { + return false; + } + resultOffset -= debugMargin; + } + + // Apply alignment. + resultOffset = VmaAlignDown(resultOffset, allocAlignment); + + // Check next suballocations from 2nd for BufferImageGranularity conflicts. + // Make bigger alignment if necessary. + if (bufferImageGranularity > 1 && bufferImageGranularity != allocAlignment && !suballocations2nd.empty()) + { + bool bufferImageGranularityConflict = false; + for (size_t nextSuballocIndex = suballocations2nd.size(); nextSuballocIndex--; ) + { + const VmaSuballocation& nextSuballoc = suballocations2nd[nextSuballocIndex]; + if (VmaBlocksOnSamePage(resultOffset, allocSize, nextSuballoc.offset, bufferImageGranularity)) + { + if (VmaIsBufferImageGranularityConflict(nextSuballoc.type, allocType)) + { + bufferImageGranularityConflict = true; + break; + } + } + else + // Already on previous page. + break; + } + if (bufferImageGranularityConflict) + { + resultOffset = VmaAlignDown(resultOffset, bufferImageGranularity); + } + } + + // There is enough free space. + const VkDeviceSize endOf1st = !suballocations1st.empty() ? + suballocations1st.back().offset + suballocations1st.back().size : + 0; + if (endOf1st + debugMargin <= resultOffset) + { + // Check previous suballocations for BufferImageGranularity conflicts. + // If conflict exists, allocation cannot be made here. + if (bufferImageGranularity > 1) + { + for (size_t prevSuballocIndex = suballocations1st.size(); prevSuballocIndex--; ) + { + const VmaSuballocation& prevSuballoc = suballocations1st[prevSuballocIndex]; + if (VmaBlocksOnSamePage(prevSuballoc.offset, prevSuballoc.size, resultOffset, bufferImageGranularity)) + { + if (VmaIsBufferImageGranularityConflict(allocType, prevSuballoc.type)) + { + return false; + } + } + else + { + // Already on next page. + break; + } + } + } + + // All tests passed: Success. 
+ pAllocationRequest->allocHandle = (VmaAllocHandle)(resultOffset + 1); + // pAllocationRequest->item unused. + pAllocationRequest->type = VmaAllocationRequestType::UpperAddress; + return true; + } + + return false; +} +#endif // _VMA_BLOCK_METADATA_LINEAR_FUNCTIONS +#endif // _VMA_BLOCK_METADATA_LINEAR + +#ifndef _VMA_BLOCK_METADATA_TLSF +// To not search current larger region if first allocation won't succeed and skip to smaller range +// use with VMA_ALLOCATION_CREATE_STRATEGY_MIN_MEMORY_BIT as strategy in CreateAllocationRequest(). +// When fragmentation and reusal of previous blocks doesn't matter then use with +// VMA_ALLOCATION_CREATE_STRATEGY_MIN_TIME_BIT for fastest alloc time possible. +class VmaBlockMetadata_TLSF : public VmaBlockMetadata +{ + VMA_CLASS_NO_COPY_NO_MOVE(VmaBlockMetadata_TLSF) +public: + VmaBlockMetadata_TLSF(const VkAllocationCallbacks* pAllocationCallbacks, + VkDeviceSize bufferImageGranularity, bool isVirtual); + virtual ~VmaBlockMetadata_TLSF(); + + size_t GetAllocationCount() const override { return m_AllocCount; } + size_t GetFreeRegionsCount() const override { return m_BlocksFreeCount + 1; } + VkDeviceSize GetSumFreeSize() const override { return m_BlocksFreeSize + m_NullBlock->size; } + bool IsEmpty() const override { return m_NullBlock->offset == 0; } + VkDeviceSize GetAllocationOffset(VmaAllocHandle allocHandle) const override { return ((Block*)allocHandle)->offset; } + + void Init(VkDeviceSize size) override; + bool Validate() const override; + + void AddDetailedStatistics(VmaDetailedStatistics& inoutStats) const override; + void AddStatistics(VmaStatistics& inoutStats) const override; + +#if VMA_STATS_STRING_ENABLED + void PrintDetailedMap(class VmaJsonWriter& json) const override; +#endif + + bool CreateAllocationRequest( + VkDeviceSize allocSize, + VkDeviceSize allocAlignment, + bool upperAddress, + VmaSuballocationType allocType, + uint32_t strategy, + VmaAllocationRequest* pAllocationRequest) override; + + VkResult CheckCorruption(const void* pBlockData) override; + void Alloc( + const VmaAllocationRequest& request, + VmaSuballocationType type, + void* userData) override; + + void Free(VmaAllocHandle allocHandle) override; + void GetAllocationInfo(VmaAllocHandle allocHandle, VmaVirtualAllocationInfo& outInfo) override; + void* GetAllocationUserData(VmaAllocHandle allocHandle) const override; + VmaAllocHandle GetAllocationListBegin() const override; + VmaAllocHandle GetNextAllocation(VmaAllocHandle prevAlloc) const override; + VkDeviceSize GetNextFreeRegionSize(VmaAllocHandle alloc) const override; + void Clear() override; + void SetAllocationUserData(VmaAllocHandle allocHandle, void* userData) override; + void DebugLogAllAllocations() const override; + +private: + // According to original paper it should be preferable 4 or 5: + // M. Masmano, I. Ripoll, A. Crespo, and J. 
Real "TLSF: a New Dynamic Memory Allocator for Real-Time Systems" + // http://www.gii.upv.es/tlsf/files/ecrts04_tlsf.pdf + static const uint8_t SECOND_LEVEL_INDEX = 5; + static const uint16_t SMALL_BUFFER_SIZE = 256; + static const uint32_t INITIAL_BLOCK_ALLOC_COUNT = 16; + static const uint8_t MEMORY_CLASS_SHIFT = 7; + static const uint8_t MAX_MEMORY_CLASSES = 65 - MEMORY_CLASS_SHIFT; + + class Block + { + public: + VkDeviceSize offset; + VkDeviceSize size; + Block* prevPhysical; + Block* nextPhysical; + + void MarkFree() { prevFree = VMA_NULL; } + void MarkTaken() { prevFree = this; } + bool IsFree() const { return prevFree != this; } + void*& UserData() { VMA_HEAVY_ASSERT(!IsFree()); return userData; } + Block*& PrevFree() { return prevFree; } + Block*& NextFree() { VMA_HEAVY_ASSERT(IsFree()); return nextFree; } + + private: + Block* prevFree; // Address of the same block here indicates that block is taken + union + { + Block* nextFree; + void* userData; + }; + }; + + size_t m_AllocCount; + // Total number of free blocks besides null block + size_t m_BlocksFreeCount; + // Total size of free blocks excluding null block + VkDeviceSize m_BlocksFreeSize; + uint32_t m_IsFreeBitmap; + uint8_t m_MemoryClasses; + uint32_t m_InnerIsFreeBitmap[MAX_MEMORY_CLASSES]; + uint32_t m_ListsCount; + /* + * 0: 0-3 lists for small buffers + * 1+: 0-(2^SLI-1) lists for normal buffers + */ + Block** m_FreeList; + VmaPoolAllocator m_BlockAllocator; + Block* m_NullBlock; + VmaBlockBufferImageGranularity m_GranularityHandler; + + uint8_t SizeToMemoryClass(VkDeviceSize size) const; + uint16_t SizeToSecondIndex(VkDeviceSize size, uint8_t memoryClass) const; + uint32_t GetListIndex(uint8_t memoryClass, uint16_t secondIndex) const; + uint32_t GetListIndex(VkDeviceSize size) const; + + void RemoveFreeBlock(Block* block); + void InsertFreeBlock(Block* block); + void MergeBlock(Block* block, Block* prev); + + Block* FindFreeBlock(VkDeviceSize size, uint32_t& listIndex) const; + bool CheckBlock( + Block& block, + uint32_t listIndex, + VkDeviceSize allocSize, + VkDeviceSize allocAlignment, + VmaSuballocationType allocType, + VmaAllocationRequest* pAllocationRequest); +}; + +#ifndef _VMA_BLOCK_METADATA_TLSF_FUNCTIONS +VmaBlockMetadata_TLSF::VmaBlockMetadata_TLSF(const VkAllocationCallbacks* pAllocationCallbacks, + VkDeviceSize bufferImageGranularity, bool isVirtual) + : VmaBlockMetadata(pAllocationCallbacks, bufferImageGranularity, isVirtual), + m_AllocCount(0), + m_BlocksFreeCount(0), + m_BlocksFreeSize(0), + m_IsFreeBitmap(0), + m_MemoryClasses(0), + m_ListsCount(0), + m_FreeList(VMA_NULL), + m_BlockAllocator(pAllocationCallbacks, INITIAL_BLOCK_ALLOC_COUNT), + m_NullBlock(VMA_NULL), + m_GranularityHandler(bufferImageGranularity) {} + +VmaBlockMetadata_TLSF::~VmaBlockMetadata_TLSF() +{ + if (m_FreeList) + vma_delete_array(GetAllocationCallbacks(), m_FreeList, m_ListsCount); + m_GranularityHandler.Destroy(GetAllocationCallbacks()); +} + +void VmaBlockMetadata_TLSF::Init(VkDeviceSize size) +{ + VmaBlockMetadata::Init(size); + + if (!IsVirtual()) + m_GranularityHandler.Init(GetAllocationCallbacks(), size); + + m_NullBlock = m_BlockAllocator.Alloc(); + m_NullBlock->size = size; + m_NullBlock->offset = 0; + m_NullBlock->prevPhysical = VMA_NULL; + m_NullBlock->nextPhysical = VMA_NULL; + m_NullBlock->MarkFree(); + m_NullBlock->NextFree() = VMA_NULL; + m_NullBlock->PrevFree() = VMA_NULL; + uint8_t memoryClass = SizeToMemoryClass(size); + uint16_t sli = SizeToSecondIndex(size, memoryClass); + m_ListsCount = (memoryClass == 0 ? 
0 : (memoryClass - 1) * (1UL << SECOND_LEVEL_INDEX) + sli) + 1; + if (IsVirtual()) + m_ListsCount += 1UL << SECOND_LEVEL_INDEX; + else + m_ListsCount += 4; + + m_MemoryClasses = memoryClass + uint8_t(2); + memset(m_InnerIsFreeBitmap, 0, MAX_MEMORY_CLASSES * sizeof(uint32_t)); + + m_FreeList = vma_new_array(GetAllocationCallbacks(), Block*, m_ListsCount); + memset(m_FreeList, 0, m_ListsCount * sizeof(Block*)); +} + +bool VmaBlockMetadata_TLSF::Validate() const +{ + VMA_VALIDATE(GetSumFreeSize() <= GetSize()); + + VkDeviceSize calculatedSize = m_NullBlock->size; + VkDeviceSize calculatedFreeSize = m_NullBlock->size; + size_t allocCount = 0; + size_t freeCount = 0; + + // Check integrity of free lists + for (uint32_t list = 0; list < m_ListsCount; ++list) + { + Block* block = m_FreeList[list]; + if (block != VMA_NULL) + { + VMA_VALIDATE(block->IsFree()); + VMA_VALIDATE(block->PrevFree() == VMA_NULL); + while (block->NextFree()) + { + VMA_VALIDATE(block->NextFree()->IsFree()); + VMA_VALIDATE(block->NextFree()->PrevFree() == block); + block = block->NextFree(); + } + } + } + + VkDeviceSize nextOffset = m_NullBlock->offset; + auto validateCtx = m_GranularityHandler.StartValidation(GetAllocationCallbacks(), IsVirtual()); + + VMA_VALIDATE(m_NullBlock->nextPhysical == VMA_NULL); + if (m_NullBlock->prevPhysical) + { + VMA_VALIDATE(m_NullBlock->prevPhysical->nextPhysical == m_NullBlock); + } + // Check all blocks + for (Block* prev = m_NullBlock->prevPhysical; prev != VMA_NULL; prev = prev->prevPhysical) + { + VMA_VALIDATE(prev->offset + prev->size == nextOffset); + nextOffset = prev->offset; + calculatedSize += prev->size; + + uint32_t listIndex = GetListIndex(prev->size); + if (prev->IsFree()) + { + ++freeCount; + // Check if free block belongs to free list + Block* freeBlock = m_FreeList[listIndex]; + VMA_VALIDATE(freeBlock != VMA_NULL); + + bool found = false; + do + { + if (freeBlock == prev) + found = true; + + freeBlock = freeBlock->NextFree(); + } while (!found && freeBlock != VMA_NULL); + + VMA_VALIDATE(found); + calculatedFreeSize += prev->size; + } + else + { + ++allocCount; + // Check if taken block is not on a free list + Block* freeBlock = m_FreeList[listIndex]; + while (freeBlock) + { + VMA_VALIDATE(freeBlock != prev); + freeBlock = freeBlock->NextFree(); + } + + if (!IsVirtual()) + { + VMA_VALIDATE(m_GranularityHandler.Validate(validateCtx, prev->offset, prev->size)); + } + } + + if (prev->prevPhysical) + { + VMA_VALIDATE(prev->prevPhysical->nextPhysical == prev); + } + } + + if (!IsVirtual()) + { + VMA_VALIDATE(m_GranularityHandler.FinishValidation(validateCtx)); + } + + VMA_VALIDATE(nextOffset == 0); + VMA_VALIDATE(calculatedSize == GetSize()); + VMA_VALIDATE(calculatedFreeSize == GetSumFreeSize()); + VMA_VALIDATE(allocCount == m_AllocCount); + VMA_VALIDATE(freeCount == m_BlocksFreeCount); + + return true; +} + +void VmaBlockMetadata_TLSF::AddDetailedStatistics(VmaDetailedStatistics& inoutStats) const +{ + inoutStats.statistics.blockCount++; + inoutStats.statistics.blockBytes += GetSize(); + if (m_NullBlock->size > 0) + VmaAddDetailedStatisticsUnusedRange(inoutStats, m_NullBlock->size); + + for (Block* block = m_NullBlock->prevPhysical; block != VMA_NULL; block = block->prevPhysical) + { + if (block->IsFree()) + VmaAddDetailedStatisticsUnusedRange(inoutStats, block->size); + else + VmaAddDetailedStatisticsAllocation(inoutStats, block->size); + } +} + +void VmaBlockMetadata_TLSF::AddStatistics(VmaStatistics& inoutStats) const +{ + inoutStats.blockCount++; + inoutStats.allocationCount 
+= (uint32_t)m_AllocCount;
+    inoutStats.blockBytes += GetSize();
+    inoutStats.allocationBytes += GetSize() - GetSumFreeSize();
+}
+
+#if VMA_STATS_STRING_ENABLED
+void VmaBlockMetadata_TLSF::PrintDetailedMap(class VmaJsonWriter& json) const
+{
+    size_t blockCount = m_AllocCount + m_BlocksFreeCount;
+    VmaStlAllocator<Block*> allocator(GetAllocationCallbacks());
+    VmaVector<Block*, VmaStlAllocator<Block*>> blockList(blockCount, allocator);
+
+    size_t i = blockCount;
+    for (Block* block = m_NullBlock->prevPhysical; block != VMA_NULL; block = block->prevPhysical)
+    {
+        blockList[--i] = block;
+    }
+    VMA_ASSERT(i == 0);
+
+    VmaDetailedStatistics stats;
+    VmaClearDetailedStatistics(stats);
+    AddDetailedStatistics(stats);
+
+    PrintDetailedMap_Begin(json,
+        stats.statistics.blockBytes - stats.statistics.allocationBytes,
+        stats.statistics.allocationCount,
+        stats.unusedRangeCount);
+
+    for (; i < blockCount; ++i)
+    {
+        Block* block = blockList[i];
+        if (block->IsFree())
+            PrintDetailedMap_UnusedRange(json, block->offset, block->size);
+        else
+            PrintDetailedMap_Allocation(json, block->offset, block->size, block->UserData());
+    }
+    if (m_NullBlock->size > 0)
+        PrintDetailedMap_UnusedRange(json, m_NullBlock->offset, m_NullBlock->size);
+
+    PrintDetailedMap_End(json);
+}
+#endif
+
+bool VmaBlockMetadata_TLSF::CreateAllocationRequest(
+    VkDeviceSize allocSize,
+    VkDeviceSize allocAlignment,
+    bool upperAddress,
+    VmaSuballocationType allocType,
+    uint32_t strategy,
+    VmaAllocationRequest* pAllocationRequest)
+{
+    VMA_ASSERT(allocSize > 0 && "Cannot allocate empty block!");
+    VMA_ASSERT(!upperAddress && "VMA_ALLOCATION_CREATE_UPPER_ADDRESS_BIT can be used only with linear algorithm.");
+
+    // For small granularity round up
+    if (!IsVirtual())
+        m_GranularityHandler.RoundupAllocRequest(allocType, allocSize, allocAlignment);
+
+    allocSize += GetDebugMargin();
+    // Quick check for too small pool
+    if (allocSize > GetSumFreeSize())
+        return false;
+
+    // If no free blocks in pool then check only null block
+    if (m_BlocksFreeCount == 0)
+        return CheckBlock(*m_NullBlock, m_ListsCount, allocSize, allocAlignment, allocType, pAllocationRequest);
+
+    // Round up to the next block
+    VkDeviceSize sizeForNextList = allocSize;
+    VkDeviceSize smallSizeStep = VkDeviceSize(SMALL_BUFFER_SIZE / (IsVirtual() ?
1 << SECOND_LEVEL_INDEX : 4));
+    if (allocSize > SMALL_BUFFER_SIZE)
+    {
+        sizeForNextList += (1ULL << (VMA_BITSCAN_MSB(allocSize) - SECOND_LEVEL_INDEX));
+    }
+    else if (allocSize > SMALL_BUFFER_SIZE - smallSizeStep)
+        sizeForNextList = SMALL_BUFFER_SIZE + 1;
+    else
+        sizeForNextList += smallSizeStep;
+
+    uint32_t nextListIndex = m_ListsCount;
+    uint32_t prevListIndex = m_ListsCount;
+    Block* nextListBlock = VMA_NULL;
+    Block* prevListBlock = VMA_NULL;
+
+    // Check blocks according to strategies
+    if (strategy & VMA_ALLOCATION_CREATE_STRATEGY_MIN_TIME_BIT)
+    {
+        // Quick check for larger block first
+        nextListBlock = FindFreeBlock(sizeForNextList, nextListIndex);
+        if (nextListBlock != VMA_NULL && CheckBlock(*nextListBlock, nextListIndex, allocSize, allocAlignment, allocType, pAllocationRequest))
+            return true;
+
+        // If not fitted then null block
+        if (CheckBlock(*m_NullBlock, m_ListsCount, allocSize, allocAlignment, allocType, pAllocationRequest))
+            return true;
+
+        // Null block failed, search larger bucket
+        while (nextListBlock)
+        {
+            if (CheckBlock(*nextListBlock, nextListIndex, allocSize, allocAlignment, allocType, pAllocationRequest))
+                return true;
+            nextListBlock = nextListBlock->NextFree();
+        }
+
+        // Failed again, check best fit bucket
+        prevListBlock = FindFreeBlock(allocSize, prevListIndex);
+        while (prevListBlock)
+        {
+            if (CheckBlock(*prevListBlock, prevListIndex, allocSize, allocAlignment, allocType, pAllocationRequest))
+                return true;
+            prevListBlock = prevListBlock->NextFree();
+        }
+    }
+    else if (strategy & VMA_ALLOCATION_CREATE_STRATEGY_MIN_MEMORY_BIT)
+    {
+        // Check best fit bucket
+        prevListBlock = FindFreeBlock(allocSize, prevListIndex);
+        while (prevListBlock)
+        {
+            if (CheckBlock(*prevListBlock, prevListIndex, allocSize, allocAlignment, allocType, pAllocationRequest))
+                return true;
+            prevListBlock = prevListBlock->NextFree();
+        }
+
+        // If failed check null block
+        if (CheckBlock(*m_NullBlock, m_ListsCount, allocSize, allocAlignment, allocType, pAllocationRequest))
+            return true;
+
+        // Check larger bucket
+        nextListBlock = FindFreeBlock(sizeForNextList, nextListIndex);
+        while (nextListBlock)
+        {
+            if (CheckBlock(*nextListBlock, nextListIndex, allocSize, allocAlignment, allocType, pAllocationRequest))
+                return true;
+            nextListBlock = nextListBlock->NextFree();
+        }
+    }
+    else if (strategy & VMA_ALLOCATION_CREATE_STRATEGY_MIN_OFFSET_BIT)
+    {
+        // Perform search from the start
+        VmaStlAllocator<Block*> allocator(GetAllocationCallbacks());
+        VmaVector<Block*, VmaStlAllocator<Block*>> blockList(m_BlocksFreeCount, allocator);
+
+        size_t i = m_BlocksFreeCount;
+        for (Block* block = m_NullBlock->prevPhysical; block != VMA_NULL; block = block->prevPhysical)
+        {
+            if (block->IsFree() && block->size >= allocSize)
+                blockList[--i] = block;
+        }
+
+        for (; i < m_BlocksFreeCount; ++i)
+        {
+            Block& block = *blockList[i];
+            if (CheckBlock(block, GetListIndex(block.size), allocSize, allocAlignment, allocType, pAllocationRequest))
+                return true;
+        }
+
+        // If failed check null block
+        if (CheckBlock(*m_NullBlock, m_ListsCount, allocSize, allocAlignment, allocType, pAllocationRequest))
+            return true;
+
+        // Whole range searched, no more memory
+        return false;
+    }
+    else
+    {
+        // Check larger bucket
+        nextListBlock = FindFreeBlock(sizeForNextList, nextListIndex);
+        while (nextListBlock)
+        {
+            if (CheckBlock(*nextListBlock, nextListIndex, allocSize, allocAlignment, allocType, pAllocationRequest))
+                return true;
+            nextListBlock = nextListBlock->NextFree();
+        }
+
+        // If failed check null block
+        if (CheckBlock(*m_NullBlock,
m_ListsCount, allocSize, allocAlignment, allocType, pAllocationRequest)) + return true; + + // Check best fit bucket + prevListBlock = FindFreeBlock(allocSize, prevListIndex); + while (prevListBlock) + { + if (CheckBlock(*prevListBlock, prevListIndex, allocSize, allocAlignment, allocType, pAllocationRequest)) + return true; + prevListBlock = prevListBlock->NextFree(); + } + } + + // Worst case, full search has to be done + while (++nextListIndex < m_ListsCount) + { + nextListBlock = m_FreeList[nextListIndex]; + while (nextListBlock) + { + if (CheckBlock(*nextListBlock, nextListIndex, allocSize, allocAlignment, allocType, pAllocationRequest)) + return true; + nextListBlock = nextListBlock->NextFree(); + } + } + + // No more memory sadly + return false; +} + +VkResult VmaBlockMetadata_TLSF::CheckCorruption(const void* pBlockData) +{ + for (Block* block = m_NullBlock->prevPhysical; block != VMA_NULL; block = block->prevPhysical) + { + if (!block->IsFree()) + { + if (!VmaValidateMagicValue(pBlockData, block->offset + block->size)) + { + VMA_ASSERT(0 && "MEMORY CORRUPTION DETECTED AFTER VALIDATED ALLOCATION!"); + return VK_ERROR_UNKNOWN_COPY; + } + } + } + + return VK_SUCCESS; +} + +void VmaBlockMetadata_TLSF::Alloc( + const VmaAllocationRequest& request, + VmaSuballocationType type, + void* userData) +{ + VMA_ASSERT(request.type == VmaAllocationRequestType::TLSF); + + // Get block and pop it from the free list + Block* currentBlock = (Block*)request.allocHandle; + VkDeviceSize offset = request.algorithmData; + VMA_ASSERT(currentBlock != VMA_NULL); + VMA_ASSERT(currentBlock->offset <= offset); + + if (currentBlock != m_NullBlock) + RemoveFreeBlock(currentBlock); + + VkDeviceSize debugMargin = GetDebugMargin(); + VkDeviceSize missingAlignment = offset - currentBlock->offset; + + // Append missing alignment to prev block or create new one + if (missingAlignment) + { + Block* prevBlock = currentBlock->prevPhysical; + VMA_ASSERT(prevBlock != VMA_NULL && "There should be no missing alignment at offset 0!"); + + if (prevBlock->IsFree() && prevBlock->size != debugMargin) + { + uint32_t oldList = GetListIndex(prevBlock->size); + prevBlock->size += missingAlignment; + // Check if new size crosses list bucket + if (oldList != GetListIndex(prevBlock->size)) + { + prevBlock->size -= missingAlignment; + RemoveFreeBlock(prevBlock); + prevBlock->size += missingAlignment; + InsertFreeBlock(prevBlock); + } + else + m_BlocksFreeSize += missingAlignment; + } + else + { + Block* newBlock = m_BlockAllocator.Alloc(); + currentBlock->prevPhysical = newBlock; + prevBlock->nextPhysical = newBlock; + newBlock->prevPhysical = prevBlock; + newBlock->nextPhysical = currentBlock; + newBlock->size = missingAlignment; + newBlock->offset = currentBlock->offset; + newBlock->MarkTaken(); + + InsertFreeBlock(newBlock); + } + + currentBlock->size -= missingAlignment; + currentBlock->offset += missingAlignment; + } + + VkDeviceSize size = request.size + debugMargin; + if (currentBlock->size == size) + { + if (currentBlock == m_NullBlock) + { + // Setup new null block + m_NullBlock = m_BlockAllocator.Alloc(); + m_NullBlock->size = 0; + m_NullBlock->offset = currentBlock->offset + size; + m_NullBlock->prevPhysical = currentBlock; + m_NullBlock->nextPhysical = VMA_NULL; + m_NullBlock->MarkFree(); + m_NullBlock->PrevFree() = VMA_NULL; + m_NullBlock->NextFree() = VMA_NULL; + currentBlock->nextPhysical = m_NullBlock; + currentBlock->MarkTaken(); + } + } + else + { + VMA_ASSERT(currentBlock->size > size && "Proper block already found, 
shouldn't find smaller one!"); + + // Create new free block + Block* newBlock = m_BlockAllocator.Alloc(); + newBlock->size = currentBlock->size - size; + newBlock->offset = currentBlock->offset + size; + newBlock->prevPhysical = currentBlock; + newBlock->nextPhysical = currentBlock->nextPhysical; + currentBlock->nextPhysical = newBlock; + currentBlock->size = size; + + if (currentBlock == m_NullBlock) + { + m_NullBlock = newBlock; + m_NullBlock->MarkFree(); + m_NullBlock->NextFree() = VMA_NULL; + m_NullBlock->PrevFree() = VMA_NULL; + currentBlock->MarkTaken(); + } + else + { + newBlock->nextPhysical->prevPhysical = newBlock; + newBlock->MarkTaken(); + InsertFreeBlock(newBlock); + } + } + currentBlock->UserData() = userData; + + if (debugMargin > 0) + { + currentBlock->size -= debugMargin; + Block* newBlock = m_BlockAllocator.Alloc(); + newBlock->size = debugMargin; + newBlock->offset = currentBlock->offset + currentBlock->size; + newBlock->prevPhysical = currentBlock; + newBlock->nextPhysical = currentBlock->nextPhysical; + newBlock->MarkTaken(); + currentBlock->nextPhysical->prevPhysical = newBlock; + currentBlock->nextPhysical = newBlock; + InsertFreeBlock(newBlock); + } + + if (!IsVirtual()) + m_GranularityHandler.AllocPages((uint8_t)(uintptr_t)request.customData, + currentBlock->offset, currentBlock->size); + ++m_AllocCount; +} + +void VmaBlockMetadata_TLSF::Free(VmaAllocHandle allocHandle) +{ + Block* block = (Block*)allocHandle; + Block* next = block->nextPhysical; + VMA_ASSERT(!block->IsFree() && "Block is already free!"); + + if (!IsVirtual()) + m_GranularityHandler.FreePages(block->offset, block->size); + --m_AllocCount; + + VkDeviceSize debugMargin = GetDebugMargin(); + if (debugMargin > 0) + { + RemoveFreeBlock(next); + MergeBlock(next, block); + block = next; + next = next->nextPhysical; + } + + // Try merging + Block* prev = block->prevPhysical; + if (prev != VMA_NULL && prev->IsFree() && prev->size != debugMargin) + { + RemoveFreeBlock(prev); + MergeBlock(block, prev); + } + + if (!next->IsFree()) + InsertFreeBlock(block); + else if (next == m_NullBlock) + MergeBlock(m_NullBlock, block); + else + { + RemoveFreeBlock(next); + MergeBlock(next, block); + InsertFreeBlock(next); + } +} + +void VmaBlockMetadata_TLSF::GetAllocationInfo(VmaAllocHandle allocHandle, VmaVirtualAllocationInfo& outInfo) +{ + Block* block = (Block*)allocHandle; + VMA_ASSERT(!block->IsFree() && "Cannot get allocation info for free block!"); + outInfo.offset = block->offset; + outInfo.size = block->size; + outInfo.pUserData = block->UserData(); +} + +void* VmaBlockMetadata_TLSF::GetAllocationUserData(VmaAllocHandle allocHandle) const +{ + Block* block = (Block*)allocHandle; + VMA_ASSERT(!block->IsFree() && "Cannot get user data for free block!"); + return block->UserData(); +} + +VmaAllocHandle VmaBlockMetadata_TLSF::GetAllocationListBegin() const +{ + if (m_AllocCount == 0) + return VK_NULL_HANDLE; + + for (Block* block = m_NullBlock->prevPhysical; block; block = block->prevPhysical) + { + if (!block->IsFree()) + return (VmaAllocHandle)block; + } + VMA_ASSERT(false && "If m_AllocCount > 0 then should find any allocation!"); + return VK_NULL_HANDLE; +} + +VmaAllocHandle VmaBlockMetadata_TLSF::GetNextAllocation(VmaAllocHandle prevAlloc) const +{ + Block* startBlock = (Block*)prevAlloc; + VMA_ASSERT(!startBlock->IsFree() && "Incorrect block!"); + + for (Block* block = startBlock->prevPhysical; block; block = block->prevPhysical) + { + if (!block->IsFree()) + return (VmaAllocHandle)block; + } + return 
VK_NULL_HANDLE;
+}
+
+VkDeviceSize VmaBlockMetadata_TLSF::GetNextFreeRegionSize(VmaAllocHandle alloc) const
+{
+    Block* block = (Block*)alloc;
+    VMA_ASSERT(!block->IsFree() && "Incorrect block!");
+
+    if (block->prevPhysical)
+        return block->prevPhysical->IsFree() ? block->prevPhysical->size : 0;
+    return 0;
+}
+
+void VmaBlockMetadata_TLSF::Clear()
+{
+    m_AllocCount = 0;
+    m_BlocksFreeCount = 0;
+    m_BlocksFreeSize = 0;
+    m_IsFreeBitmap = 0;
+    m_NullBlock->offset = 0;
+    m_NullBlock->size = GetSize();
+    Block* block = m_NullBlock->prevPhysical;
+    m_NullBlock->prevPhysical = VMA_NULL;
+    while (block)
+    {
+        Block* prev = block->prevPhysical;
+        m_BlockAllocator.Free(block);
+        block = prev;
+    }
+    memset(m_FreeList, 0, m_ListsCount * sizeof(Block*));
+    memset(m_InnerIsFreeBitmap, 0, m_MemoryClasses * sizeof(uint32_t));
+    m_GranularityHandler.Clear();
+}
+
+void VmaBlockMetadata_TLSF::SetAllocationUserData(VmaAllocHandle allocHandle, void* userData)
+{
+    Block* block = (Block*)allocHandle;
+    VMA_ASSERT(!block->IsFree() && "Trying to set user data for not allocated block!");
+    block->UserData() = userData;
+}
+
+void VmaBlockMetadata_TLSF::DebugLogAllAllocations() const
+{
+    for (Block* block = m_NullBlock->prevPhysical; block != VMA_NULL; block = block->prevPhysical)
+        if (!block->IsFree())
+            DebugLogAllocation(block->offset, block->size, block->UserData());
+}
+
+uint8_t VmaBlockMetadata_TLSF::SizeToMemoryClass(VkDeviceSize size) const
+{
+    if (size > SMALL_BUFFER_SIZE)
+        return uint8_t(VMA_BITSCAN_MSB(size) - MEMORY_CLASS_SHIFT);
+    return 0;
+}
+
+uint16_t VmaBlockMetadata_TLSF::SizeToSecondIndex(VkDeviceSize size, uint8_t memoryClass) const
+{
+    if (memoryClass == 0)
+    {
+        if (IsVirtual())
+            return static_cast<uint16_t>((size - 1) / 8);
+        else
+            return static_cast<uint16_t>((size - 1) / 64);
+    }
+    return static_cast<uint16_t>((size >> (memoryClass + MEMORY_CLASS_SHIFT - SECOND_LEVEL_INDEX)) ^ (1U << SECOND_LEVEL_INDEX));
+}
+
+uint32_t VmaBlockMetadata_TLSF::GetListIndex(uint8_t memoryClass, uint16_t secondIndex) const
+{
+    if (memoryClass == 0)
+        return secondIndex;
+
+    const uint32_t index = static_cast<uint32_t>(memoryClass - 1) * (1 << SECOND_LEVEL_INDEX) + secondIndex;
+    if (IsVirtual())
+        return index + (1 << SECOND_LEVEL_INDEX);
+    else
+        return index + 4;
+}
+
+uint32_t VmaBlockMetadata_TLSF::GetListIndex(VkDeviceSize size) const
+{
+    uint8_t memoryClass = SizeToMemoryClass(size);
+    return GetListIndex(memoryClass, SizeToSecondIndex(size, memoryClass));
+}
+
+void VmaBlockMetadata_TLSF::RemoveFreeBlock(Block* block)
+{
+    VMA_ASSERT(block != m_NullBlock);
+    VMA_ASSERT(block->IsFree());
+
+    if (block->NextFree() != VMA_NULL)
+        block->NextFree()->PrevFree() = block->PrevFree();
+    if (block->PrevFree() != VMA_NULL)
+        block->PrevFree()->NextFree() = block->NextFree();
+    else
+    {
+        uint8_t memClass = SizeToMemoryClass(block->size);
+        uint16_t secondIndex = SizeToSecondIndex(block->size, memClass);
+        uint32_t index = GetListIndex(memClass, secondIndex);
+        VMA_ASSERT(m_FreeList[index] == block);
+        m_FreeList[index] = block->NextFree();
+        if (block->NextFree() == VMA_NULL)
+        {
+            m_InnerIsFreeBitmap[memClass] &= ~(1U << secondIndex);
+            if (m_InnerIsFreeBitmap[memClass] == 0)
+                m_IsFreeBitmap &= ~(1UL << memClass);
+        }
+    }
+    block->MarkTaken();
+    block->UserData() = VMA_NULL;
+    --m_BlocksFreeCount;
+    m_BlocksFreeSize -= block->size;
+}
+
+void VmaBlockMetadata_TLSF::InsertFreeBlock(Block* block)
+{
+    VMA_ASSERT(block != m_NullBlock);
+    VMA_ASSERT(!block->IsFree() && "Cannot insert block twice!");
+
+    uint8_t memClass =
SizeToMemoryClass(block->size);
+    uint16_t secondIndex = SizeToSecondIndex(block->size, memClass);
+    uint32_t index = GetListIndex(memClass, secondIndex);
+    VMA_ASSERT(index < m_ListsCount);
+    block->PrevFree() = VMA_NULL;
+    block->NextFree() = m_FreeList[index];
+    m_FreeList[index] = block;
+    if (block->NextFree() != VMA_NULL)
+        block->NextFree()->PrevFree() = block;
+    else
+    {
+        m_InnerIsFreeBitmap[memClass] |= 1U << secondIndex;
+        m_IsFreeBitmap |= 1UL << memClass;
+    }
+    ++m_BlocksFreeCount;
+    m_BlocksFreeSize += block->size;
+}
+
+void VmaBlockMetadata_TLSF::MergeBlock(Block* block, Block* prev)
+{
+    VMA_ASSERT(block->prevPhysical == prev && "Cannot merge separate physical regions!");
+    VMA_ASSERT(!prev->IsFree() && "Cannot merge block that belongs to free list!");
+
+    block->offset = prev->offset;
+    block->size += prev->size;
+    block->prevPhysical = prev->prevPhysical;
+    if (block->prevPhysical)
+        block->prevPhysical->nextPhysical = block;
+    m_BlockAllocator.Free(prev);
+}
+
+VmaBlockMetadata_TLSF::Block* VmaBlockMetadata_TLSF::FindFreeBlock(VkDeviceSize size, uint32_t& listIndex) const
+{
+    uint8_t memoryClass = SizeToMemoryClass(size);
+    uint32_t innerFreeMap = m_InnerIsFreeBitmap[memoryClass] & (~0U << SizeToSecondIndex(size, memoryClass));
+    if (!innerFreeMap)
+    {
+        // Check higher levels for available blocks
+        uint32_t freeMap = m_IsFreeBitmap & (~0UL << (memoryClass + 1));
+        if (!freeMap)
+            return VMA_NULL; // No more memory available
+
+        // Find lowest free region
+        memoryClass = VMA_BITSCAN_LSB(freeMap);
+        innerFreeMap = m_InnerIsFreeBitmap[memoryClass];
+        VMA_ASSERT(innerFreeMap != 0);
+    }
+    // Find lowest free subregion
+    listIndex = GetListIndex(memoryClass, VMA_BITSCAN_LSB(innerFreeMap));
+    VMA_ASSERT(m_FreeList[listIndex]);
+    return m_FreeList[listIndex];
+}
+
+bool VmaBlockMetadata_TLSF::CheckBlock(
+    Block& block,
+    uint32_t listIndex,
+    VkDeviceSize allocSize,
+    VkDeviceSize allocAlignment,
+    VmaSuballocationType allocType,
+    VmaAllocationRequest* pAllocationRequest)
+{
+    VMA_ASSERT(block.IsFree() && "Block is already taken!");
+
+    VkDeviceSize alignedOffset = VmaAlignUp(block.offset, allocAlignment);
+    if (block.size < allocSize + alignedOffset - block.offset)
+        return false;
+
+    // Check for granularity conflicts
+    if (!IsVirtual() &&
+        m_GranularityHandler.CheckConflictAndAlignUp(alignedOffset, allocSize, block.offset, block.size, allocType))
+        return false;
+
+    // Alloc successful
+    pAllocationRequest->type = VmaAllocationRequestType::TLSF;
+    pAllocationRequest->allocHandle = (VmaAllocHandle)&block;
+    pAllocationRequest->size = allocSize - GetDebugMargin();
+    pAllocationRequest->customData = (void*)allocType;
+    pAllocationRequest->algorithmData = alignedOffset;
+
+    // Place block at the start of list if it's normal block
+    if (listIndex != m_ListsCount && block.PrevFree())
+    {
+        block.PrevFree()->NextFree() = block.NextFree();
+        if (block.NextFree())
+            block.NextFree()->PrevFree() = block.PrevFree();
+        block.PrevFree() = VMA_NULL;
+        block.NextFree() = m_FreeList[listIndex];
+        m_FreeList[listIndex] = &block;
+        if (block.NextFree())
+            block.NextFree()->PrevFree() = &block;
+    }
+
+    return true;
+}
+#endif // _VMA_BLOCK_METADATA_TLSF_FUNCTIONS
+#endif // _VMA_BLOCK_METADATA_TLSF
+
+#ifndef _VMA_BLOCK_VECTOR
+/*
+Sequence of VmaDeviceMemoryBlock. Represents memory blocks allocated for a specific
+Vulkan memory type.
+
+Synchronized internally with a mutex.
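+
+One such vector exists per memory type (and one per custom pool); it grows and
+shrinks between its minBlockCount and maxBlockCount limits on demand.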
+*/
+class VmaBlockVector
+{
+    friend struct VmaDefragmentationContext_T;
+    VMA_CLASS_NO_COPY_NO_MOVE(VmaBlockVector)
+public:
+    VmaBlockVector(
+        VmaAllocator hAllocator,
+        VmaPool hParentPool,
+        uint32_t memoryTypeIndex,
+        VkDeviceSize preferredBlockSize,
+        size_t minBlockCount,
+        size_t maxBlockCount,
+        VkDeviceSize bufferImageGranularity,
+        bool explicitBlockSize,
+        uint32_t algorithm,
+        float priority,
+        VkDeviceSize minAllocationAlignment,
+        void* pMemoryAllocateNext);
+    ~VmaBlockVector();
+
+    VmaAllocator GetAllocator() const { return m_hAllocator; }
+    VmaPool GetParentPool() const { return m_hParentPool; }
+    bool IsCustomPool() const { return m_hParentPool != VMA_NULL; }
+    uint32_t GetMemoryTypeIndex() const { return m_MemoryTypeIndex; }
+    VkDeviceSize GetPreferredBlockSize() const { return m_PreferredBlockSize; }
+    VkDeviceSize GetBufferImageGranularity() const { return m_BufferImageGranularity; }
+    uint32_t GetAlgorithm() const { return m_Algorithm; }
+    bool HasExplicitBlockSize() const { return m_ExplicitBlockSize; }
+    float GetPriority() const { return m_Priority; }
+    const void* GetAllocationNextPtr() const { return m_pMemoryAllocateNext; }
+    // To be used only while the m_Mutex is locked. Used during defragmentation.
+    size_t GetBlockCount() const { return m_Blocks.size(); }
+    // To be used only while the m_Mutex is locked. Used during defragmentation.
+    VmaDeviceMemoryBlock* GetBlock(size_t index) const { return m_Blocks[index]; }
+    VMA_RW_MUTEX &GetMutex() { return m_Mutex; }
+
+    VkResult CreateMinBlocks();
+    void AddStatistics(VmaStatistics& inoutStats);
+    void AddDetailedStatistics(VmaDetailedStatistics& inoutStats);
+    bool IsEmpty();
+    bool IsCorruptionDetectionEnabled() const;
+
+    VkResult Allocate(
+        VkDeviceSize size,
+        VkDeviceSize alignment,
+        const VmaAllocationCreateInfo& createInfo,
+        VmaSuballocationType suballocType,
+        size_t allocationCount,
+        VmaAllocation* pAllocations);
+
+    void Free(const VmaAllocation hAllocation);
+
+#if VMA_STATS_STRING_ENABLED
+    void PrintDetailedMap(class VmaJsonWriter& json);
+#endif
+
+    VkResult CheckCorruption();
+
+private:
+    const VmaAllocator m_hAllocator;
+    const VmaPool m_hParentPool;
+    const uint32_t m_MemoryTypeIndex;
+    const VkDeviceSize m_PreferredBlockSize;
+    const size_t m_MinBlockCount;
+    const size_t m_MaxBlockCount;
+    const VkDeviceSize m_BufferImageGranularity;
+    const bool m_ExplicitBlockSize;
+    const uint32_t m_Algorithm;
+    const float m_Priority;
+    const VkDeviceSize m_MinAllocationAlignment;
+
+    void* const m_pMemoryAllocateNext;
+    VMA_RW_MUTEX m_Mutex;
+    // Incrementally sorted by sumFreeSize, ascending.
+    VmaVector<VmaDeviceMemoryBlock*, VmaStlAllocator<VmaDeviceMemoryBlock*>> m_Blocks;
+    uint32_t m_NextBlockId;
+    bool m_IncrementalSort = true;
+
+    void SetIncrementalSort(bool val) { m_IncrementalSort = val; }
+
+    VkDeviceSize CalcMaxBlockSize() const;
+    // Finds and removes given block from vector.
+    void Remove(VmaDeviceMemoryBlock* pBlock);
+    // Performs single step in sorting m_Blocks. They may not be fully sorted
+    // after this call.
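+    // Amortizes the cost of keeping m_Blocks sorted by sumFreeSize over many calls.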
+    void IncrementallySortBlocks();
+    void SortByFreeSize();
+
+    VkResult AllocatePage(
+        VkDeviceSize size,
+        VkDeviceSize alignment,
+        const VmaAllocationCreateInfo& createInfo,
+        VmaSuballocationType suballocType,
+        VmaAllocation* pAllocation);
+
+    VkResult AllocateFromBlock(
+        VmaDeviceMemoryBlock* pBlock,
+        VkDeviceSize size,
+        VkDeviceSize alignment,
+        VmaAllocationCreateFlags allocFlags,
+        void* pUserData,
+        VmaSuballocationType suballocType,
+        uint32_t strategy,
+        VmaAllocation* pAllocation);
+
+    VkResult CommitAllocationRequest(
+        VmaAllocationRequest& allocRequest,
+        VmaDeviceMemoryBlock* pBlock,
+        VkDeviceSize alignment,
+        VmaAllocationCreateFlags allocFlags,
+        void* pUserData,
+        VmaSuballocationType suballocType,
+        VmaAllocation* pAllocation);
+
+    VkResult CreateBlock(VkDeviceSize blockSize, size_t* pNewBlockIndex);
+    bool HasEmptyBlock();
+};
+#endif // _VMA_BLOCK_VECTOR
+
+#ifndef _VMA_DEFRAGMENTATION_CONTEXT
+struct VmaDefragmentationContext_T
+{
+    VMA_CLASS_NO_COPY_NO_MOVE(VmaDefragmentationContext_T)
+public:
+    VmaDefragmentationContext_T(
+        VmaAllocator hAllocator,
+        const VmaDefragmentationInfo& info);
+    ~VmaDefragmentationContext_T();
+
+    void GetStats(VmaDefragmentationStats& outStats) { outStats = m_GlobalStats; }
+
+    VkResult DefragmentPassBegin(VmaDefragmentationPassMoveInfo& moveInfo);
+    VkResult DefragmentPassEnd(VmaDefragmentationPassMoveInfo& moveInfo);
+
+private:
+    // Max number of allocations to ignore due to size constraints before ending single pass
+    static const uint8_t MAX_ALLOCS_TO_IGNORE = 16;
+    enum class CounterStatus { Pass, Ignore, End };
+
+    struct FragmentedBlock
+    {
+        uint32_t data;
+        VmaDeviceMemoryBlock* block;
+    };
+    struct StateBalanced
+    {
+        VkDeviceSize avgFreeSize = 0;
+        VkDeviceSize avgAllocSize = UINT64_MAX;
+    };
+    struct StateExtensive
+    {
+        enum class Operation : uint8_t
+        {
+            FindFreeBlockBuffer, FindFreeBlockTexture, FindFreeBlockAll,
+            MoveBuffers, MoveTextures, MoveAll,
+            Cleanup, Done
+        };
+
+        Operation operation = Operation::FindFreeBlockTexture;
+        size_t firstFreeBlock = SIZE_MAX;
+    };
+    struct MoveAllocationData
+    {
+        VkDeviceSize size;
+        VkDeviceSize alignment;
+        VmaSuballocationType type;
+        VmaAllocationCreateFlags flags;
+        VmaDefragmentationMove move = {};
+    };
+
+    const VkDeviceSize m_MaxPassBytes;
+    const uint32_t m_MaxPassAllocations;
+    const PFN_vmaCheckDefragmentationBreakFunction m_BreakCallback;
+    void* m_BreakCallbackUserData;
+
+    VmaStlAllocator<VmaDefragmentationMove> m_MoveAllocator;
+    VmaVector<VmaDefragmentationMove, VmaStlAllocator<VmaDefragmentationMove>> m_Moves;
+
+    uint8_t m_IgnoredAllocs = 0;
+    uint32_t m_Algorithm;
+    uint32_t m_BlockVectorCount;
+    VmaBlockVector* m_PoolBlockVector;
+    VmaBlockVector** m_pBlockVectors;
+    size_t m_ImmovableBlockCount = 0;
+    VmaDefragmentationStats m_GlobalStats = { 0 };
+    VmaDefragmentationStats m_PassStats = { 0 };
+    void* m_AlgorithmState = VMA_NULL;
+
+    static MoveAllocationData GetMoveData(VmaAllocHandle handle, VmaBlockMetadata* metadata);
+    CounterStatus CheckCounters(VkDeviceSize bytes);
+    bool IncrementCounters(VkDeviceSize bytes);
+    bool ReallocWithinBlock(VmaBlockVector& vector, VmaDeviceMemoryBlock* block);
+    bool AllocInOtherBlock(size_t start, size_t end, MoveAllocationData& data, VmaBlockVector& vector);
+
+    bool ComputeDefragmentation(VmaBlockVector& vector, size_t index);
+    bool ComputeDefragmentation_Fast(VmaBlockVector& vector);
+    bool ComputeDefragmentation_Balanced(VmaBlockVector& vector, size_t index, bool update);
+    bool ComputeDefragmentation_Full(VmaBlockVector& vector);
+    bool ComputeDefragmentation_Extensive(VmaBlockVector& vector,
size_t index); + + void UpdateVectorStatistics(VmaBlockVector& vector, StateBalanced& state); + bool MoveDataToFreeBlocks(VmaSuballocationType currentType, + VmaBlockVector& vector, size_t firstFreeBlock, + bool& texturePresent, bool& bufferPresent, bool& otherPresent); +}; +#endif // _VMA_DEFRAGMENTATION_CONTEXT + +#ifndef _VMA_POOL_T +struct VmaPool_T +{ + friend struct VmaPoolListItemTraits; + VMA_CLASS_NO_COPY_NO_MOVE(VmaPool_T) +public: + VmaBlockVector m_BlockVector; + VmaDedicatedAllocationList m_DedicatedAllocations; + + VmaPool_T( + VmaAllocator hAllocator, + const VmaPoolCreateInfo& createInfo, + VkDeviceSize preferredBlockSize); + ~VmaPool_T(); + + uint32_t GetId() const { return m_Id; } + void SetId(uint32_t id) { VMA_ASSERT(m_Id == 0); m_Id = id; } + + const char* GetName() const { return m_Name; } + void SetName(const char* pName); + +#if VMA_STATS_STRING_ENABLED + //void PrintDetailedMap(class VmaStringBuilder& sb); +#endif + +private: + uint32_t m_Id; + char* m_Name; + VmaPool_T* m_PrevPool = VMA_NULL; + VmaPool_T* m_NextPool = VMA_NULL; +}; + +struct VmaPoolListItemTraits +{ + typedef VmaPool_T ItemType; + + static ItemType* GetPrev(const ItemType* item) { return item->m_PrevPool; } + static ItemType* GetNext(const ItemType* item) { return item->m_NextPool; } + static ItemType*& AccessPrev(ItemType* item) { return item->m_PrevPool; } + static ItemType*& AccessNext(ItemType* item) { return item->m_NextPool; } +}; +#endif // _VMA_POOL_T + +#ifndef _VMA_CURRENT_BUDGET_DATA +struct VmaCurrentBudgetData +{ + VMA_CLASS_NO_COPY_NO_MOVE(VmaCurrentBudgetData) +public: + + VMA_ATOMIC_UINT32 m_BlockCount[VK_MAX_MEMORY_HEAPS]; + VMA_ATOMIC_UINT32 m_AllocationCount[VK_MAX_MEMORY_HEAPS]; + VMA_ATOMIC_UINT64 m_BlockBytes[VK_MAX_MEMORY_HEAPS]; + VMA_ATOMIC_UINT64 m_AllocationBytes[VK_MAX_MEMORY_HEAPS]; + +#if VMA_MEMORY_BUDGET + VMA_ATOMIC_UINT32 m_OperationsSinceBudgetFetch; + VMA_RW_MUTEX m_BudgetMutex; + uint64_t m_VulkanUsage[VK_MAX_MEMORY_HEAPS]; + uint64_t m_VulkanBudget[VK_MAX_MEMORY_HEAPS]; + uint64_t m_BlockBytesAtBudgetFetch[VK_MAX_MEMORY_HEAPS]; +#endif // VMA_MEMORY_BUDGET + + VmaCurrentBudgetData(); + + void AddAllocation(uint32_t heapIndex, VkDeviceSize allocationSize); + void RemoveAllocation(uint32_t heapIndex, VkDeviceSize allocationSize); +}; + +#ifndef _VMA_CURRENT_BUDGET_DATA_FUNCTIONS +VmaCurrentBudgetData::VmaCurrentBudgetData() +{ + for (uint32_t heapIndex = 0; heapIndex < VK_MAX_MEMORY_HEAPS; ++heapIndex) + { + m_BlockCount[heapIndex] = 0; + m_AllocationCount[heapIndex] = 0; + m_BlockBytes[heapIndex] = 0; + m_AllocationBytes[heapIndex] = 0; +#if VMA_MEMORY_BUDGET + m_VulkanUsage[heapIndex] = 0; + m_VulkanBudget[heapIndex] = 0; + m_BlockBytesAtBudgetFetch[heapIndex] = 0; +#endif + } + +#if VMA_MEMORY_BUDGET + m_OperationsSinceBudgetFetch = 0; +#endif +} + +void VmaCurrentBudgetData::AddAllocation(uint32_t heapIndex, VkDeviceSize allocationSize) +{ + m_AllocationBytes[heapIndex] += allocationSize; + ++m_AllocationCount[heapIndex]; +#if VMA_MEMORY_BUDGET + ++m_OperationsSinceBudgetFetch; +#endif +} + +void VmaCurrentBudgetData::RemoveAllocation(uint32_t heapIndex, VkDeviceSize allocationSize) +{ + VMA_ASSERT(m_AllocationBytes[heapIndex] >= allocationSize); + m_AllocationBytes[heapIndex] -= allocationSize; + VMA_ASSERT(m_AllocationCount[heapIndex] > 0); + --m_AllocationCount[heapIndex]; +#if VMA_MEMORY_BUDGET + ++m_OperationsSinceBudgetFetch; +#endif +} +#endif // _VMA_CURRENT_BUDGET_DATA_FUNCTIONS +#endif // _VMA_CURRENT_BUDGET_DATA + +#ifndef 
_VMA_ALLOCATION_OBJECT_ALLOCATOR
+/*
+Thread-safe wrapper over VmaPoolAllocator free list, for allocation of VmaAllocation_T objects.
+*/
+class VmaAllocationObjectAllocator
+{
+ VMA_CLASS_NO_COPY_NO_MOVE(VmaAllocationObjectAllocator)
+public:
+ VmaAllocationObjectAllocator(const VkAllocationCallbacks* pAllocationCallbacks)
+ : m_Allocator(pAllocationCallbacks, 1024) {}
+
+ template<typename... Types> VmaAllocation Allocate(Types&&... args);
+ void Free(VmaAllocation hAlloc);
+
+private:
+ VMA_MUTEX m_Mutex;
+ VmaPoolAllocator<VmaAllocation_T> m_Allocator;
+};
+
+template<typename... Types>
+VmaAllocation VmaAllocationObjectAllocator::Allocate(Types&&... args)
+{
+ VmaMutexLock mutexLock(m_Mutex);
+ return m_Allocator.Alloc<Types...>(std::forward<Types>(args)...);
+}
+
+void VmaAllocationObjectAllocator::Free(VmaAllocation hAlloc)
+{
+ VmaMutexLock mutexLock(m_Mutex);
+ m_Allocator.Free(hAlloc);
+}
+#endif // _VMA_ALLOCATION_OBJECT_ALLOCATOR
+
+#ifndef _VMA_VIRTUAL_BLOCK_T
+struct VmaVirtualBlock_T
+{
+ VMA_CLASS_NO_COPY_NO_MOVE(VmaVirtualBlock_T)
+public:
+ const bool m_AllocationCallbacksSpecified;
+ const VkAllocationCallbacks m_AllocationCallbacks;
+
+ VmaVirtualBlock_T(const VmaVirtualBlockCreateInfo& createInfo);
+ ~VmaVirtualBlock_T();
+
+ VkResult Init() { return VK_SUCCESS; }
+ bool IsEmpty() const { return m_Metadata->IsEmpty(); }
+ void Free(VmaVirtualAllocation allocation) { m_Metadata->Free((VmaAllocHandle)allocation); }
+ void SetAllocationUserData(VmaVirtualAllocation allocation, void* userData) { m_Metadata->SetAllocationUserData((VmaAllocHandle)allocation, userData); }
+ void Clear() { m_Metadata->Clear(); }
+
+ const VkAllocationCallbacks* GetAllocationCallbacks() const;
+ void GetAllocationInfo(VmaVirtualAllocation allocation, VmaVirtualAllocationInfo& outInfo);
+ VkResult Allocate(const VmaVirtualAllocationCreateInfo& createInfo, VmaVirtualAllocation& outAllocation,
+ VkDeviceSize* outOffset);
+ void GetStatistics(VmaStatistics& outStats) const;
+ void CalculateDetailedStatistics(VmaDetailedStatistics& outStats) const;
+#if VMA_STATS_STRING_ENABLED
+ void BuildStatsString(bool detailedMap, VmaStringBuilder& sb) const;
+#endif
+
+private:
+ VmaBlockMetadata* m_Metadata;
+};
+
+#ifndef _VMA_VIRTUAL_BLOCK_T_FUNCTIONS
+VmaVirtualBlock_T::VmaVirtualBlock_T(const VmaVirtualBlockCreateInfo& createInfo)
+ : m_AllocationCallbacksSpecified(createInfo.pAllocationCallbacks != VMA_NULL),
+ m_AllocationCallbacks(createInfo.pAllocationCallbacks != VMA_NULL ? *createInfo.pAllocationCallbacks : VmaEmptyAllocationCallbacks)
+{
+ const uint32_t algorithm = createInfo.flags & VMA_VIRTUAL_BLOCK_CREATE_ALGORITHM_MASK;
+ switch (algorithm)
+ {
+ case 0:
+ m_Metadata = vma_new(GetAllocationCallbacks(), VmaBlockMetadata_TLSF)(VK_NULL_HANDLE, 1, true);
+ break;
+ case VMA_VIRTUAL_BLOCK_CREATE_LINEAR_ALGORITHM_BIT:
+ m_Metadata = vma_new(GetAllocationCallbacks(), VmaBlockMetadata_Linear)(VK_NULL_HANDLE, 1, true);
+ break;
+ default:
+ VMA_ASSERT(0);
+ m_Metadata = vma_new(GetAllocationCallbacks(), VmaBlockMetadata_TLSF)(VK_NULL_HANDLE, 1, true);
+ }
+
+ m_Metadata->Init(createInfo.size);
+}
+
+VmaVirtualBlock_T::~VmaVirtualBlock_T()
+{
+ // Define macro VMA_DEBUG_LOG_FORMAT or more specialized VMA_LEAK_LOG_FORMAT
+ // to receive the list of the unfreed allocations.
+ if (!m_Metadata->IsEmpty())
+ m_Metadata->DebugLogAllAllocations();
+ // This is the most important assert in the entire library.
+ // Hitting it means you have some memory leak - unreleased virtual allocations. 
+ VMA_ASSERT_LEAK(m_Metadata->IsEmpty() && "Some virtual allocations were not freed before destruction of this virtual block!"); + + vma_delete(GetAllocationCallbacks(), m_Metadata); +} + +const VkAllocationCallbacks* VmaVirtualBlock_T::GetAllocationCallbacks() const +{ + return m_AllocationCallbacksSpecified ? &m_AllocationCallbacks : VMA_NULL; +} + +void VmaVirtualBlock_T::GetAllocationInfo(VmaVirtualAllocation allocation, VmaVirtualAllocationInfo& outInfo) +{ + m_Metadata->GetAllocationInfo((VmaAllocHandle)allocation, outInfo); +} + +VkResult VmaVirtualBlock_T::Allocate(const VmaVirtualAllocationCreateInfo& createInfo, VmaVirtualAllocation& outAllocation, + VkDeviceSize* outOffset) +{ + VmaAllocationRequest request = {}; + if (m_Metadata->CreateAllocationRequest( + createInfo.size, // allocSize + VMA_MAX(createInfo.alignment, (VkDeviceSize)1), // allocAlignment + (createInfo.flags & VMA_VIRTUAL_ALLOCATION_CREATE_UPPER_ADDRESS_BIT) != 0, // upperAddress + VMA_SUBALLOCATION_TYPE_UNKNOWN, // allocType - unimportant + createInfo.flags & VMA_VIRTUAL_ALLOCATION_CREATE_STRATEGY_MASK, // strategy + &request)) + { + m_Metadata->Alloc(request, + VMA_SUBALLOCATION_TYPE_UNKNOWN, // type - unimportant + createInfo.pUserData); + outAllocation = (VmaVirtualAllocation)request.allocHandle; + if(outOffset) + *outOffset = m_Metadata->GetAllocationOffset(request.allocHandle); + return VK_SUCCESS; + } + outAllocation = (VmaVirtualAllocation)VK_NULL_HANDLE; + if (outOffset) + *outOffset = UINT64_MAX; + return VK_ERROR_OUT_OF_DEVICE_MEMORY; +} + +void VmaVirtualBlock_T::GetStatistics(VmaStatistics& outStats) const +{ + VmaClearStatistics(outStats); + m_Metadata->AddStatistics(outStats); +} + +void VmaVirtualBlock_T::CalculateDetailedStatistics(VmaDetailedStatistics& outStats) const +{ + VmaClearDetailedStatistics(outStats); + m_Metadata->AddDetailedStatistics(outStats); +} + +#if VMA_STATS_STRING_ENABLED +void VmaVirtualBlock_T::BuildStatsString(bool detailedMap, VmaStringBuilder& sb) const +{ + VmaJsonWriter json(GetAllocationCallbacks(), sb); + json.BeginObject(); + + VmaDetailedStatistics stats; + CalculateDetailedStatistics(stats); + + json.WriteString("Stats"); + VmaPrintDetailedStatistics(json, stats); + + if (detailedMap) + { + json.WriteString("Details"); + json.BeginObject(); + m_Metadata->PrintDetailedMap(json); + json.EndObject(); + } + + json.EndObject(); +} +#endif // VMA_STATS_STRING_ENABLED +#endif // _VMA_VIRTUAL_BLOCK_T_FUNCTIONS +#endif // _VMA_VIRTUAL_BLOCK_T + + +// Main allocator object. +struct VmaAllocator_T +{ + VMA_CLASS_NO_COPY_NO_MOVE(VmaAllocator_T) +public: + const bool m_UseMutex; + const uint32_t m_VulkanApiVersion; + bool m_UseKhrDedicatedAllocation; // Can be set only if m_VulkanApiVersion < VK_MAKE_VERSION(1, 1, 0). + bool m_UseKhrBindMemory2; // Can be set only if m_VulkanApiVersion < VK_MAKE_VERSION(1, 1, 0). + bool m_UseExtMemoryBudget; + bool m_UseAmdDeviceCoherentMemory; + bool m_UseKhrBufferDeviceAddress; + bool m_UseExtMemoryPriority; + bool m_UseKhrMaintenance4; + bool m_UseKhrMaintenance5; + bool m_UseKhrExternalMemoryWin32; + const VkDevice m_hDevice; + const VkInstance m_hInstance; + const bool m_AllocationCallbacksSpecified; + const VkAllocationCallbacks m_AllocationCallbacks; + VmaDeviceMemoryCallbacks m_DeviceMemoryCallbacks; + VmaAllocationObjectAllocator m_AllocationObjectAllocator; + + // Each bit (1 << i) is set if HeapSizeLimit is enabled for that heap, so cannot allocate more than the heap size. 
+ uint32_t m_HeapSizeLimitMask;
+
+ VkPhysicalDeviceProperties m_PhysicalDeviceProperties;
+ VkPhysicalDeviceMemoryProperties m_MemProps;
+
+ // Default pools.
+ VmaBlockVector* m_pBlockVectors[VK_MAX_MEMORY_TYPES];
+ VmaDedicatedAllocationList m_DedicatedAllocations[VK_MAX_MEMORY_TYPES];
+
+ VmaCurrentBudgetData m_Budget;
+ VMA_ATOMIC_UINT32 m_DeviceMemoryCount; // Total number of VkDeviceMemory objects.
+
+ VmaAllocator_T(const VmaAllocatorCreateInfo* pCreateInfo);
+ VkResult Init(const VmaAllocatorCreateInfo* pCreateInfo);
+ ~VmaAllocator_T();
+
+ const VkAllocationCallbacks* GetAllocationCallbacks() const
+ {
+ return m_AllocationCallbacksSpecified ? &m_AllocationCallbacks : VMA_NULL;
+ }
+ const VmaVulkanFunctions& GetVulkanFunctions() const
+ {
+ return m_VulkanFunctions;
+ }
+
+ VkPhysicalDevice GetPhysicalDevice() const { return m_PhysicalDevice; }
+
+ VkDeviceSize GetBufferImageGranularity() const
+ {
+ return VMA_MAX(
+ static_cast<VkDeviceSize>(VMA_DEBUG_MIN_BUFFER_IMAGE_GRANULARITY),
+ m_PhysicalDeviceProperties.limits.bufferImageGranularity);
+ }
+
+ uint32_t GetMemoryHeapCount() const { return m_MemProps.memoryHeapCount; }
+ uint32_t GetMemoryTypeCount() const { return m_MemProps.memoryTypeCount; }
+
+ uint32_t MemoryTypeIndexToHeapIndex(uint32_t memTypeIndex) const
+ {
+ VMA_ASSERT(memTypeIndex < m_MemProps.memoryTypeCount);
+ return m_MemProps.memoryTypes[memTypeIndex].heapIndex;
+ }
+ // True when specific memory type is HOST_VISIBLE but not HOST_COHERENT.
+ bool IsMemoryTypeNonCoherent(uint32_t memTypeIndex) const
+ {
+ return (m_MemProps.memoryTypes[memTypeIndex].propertyFlags & (VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT | VK_MEMORY_PROPERTY_HOST_COHERENT_BIT)) ==
+ VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT;
+ }
+ // Minimum alignment for all allocations in specific memory type.
+ VkDeviceSize GetMemoryTypeMinAlignment(uint32_t memTypeIndex) const
+ {
+ return IsMemoryTypeNonCoherent(memTypeIndex) ?
+ VMA_MAX((VkDeviceSize)VMA_MIN_ALIGNMENT, m_PhysicalDeviceProperties.limits.nonCoherentAtomSize) :
+ (VkDeviceSize)VMA_MIN_ALIGNMENT;
+ }
+
+ bool IsIntegratedGpu() const
+ {
+ return m_PhysicalDeviceProperties.deviceType == VK_PHYSICAL_DEVICE_TYPE_INTEGRATED_GPU;
+ }
+
+ uint32_t GetGlobalMemoryTypeBits() const { return m_GlobalMemoryTypeBits; }
+
+ void GetBufferMemoryRequirements(
+ VkBuffer hBuffer,
+ VkMemoryRequirements& memReq,
+ bool& requiresDedicatedAllocation,
+ bool& prefersDedicatedAllocation) const;
+ void GetImageMemoryRequirements(
+ VkImage hImage,
+ VkMemoryRequirements& memReq,
+ bool& requiresDedicatedAllocation,
+ bool& prefersDedicatedAllocation) const;
+ VkResult FindMemoryTypeIndex(
+ uint32_t memoryTypeBits,
+ const VmaAllocationCreateInfo* pAllocationCreateInfo,
+ VmaBufferImageUsage bufImgUsage,
+ uint32_t* pMemoryTypeIndex) const;
+
+ // Main allocation function.
+ VkResult AllocateMemory(
+ const VkMemoryRequirements& vkMemReq,
+ bool requiresDedicatedAllocation,
+ bool prefersDedicatedAllocation,
+ VkBuffer dedicatedBuffer,
+ VkImage dedicatedImage,
+ VmaBufferImageUsage dedicatedBufferImageUsage,
+ const VmaAllocationCreateInfo& createInfo,
+ VmaSuballocationType suballocType,
+ size_t allocationCount,
+ VmaAllocation* pAllocations);
+
+ // Main deallocation function. 
+ void FreeMemory( + size_t allocationCount, + const VmaAllocation* pAllocations); + + void CalculateStatistics(VmaTotalStatistics* pStats); + + void GetHeapBudgets( + VmaBudget* outBudgets, uint32_t firstHeap, uint32_t heapCount); + +#if VMA_STATS_STRING_ENABLED + void PrintDetailedMap(class VmaJsonWriter& json); +#endif + + void GetAllocationInfo(VmaAllocation hAllocation, VmaAllocationInfo* pAllocationInfo); + void GetAllocationInfo2(VmaAllocation hAllocation, VmaAllocationInfo2* pAllocationInfo); + + VkResult CreatePool(const VmaPoolCreateInfo* pCreateInfo, VmaPool* pPool); + void DestroyPool(VmaPool pool); + void GetPoolStatistics(VmaPool pool, VmaStatistics* pPoolStats); + void CalculatePoolStatistics(VmaPool pool, VmaDetailedStatistics* pPoolStats); + + void SetCurrentFrameIndex(uint32_t frameIndex); + uint32_t GetCurrentFrameIndex() const { return m_CurrentFrameIndex.load(); } + + VkResult CheckPoolCorruption(VmaPool hPool); + VkResult CheckCorruption(uint32_t memoryTypeBits); + + // Call to Vulkan function vkAllocateMemory with accompanying bookkeeping. + VkResult AllocateVulkanMemory(const VkMemoryAllocateInfo* pAllocateInfo, VkDeviceMemory* pMemory); + // Call to Vulkan function vkFreeMemory with accompanying bookkeeping. + void FreeVulkanMemory(uint32_t memoryType, VkDeviceSize size, VkDeviceMemory hMemory); + // Call to Vulkan function vkBindBufferMemory or vkBindBufferMemory2KHR. + VkResult BindVulkanBuffer( + VkDeviceMemory memory, + VkDeviceSize memoryOffset, + VkBuffer buffer, + const void* pNext); + // Call to Vulkan function vkBindImageMemory or vkBindImageMemory2KHR. + VkResult BindVulkanImage( + VkDeviceMemory memory, + VkDeviceSize memoryOffset, + VkImage image, + const void* pNext); + + VkResult Map(VmaAllocation hAllocation, void** ppData); + void Unmap(VmaAllocation hAllocation); + + VkResult BindBufferMemory( + VmaAllocation hAllocation, + VkDeviceSize allocationLocalOffset, + VkBuffer hBuffer, + const void* pNext); + VkResult BindImageMemory( + VmaAllocation hAllocation, + VkDeviceSize allocationLocalOffset, + VkImage hImage, + const void* pNext); + + VkResult FlushOrInvalidateAllocation( + VmaAllocation hAllocation, + VkDeviceSize offset, VkDeviceSize size, + VMA_CACHE_OPERATION op); + VkResult FlushOrInvalidateAllocations( + uint32_t allocationCount, + const VmaAllocation* allocations, + const VkDeviceSize* offsets, const VkDeviceSize* sizes, + VMA_CACHE_OPERATION op); + + VkResult CopyMemoryToAllocation( + const void* pSrcHostPointer, + VmaAllocation dstAllocation, + VkDeviceSize dstAllocationLocalOffset, + VkDeviceSize size); + VkResult CopyAllocationToMemory( + VmaAllocation srcAllocation, + VkDeviceSize srcAllocationLocalOffset, + void* pDstHostPointer, + VkDeviceSize size); + + void FillAllocation(const VmaAllocation hAllocation, uint8_t pattern); + + /* + Returns bit mask of memory types that can support defragmentation on GPU as + they support creation of required buffer for copy operations. + */ + uint32_t GetGpuDefragmentationMemoryTypeBits(); + +#if VMA_EXTERNAL_MEMORY + VkExternalMemoryHandleTypeFlagsKHR GetExternalMemoryHandleTypeFlags(uint32_t memTypeIndex) const + { + return m_TypeExternalMemoryHandleTypes[memTypeIndex]; + } +#endif // #if VMA_EXTERNAL_MEMORY + +private: + VkDeviceSize m_PreferredLargeHeapBlockSize; + + VkPhysicalDevice m_PhysicalDevice; + VMA_ATOMIC_UINT32 m_CurrentFrameIndex; + VMA_ATOMIC_UINT32 m_GpuDefragmentationMemoryTypeBits; // UINT32_MAX means uninitialized. 
+#if VMA_EXTERNAL_MEMORY
+ VkExternalMemoryHandleTypeFlagsKHR m_TypeExternalMemoryHandleTypes[VK_MAX_MEMORY_TYPES];
+#endif // #if VMA_EXTERNAL_MEMORY
+
+ VMA_RW_MUTEX m_PoolsMutex;
+ typedef VmaIntrusiveLinkedList<VmaPoolListItemTraits> PoolList;
+ // Protected by m_PoolsMutex.
+ PoolList m_Pools;
+ uint32_t m_NextPoolId;
+
+ VmaVulkanFunctions m_VulkanFunctions;
+
+ // Global bit mask AND-ed with any memoryTypeBits to disallow certain memory types.
+ uint32_t m_GlobalMemoryTypeBits;
+
+ void ImportVulkanFunctions(const VmaVulkanFunctions* pVulkanFunctions);
+
+#if VMA_STATIC_VULKAN_FUNCTIONS == 1
+ void ImportVulkanFunctions_Static();
+#endif
+
+ void ImportVulkanFunctions_Custom(const VmaVulkanFunctions* pVulkanFunctions);
+
+#if VMA_DYNAMIC_VULKAN_FUNCTIONS == 1
+ void ImportVulkanFunctions_Dynamic();
+#endif
+
+ void ValidateVulkanFunctions();
+
+ VkDeviceSize CalcPreferredBlockSize(uint32_t memTypeIndex);
+
+ VkResult AllocateMemoryOfType(
+ VmaPool pool,
+ VkDeviceSize size,
+ VkDeviceSize alignment,
+ bool dedicatedPreferred,
+ VkBuffer dedicatedBuffer,
+ VkImage dedicatedImage,
+ VmaBufferImageUsage dedicatedBufferImageUsage,
+ const VmaAllocationCreateInfo& createInfo,
+ uint32_t memTypeIndex,
+ VmaSuballocationType suballocType,
+ VmaDedicatedAllocationList& dedicatedAllocations,
+ VmaBlockVector& blockVector,
+ size_t allocationCount,
+ VmaAllocation* pAllocations);
+
+ // Helper function only to be used inside AllocateDedicatedMemory.
+ VkResult AllocateDedicatedMemoryPage(
+ VmaPool pool,
+ VkDeviceSize size,
+ VmaSuballocationType suballocType,
+ uint32_t memTypeIndex,
+ const VkMemoryAllocateInfo& allocInfo,
+ bool map,
+ bool isUserDataString,
+ bool isMappingAllowed,
+ void* pUserData,
+ VmaAllocation* pAllocation);
+
+ // Allocates and registers new VkDeviceMemory specifically for dedicated allocations.
+ VkResult AllocateDedicatedMemory(
+ VmaPool pool,
+ VkDeviceSize size,
+ VmaSuballocationType suballocType,
+ VmaDedicatedAllocationList& dedicatedAllocations,
+ uint32_t memTypeIndex,
+ bool map,
+ bool isUserDataString,
+ bool isMappingAllowed,
+ bool canAliasMemory,
+ void* pUserData,
+ float priority,
+ VkBuffer dedicatedBuffer,
+ VkImage dedicatedImage,
+ VmaBufferImageUsage dedicatedBufferImageUsage,
+ size_t allocationCount,
+ VmaAllocation* pAllocations,
+ const void* pNextChain = VMA_NULL);
+
+ void FreeDedicatedMemory(const VmaAllocation allocation);
+
+ VkResult CalcMemTypeParams(
+ VmaAllocationCreateInfo& outCreateInfo,
+ uint32_t memTypeIndex,
+ VkDeviceSize size,
+ size_t allocationCount);
+ VkResult CalcAllocationParams(
+ VmaAllocationCreateInfo& outCreateInfo,
+ bool dedicatedRequired,
+ bool dedicatedPreferred);
+
+ /*
+ Calculates and returns bit mask of memory types that can support defragmentation
+ on GPU as they support creation of required buffer for copy operations. 
+ */
+ uint32_t CalculateGpuDefragmentationMemoryTypeBits() const;
+ uint32_t CalculateGlobalMemoryTypeBits() const;
+
+ bool GetFlushOrInvalidateRange(
+ VmaAllocation allocation,
+ VkDeviceSize offset, VkDeviceSize size,
+ VkMappedMemoryRange& outRange) const;
+
+#if VMA_MEMORY_BUDGET
+ void UpdateVulkanBudget();
+#endif // #if VMA_MEMORY_BUDGET
+};
+
+
+#ifndef _VMA_MEMORY_FUNCTIONS
+static void* VmaMalloc(VmaAllocator hAllocator, size_t size, size_t alignment)
+{
+ return VmaMalloc(&hAllocator->m_AllocationCallbacks, size, alignment);
+}
+
+static void VmaFree(VmaAllocator hAllocator, void* ptr)
+{
+ VmaFree(&hAllocator->m_AllocationCallbacks, ptr);
+}
+
+template<typename T>
+static T* VmaAllocate(VmaAllocator hAllocator)
+{
+ return (T*)VmaMalloc(hAllocator, sizeof(T), VMA_ALIGN_OF(T));
+}
+
+template<typename T>
+static T* VmaAllocateArray(VmaAllocator hAllocator, size_t count)
+{
+ return (T*)VmaMalloc(hAllocator, sizeof(T) * count, VMA_ALIGN_OF(T));
+}
+
+template<typename T>
+static void vma_delete(VmaAllocator hAllocator, T* ptr)
+{
+ if(ptr != VMA_NULL)
+ {
+ ptr->~T();
+ VmaFree(hAllocator, ptr);
+ }
+}
+
+template<typename T>
+static void vma_delete_array(VmaAllocator hAllocator, T* ptr, size_t count)
+{
+ if(ptr != VMA_NULL)
+ {
+ for(size_t i = count; i--; )
+ ptr[i].~T();
+ VmaFree(hAllocator, ptr);
+ }
+}
+#endif // _VMA_MEMORY_FUNCTIONS
+
+#ifndef _VMA_DEVICE_MEMORY_BLOCK_FUNCTIONS
+VmaDeviceMemoryBlock::VmaDeviceMemoryBlock(VmaAllocator hAllocator)
+ : m_pMetadata(VMA_NULL),
+ m_MemoryTypeIndex(UINT32_MAX),
+ m_Id(0),
+ m_hMemory(VK_NULL_HANDLE),
+ m_MapCount(0),
+ m_pMappedData(VMA_NULL){}
+
+VmaDeviceMemoryBlock::~VmaDeviceMemoryBlock()
+{
+ VMA_ASSERT_LEAK(m_MapCount == 0 && "VkDeviceMemory block is being destroyed while it is still mapped.");
+ VMA_ASSERT_LEAK(m_hMemory == VK_NULL_HANDLE);
+}
+
+void VmaDeviceMemoryBlock::Init(
+ VmaAllocator hAllocator,
+ VmaPool hParentPool,
+ uint32_t newMemoryTypeIndex,
+ VkDeviceMemory newMemory,
+ VkDeviceSize newSize,
+ uint32_t id,
+ uint32_t algorithm,
+ VkDeviceSize bufferImageGranularity)
+{
+ VMA_ASSERT(m_hMemory == VK_NULL_HANDLE);
+
+ m_hParentPool = hParentPool;
+ m_MemoryTypeIndex = newMemoryTypeIndex;
+ m_Id = id;
+ m_hMemory = newMemory;
+
+ switch (algorithm)
+ {
+ case 0:
+ m_pMetadata = vma_new(hAllocator, VmaBlockMetadata_TLSF)(hAllocator->GetAllocationCallbacks(),
+ bufferImageGranularity, false); // isVirtual
+ break;
+ case VMA_POOL_CREATE_LINEAR_ALGORITHM_BIT:
+ m_pMetadata = vma_new(hAllocator, VmaBlockMetadata_Linear)(hAllocator->GetAllocationCallbacks(),
+ bufferImageGranularity, false); // isVirtual
+ break;
+ default:
+ VMA_ASSERT(0);
+ m_pMetadata = vma_new(hAllocator, VmaBlockMetadata_TLSF)(hAllocator->GetAllocationCallbacks(),
+ bufferImageGranularity, false); // isVirtual
+ }
+ m_pMetadata->Init(newSize);
+}
+
+void VmaDeviceMemoryBlock::Destroy(VmaAllocator allocator)
+{
+ // Define macro VMA_DEBUG_LOG_FORMAT or more specialized VMA_LEAK_LOG_FORMAT
+ // to receive the list of the unfreed allocations.
+ if (!m_pMetadata->IsEmpty())
+ m_pMetadata->DebugLogAllAllocations();
+ // This is the most important assert in the entire library.
+ // Hitting it means you have some memory leak - unreleased VmaAllocation objects. 
+ VMA_ASSERT_LEAK(m_pMetadata->IsEmpty() && "Some allocations were not freed before destruction of this memory block!"); + + VMA_ASSERT_LEAK(m_hMemory != VK_NULL_HANDLE); + allocator->FreeVulkanMemory(m_MemoryTypeIndex, m_pMetadata->GetSize(), m_hMemory); + m_hMemory = VK_NULL_HANDLE; + + vma_delete(allocator, m_pMetadata); + m_pMetadata = VMA_NULL; +} + +void VmaDeviceMemoryBlock::PostAlloc(VmaAllocator hAllocator) +{ + VmaMutexLock lock(m_MapAndBindMutex, hAllocator->m_UseMutex); + m_MappingHysteresis.PostAlloc(); +} + +void VmaDeviceMemoryBlock::PostFree(VmaAllocator hAllocator) +{ + VmaMutexLock lock(m_MapAndBindMutex, hAllocator->m_UseMutex); + if(m_MappingHysteresis.PostFree()) + { + VMA_ASSERT(m_MappingHysteresis.GetExtraMapping() == 0); + if (m_MapCount == 0) + { + m_pMappedData = VMA_NULL; + (*hAllocator->GetVulkanFunctions().vkUnmapMemory)(hAllocator->m_hDevice, m_hMemory); + } + } +} + +bool VmaDeviceMemoryBlock::Validate() const +{ + VMA_VALIDATE((m_hMemory != VK_NULL_HANDLE) && + (m_pMetadata->GetSize() != 0)); + + return m_pMetadata->Validate(); +} + +VkResult VmaDeviceMemoryBlock::CheckCorruption(VmaAllocator hAllocator) +{ + void* pData = VMA_NULL; + VkResult res = Map(hAllocator, 1, &pData); + if (res != VK_SUCCESS) + { + return res; + } + + res = m_pMetadata->CheckCorruption(pData); + + Unmap(hAllocator, 1); + + return res; +} + +VkResult VmaDeviceMemoryBlock::Map(VmaAllocator hAllocator, uint32_t count, void** ppData) +{ + if (count == 0) + { + return VK_SUCCESS; + } + + VmaMutexLock lock(m_MapAndBindMutex, hAllocator->m_UseMutex); + const uint32_t oldTotalMapCount = m_MapCount + m_MappingHysteresis.GetExtraMapping(); + if (oldTotalMapCount != 0) + { + VMA_ASSERT(m_pMappedData != VMA_NULL); + m_MappingHysteresis.PostMap(); + m_MapCount += count; + if (ppData != VMA_NULL) + { + *ppData = m_pMappedData; + } + return VK_SUCCESS; + } + else + { + VkResult result = (*hAllocator->GetVulkanFunctions().vkMapMemory)( + hAllocator->m_hDevice, + m_hMemory, + 0, // offset + VK_WHOLE_SIZE, + 0, // flags + &m_pMappedData); + if (result == VK_SUCCESS) + { + VMA_ASSERT(m_pMappedData != VMA_NULL); + m_MappingHysteresis.PostMap(); + m_MapCount = count; + if (ppData != VMA_NULL) + { + *ppData = m_pMappedData; + } + } + return result; + } +} + +void VmaDeviceMemoryBlock::Unmap(VmaAllocator hAllocator, uint32_t count) +{ + if (count == 0) + { + return; + } + + VmaMutexLock lock(m_MapAndBindMutex, hAllocator->m_UseMutex); + if (m_MapCount >= count) + { + m_MapCount -= count; + const uint32_t totalMapCount = m_MapCount + m_MappingHysteresis.GetExtraMapping(); + if (totalMapCount == 0) + { + m_pMappedData = VMA_NULL; + (*hAllocator->GetVulkanFunctions().vkUnmapMemory)(hAllocator->m_hDevice, m_hMemory); + } + m_MappingHysteresis.PostUnmap(); + } + else + { + VMA_ASSERT(0 && "VkDeviceMemory block is being unmapped while it was not previously mapped."); + } +} + +VkResult VmaDeviceMemoryBlock::WriteMagicValueAfterAllocation(VmaAllocator hAllocator, VkDeviceSize allocOffset, VkDeviceSize allocSize) +{ + VMA_ASSERT(VMA_DEBUG_MARGIN > 0 && VMA_DEBUG_MARGIN % 4 == 0 && VMA_DEBUG_DETECT_CORRUPTION); + + void* pData; + VkResult res = Map(hAllocator, 1, &pData); + if (res != VK_SUCCESS) + { + return res; + } + + VmaWriteMagicValue(pData, allocOffset + allocSize); + + Unmap(hAllocator, 1); + return VK_SUCCESS; +} + +VkResult VmaDeviceMemoryBlock::ValidateMagicValueAfterAllocation(VmaAllocator hAllocator, VkDeviceSize allocOffset, VkDeviceSize allocSize) +{ + VMA_ASSERT(VMA_DEBUG_MARGIN > 0 && 
VMA_DEBUG_MARGIN % 4 == 0 && VMA_DEBUG_DETECT_CORRUPTION); + + void* pData; + VkResult res = Map(hAllocator, 1, &pData); + if (res != VK_SUCCESS) + { + return res; + } + + if (!VmaValidateMagicValue(pData, allocOffset + allocSize)) + { + VMA_ASSERT(0 && "MEMORY CORRUPTION DETECTED AFTER FREED ALLOCATION!"); + } + + Unmap(hAllocator, 1); + return VK_SUCCESS; +} + +VkResult VmaDeviceMemoryBlock::BindBufferMemory( + const VmaAllocator hAllocator, + const VmaAllocation hAllocation, + VkDeviceSize allocationLocalOffset, + VkBuffer hBuffer, + const void* pNext) +{ + VMA_ASSERT(hAllocation->GetType() == VmaAllocation_T::ALLOCATION_TYPE_BLOCK && + hAllocation->GetBlock() == this); + VMA_ASSERT(allocationLocalOffset < hAllocation->GetSize() && + "Invalid allocationLocalOffset. Did you forget that this offset is relative to the beginning of the allocation, not the whole memory block?"); + const VkDeviceSize memoryOffset = hAllocation->GetOffset() + allocationLocalOffset; + // This lock is important so that we don't call vkBind... and/or vkMap... simultaneously on the same VkDeviceMemory from multiple threads. + VmaMutexLock lock(m_MapAndBindMutex, hAllocator->m_UseMutex); + return hAllocator->BindVulkanBuffer(m_hMemory, memoryOffset, hBuffer, pNext); +} + +VkResult VmaDeviceMemoryBlock::BindImageMemory( + const VmaAllocator hAllocator, + const VmaAllocation hAllocation, + VkDeviceSize allocationLocalOffset, + VkImage hImage, + const void* pNext) +{ + VMA_ASSERT(hAllocation->GetType() == VmaAllocation_T::ALLOCATION_TYPE_BLOCK && + hAllocation->GetBlock() == this); + VMA_ASSERT(allocationLocalOffset < hAllocation->GetSize() && + "Invalid allocationLocalOffset. Did you forget that this offset is relative to the beginning of the allocation, not the whole memory block?"); + const VkDeviceSize memoryOffset = hAllocation->GetOffset() + allocationLocalOffset; + // This lock is important so that we don't call vkBind... and/or vkMap... simultaneously on the same VkDeviceMemory from multiple threads. + VmaMutexLock lock(m_MapAndBindMutex, hAllocator->m_UseMutex); + return hAllocator->BindVulkanImage(m_hMemory, memoryOffset, hImage, pNext); +} + +#if VMA_EXTERNAL_MEMORY_WIN32 +VkResult VmaDeviceMemoryBlock::CreateWin32Handle(const VmaAllocator hAllocator, PFN_vkGetMemoryWin32HandleKHR pvkGetMemoryWin32HandleKHR, HANDLE hTargetProcess, HANDLE* pHandle) noexcept +{ + VMA_ASSERT(pHandle); + return m_Handle.GetHandle(hAllocator->m_hDevice, m_hMemory, pvkGetMemoryWin32HandleKHR, hTargetProcess, hAllocator->m_UseMutex, pHandle); +} +#endif // VMA_EXTERNAL_MEMORY_WIN32 +#endif // _VMA_DEVICE_MEMORY_BLOCK_FUNCTIONS + +#ifndef _VMA_ALLOCATION_T_FUNCTIONS +VmaAllocation_T::VmaAllocation_T(bool mappingAllowed) + : m_Alignment{ 1 }, + m_Size{ 0 }, + m_pUserData{ VMA_NULL }, + m_pName{ VMA_NULL }, + m_MemoryTypeIndex{ 0 }, + m_Type{ (uint8_t)ALLOCATION_TYPE_NONE }, + m_SuballocationType{ (uint8_t)VMA_SUBALLOCATION_TYPE_UNKNOWN }, + m_MapCount{ 0 }, + m_Flags{ 0 } +{ + if(mappingAllowed) + m_Flags |= (uint8_t)FLAG_MAPPING_ALLOWED; +} + +VmaAllocation_T::~VmaAllocation_T() +{ + VMA_ASSERT_LEAK(m_MapCount == 0 && "Allocation was not unmapped before destruction."); + + // Check if owned string was freed. 
+ VMA_ASSERT(m_pName == VMA_NULL); +} + +void VmaAllocation_T::InitBlockAllocation( + VmaDeviceMemoryBlock* block, + VmaAllocHandle allocHandle, + VkDeviceSize alignment, + VkDeviceSize size, + uint32_t memoryTypeIndex, + VmaSuballocationType suballocationType, + bool mapped) +{ + VMA_ASSERT(m_Type == ALLOCATION_TYPE_NONE); + VMA_ASSERT(block != VMA_NULL); + m_Type = (uint8_t)ALLOCATION_TYPE_BLOCK; + m_Alignment = alignment; + m_Size = size; + m_MemoryTypeIndex = memoryTypeIndex; + if(mapped) + { + VMA_ASSERT(IsMappingAllowed() && "Mapping is not allowed on this allocation! Please use one of the new VMA_ALLOCATION_CREATE_HOST_ACCESS_* flags when creating it."); + m_Flags |= (uint8_t)FLAG_PERSISTENT_MAP; + } + m_SuballocationType = (uint8_t)suballocationType; + m_BlockAllocation.m_Block = block; + m_BlockAllocation.m_AllocHandle = allocHandle; +} + +void VmaAllocation_T::InitDedicatedAllocation( + VmaAllocator allocator, + VmaPool hParentPool, + uint32_t memoryTypeIndex, + VkDeviceMemory hMemory, + VmaSuballocationType suballocationType, + void* pMappedData, + VkDeviceSize size) +{ + VMA_ASSERT(m_Type == ALLOCATION_TYPE_NONE); + VMA_ASSERT(hMemory != VK_NULL_HANDLE); + m_Type = (uint8_t)ALLOCATION_TYPE_DEDICATED; + m_Alignment = 0; + m_Size = size; + m_MemoryTypeIndex = memoryTypeIndex; + m_SuballocationType = (uint8_t)suballocationType; + m_DedicatedAllocation.m_ExtraData = VMA_NULL; + m_DedicatedAllocation.m_hParentPool = hParentPool; + m_DedicatedAllocation.m_hMemory = hMemory; + m_DedicatedAllocation.m_Prev = VMA_NULL; + m_DedicatedAllocation.m_Next = VMA_NULL; + + if (pMappedData != VMA_NULL) + { + VMA_ASSERT(IsMappingAllowed() && "Mapping is not allowed on this allocation! Please use one of the new VMA_ALLOCATION_CREATE_HOST_ACCESS_* flags when creating it."); + m_Flags |= (uint8_t)FLAG_PERSISTENT_MAP; + EnsureExtraData(allocator); + m_DedicatedAllocation.m_ExtraData->m_pMappedData = pMappedData; + } +} + +void VmaAllocation_T::Destroy(VmaAllocator allocator) +{ + FreeName(allocator); + + if (GetType() == ALLOCATION_TYPE_DEDICATED) + { + vma_delete(allocator, m_DedicatedAllocation.m_ExtraData); + } +} + +void VmaAllocation_T::SetName(VmaAllocator hAllocator, const char* pName) +{ + VMA_ASSERT(pName == VMA_NULL || pName != m_pName); + + FreeName(hAllocator); + + if (pName != VMA_NULL) + m_pName = VmaCreateStringCopy(hAllocator->GetAllocationCallbacks(), pName); +} + +uint8_t VmaAllocation_T::SwapBlockAllocation(VmaAllocator hAllocator, VmaAllocation allocation) +{ + VMA_ASSERT(allocation != VMA_NULL); + VMA_ASSERT(m_Type == ALLOCATION_TYPE_BLOCK); + VMA_ASSERT(allocation->m_Type == ALLOCATION_TYPE_BLOCK); + + if (m_MapCount != 0) + m_BlockAllocation.m_Block->Unmap(hAllocator, m_MapCount); + + m_BlockAllocation.m_Block->m_pMetadata->SetAllocationUserData(m_BlockAllocation.m_AllocHandle, allocation); + std::swap(m_BlockAllocation, allocation->m_BlockAllocation); + m_BlockAllocation.m_Block->m_pMetadata->SetAllocationUserData(m_BlockAllocation.m_AllocHandle, this); + +#if VMA_STATS_STRING_ENABLED + std::swap(m_BufferImageUsage, allocation->m_BufferImageUsage); +#endif + return m_MapCount; +} + +VmaAllocHandle VmaAllocation_T::GetAllocHandle() const +{ + switch (m_Type) + { + case ALLOCATION_TYPE_BLOCK: + return m_BlockAllocation.m_AllocHandle; + case ALLOCATION_TYPE_DEDICATED: + return VK_NULL_HANDLE; + default: + VMA_ASSERT(0); + return VK_NULL_HANDLE; + } +} + +VkDeviceSize VmaAllocation_T::GetOffset() const +{ + switch (m_Type) + { + case ALLOCATION_TYPE_BLOCK: + return 
m_BlockAllocation.m_Block->m_pMetadata->GetAllocationOffset(m_BlockAllocation.m_AllocHandle); + case ALLOCATION_TYPE_DEDICATED: + return 0; + default: + VMA_ASSERT(0); + return 0; + } +} + +VmaPool VmaAllocation_T::GetParentPool() const +{ + switch (m_Type) + { + case ALLOCATION_TYPE_BLOCK: + return m_BlockAllocation.m_Block->GetParentPool(); + case ALLOCATION_TYPE_DEDICATED: + return m_DedicatedAllocation.m_hParentPool; + default: + VMA_ASSERT(0); + return VK_NULL_HANDLE; + } +} + +VkDeviceMemory VmaAllocation_T::GetMemory() const +{ + switch (m_Type) + { + case ALLOCATION_TYPE_BLOCK: + return m_BlockAllocation.m_Block->GetDeviceMemory(); + case ALLOCATION_TYPE_DEDICATED: + return m_DedicatedAllocation.m_hMemory; + default: + VMA_ASSERT(0); + return VK_NULL_HANDLE; + } +} + +void* VmaAllocation_T::GetMappedData() const +{ + switch (m_Type) + { + case ALLOCATION_TYPE_BLOCK: + if (m_MapCount != 0 || IsPersistentMap()) + { + void* pBlockData = m_BlockAllocation.m_Block->GetMappedData(); + VMA_ASSERT(pBlockData != VMA_NULL); + return (char*)pBlockData + GetOffset(); + } + else + { + return VMA_NULL; + } + break; + case ALLOCATION_TYPE_DEDICATED: + VMA_ASSERT((m_DedicatedAllocation.m_ExtraData != VMA_NULL && m_DedicatedAllocation.m_ExtraData->m_pMappedData != VMA_NULL) == + (m_MapCount != 0 || IsPersistentMap())); + return m_DedicatedAllocation.m_ExtraData != VMA_NULL ? m_DedicatedAllocation.m_ExtraData->m_pMappedData : VMA_NULL; + default: + VMA_ASSERT(0); + return VMA_NULL; + } +} + +void VmaAllocation_T::BlockAllocMap() +{ + VMA_ASSERT(GetType() == ALLOCATION_TYPE_BLOCK); + VMA_ASSERT(IsMappingAllowed() && "Mapping is not allowed on this allocation! Please use one of the new VMA_ALLOCATION_CREATE_HOST_ACCESS_* flags when creating it."); + + if (m_MapCount < 0xFF) + { + ++m_MapCount; + } + else + { + VMA_ASSERT(0 && "Allocation mapped too many times simultaneously."); + } +} + +void VmaAllocation_T::BlockAllocUnmap() +{ + VMA_ASSERT(GetType() == ALLOCATION_TYPE_BLOCK); + + if (m_MapCount > 0) + { + --m_MapCount; + } + else + { + VMA_ASSERT(0 && "Unmapping allocation not previously mapped."); + } +} + +VkResult VmaAllocation_T::DedicatedAllocMap(VmaAllocator hAllocator, void** ppData) +{ + VMA_ASSERT(GetType() == ALLOCATION_TYPE_DEDICATED); + VMA_ASSERT(IsMappingAllowed() && "Mapping is not allowed on this allocation! 
Please use one of the new VMA_ALLOCATION_CREATE_HOST_ACCESS_* flags when creating it."); + + EnsureExtraData(hAllocator); + + if (m_MapCount != 0 || IsPersistentMap()) + { + if (m_MapCount < 0xFF) + { + VMA_ASSERT(m_DedicatedAllocation.m_ExtraData->m_pMappedData != VMA_NULL); + *ppData = m_DedicatedAllocation.m_ExtraData->m_pMappedData; + ++m_MapCount; + return VK_SUCCESS; + } + else + { + VMA_ASSERT(0 && "Dedicated allocation mapped too many times simultaneously."); + return VK_ERROR_MEMORY_MAP_FAILED; + } + } + else + { + VkResult result = (*hAllocator->GetVulkanFunctions().vkMapMemory)( + hAllocator->m_hDevice, + m_DedicatedAllocation.m_hMemory, + 0, // offset + VK_WHOLE_SIZE, + 0, // flags + ppData); + if (result == VK_SUCCESS) + { + m_DedicatedAllocation.m_ExtraData->m_pMappedData = *ppData; + m_MapCount = 1; + } + return result; + } +} + +void VmaAllocation_T::DedicatedAllocUnmap(VmaAllocator hAllocator) +{ + VMA_ASSERT(GetType() == ALLOCATION_TYPE_DEDICATED); + + if (m_MapCount > 0) + { + --m_MapCount; + if (m_MapCount == 0 && !IsPersistentMap()) + { + VMA_ASSERT(m_DedicatedAllocation.m_ExtraData != VMA_NULL); + m_DedicatedAllocation.m_ExtraData->m_pMappedData = VMA_NULL; + (*hAllocator->GetVulkanFunctions().vkUnmapMemory)( + hAllocator->m_hDevice, + m_DedicatedAllocation.m_hMemory); + } + } + else + { + VMA_ASSERT(0 && "Unmapping dedicated allocation not previously mapped."); + } +} + +#if VMA_STATS_STRING_ENABLED +void VmaAllocation_T::PrintParameters(class VmaJsonWriter& json) const +{ + json.WriteString("Type"); + json.WriteString(VMA_SUBALLOCATION_TYPE_NAMES[m_SuballocationType]); + + json.WriteString("Size"); + json.WriteNumber(m_Size); + json.WriteString("Usage"); + json.WriteNumber(m_BufferImageUsage.Value); // It may be uint32_t or uint64_t. 
+
+ if (m_pUserData != VMA_NULL)
+ {
+ json.WriteString("CustomData");
+ json.BeginString();
+ json.ContinueString_Pointer(m_pUserData);
+ json.EndString();
+ }
+ if (m_pName != VMA_NULL)
+ {
+ json.WriteString("Name");
+ json.WriteString(m_pName);
+ }
+}
+#if VMA_EXTERNAL_MEMORY_WIN32
+VkResult VmaAllocation_T::GetWin32Handle(VmaAllocator hAllocator, HANDLE hTargetProcess, HANDLE* pHandle) noexcept
+{
+ auto pvkGetMemoryWin32HandleKHR = hAllocator->GetVulkanFunctions().vkGetMemoryWin32HandleKHR;
+ switch (m_Type)
+ {
+ case ALLOCATION_TYPE_BLOCK:
+ return m_BlockAllocation.m_Block->CreateWin32Handle(hAllocator, pvkGetMemoryWin32HandleKHR, hTargetProcess, pHandle);
+ case ALLOCATION_TYPE_DEDICATED:
+ EnsureExtraData(hAllocator);
+ return m_DedicatedAllocation.m_ExtraData->m_Handle.GetHandle(hAllocator->m_hDevice, m_DedicatedAllocation.m_hMemory, pvkGetMemoryWin32HandleKHR, hTargetProcess, hAllocator->m_UseMutex, pHandle);
+ default:
+ VMA_ASSERT(0);
+ return VK_ERROR_FEATURE_NOT_PRESENT;
+ }
+}
+#endif // VMA_EXTERNAL_MEMORY_WIN32
+#endif // VMA_STATS_STRING_ENABLED
+
+void VmaAllocation_T::EnsureExtraData(VmaAllocator hAllocator)
+{
+ if (m_DedicatedAllocation.m_ExtraData == VMA_NULL)
+ {
+ m_DedicatedAllocation.m_ExtraData = vma_new(hAllocator, VmaAllocationExtraData)();
+ }
+}
+
+void VmaAllocation_T::FreeName(VmaAllocator hAllocator)
+{
+ if(m_pName)
+ {
+ VmaFreeString(hAllocator->GetAllocationCallbacks(), m_pName);
+ m_pName = VMA_NULL;
+ }
+}
+#endif // _VMA_ALLOCATION_T_FUNCTIONS
+
+#ifndef _VMA_BLOCK_VECTOR_FUNCTIONS
+VmaBlockVector::VmaBlockVector(
+ VmaAllocator hAllocator,
+ VmaPool hParentPool,
+ uint32_t memoryTypeIndex,
+ VkDeviceSize preferredBlockSize,
+ size_t minBlockCount,
+ size_t maxBlockCount,
+ VkDeviceSize bufferImageGranularity,
+ bool explicitBlockSize,
+ uint32_t algorithm,
+ float priority,
+ VkDeviceSize minAllocationAlignment,
+ void* pMemoryAllocateNext)
+ : m_hAllocator(hAllocator),
+ m_hParentPool(hParentPool),
+ m_MemoryTypeIndex(memoryTypeIndex),
+ m_PreferredBlockSize(preferredBlockSize),
+ m_MinBlockCount(minBlockCount),
+ m_MaxBlockCount(maxBlockCount),
+ m_BufferImageGranularity(bufferImageGranularity),
+ m_ExplicitBlockSize(explicitBlockSize),
+ m_Algorithm(algorithm),
+ m_Priority(priority),
+ m_MinAllocationAlignment(minAllocationAlignment),
+ m_pMemoryAllocateNext(pMemoryAllocateNext),
+ m_Blocks(VmaStlAllocator<VmaDeviceMemoryBlock*>(hAllocator->GetAllocationCallbacks())),
+ m_NextBlockId(0) {}
+
+VmaBlockVector::~VmaBlockVector()
+{
+ for (size_t i = m_Blocks.size(); i--; )
+ {
+ m_Blocks[i]->Destroy(m_hAllocator);
+ vma_delete(m_hAllocator, m_Blocks[i]);
+ }
+}
+
+VkResult VmaBlockVector::CreateMinBlocks()
+{
+ for (size_t i = 0; i < m_MinBlockCount; ++i)
+ {
+ VkResult res = CreateBlock(m_PreferredBlockSize, VMA_NULL);
+ if (res != VK_SUCCESS)
+ {
+ return res;
+ }
+ }
+ return VK_SUCCESS;
+}
+
+void VmaBlockVector::AddStatistics(VmaStatistics& inoutStats)
+{
+ VmaMutexLockRead lock(m_Mutex, m_hAllocator->m_UseMutex);
+
+ const size_t blockCount = m_Blocks.size();
+ for (uint32_t blockIndex = 0; blockIndex < blockCount; ++blockIndex)
+ {
+ const VmaDeviceMemoryBlock* const pBlock = m_Blocks[blockIndex];
+ VMA_ASSERT(pBlock);
+ VMA_HEAVY_ASSERT(pBlock->Validate());
+ pBlock->m_pMetadata->AddStatistics(inoutStats);
+ }
+}
+
+void VmaBlockVector::AddDetailedStatistics(VmaDetailedStatistics& inoutStats)
+{
+ VmaMutexLockRead lock(m_Mutex, m_hAllocator->m_UseMutex);
+
+ const size_t blockCount = m_Blocks.size();
+ for (uint32_t blockIndex = 0; blockIndex < blockCount; ++blockIndex)
+ {
+ const VmaDeviceMemoryBlock* const pBlock = m_Blocks[blockIndex];
+ VMA_ASSERT(pBlock);
+ VMA_HEAVY_ASSERT(pBlock->Validate());
+ pBlock->m_pMetadata->AddDetailedStatistics(inoutStats);
+ }
+}
+
+bool VmaBlockVector::IsEmpty()
+{
+ VmaMutexLockRead lock(m_Mutex, m_hAllocator->m_UseMutex);
+ return m_Blocks.empty();
+}
+
+bool VmaBlockVector::IsCorruptionDetectionEnabled() const
+{
+ const uint32_t requiredMemFlags = VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT | VK_MEMORY_PROPERTY_HOST_COHERENT_BIT;
+ return (VMA_DEBUG_DETECT_CORRUPTION != 0) &&
+ (VMA_DEBUG_MARGIN > 0) &&
+ (m_Algorithm == 0 || m_Algorithm == VMA_POOL_CREATE_LINEAR_ALGORITHM_BIT) &&
+ (m_hAllocator->m_MemProps.memoryTypes[m_MemoryTypeIndex].propertyFlags & requiredMemFlags) == requiredMemFlags;
+}
+
+VkResult VmaBlockVector::Allocate(
+ VkDeviceSize size,
+ VkDeviceSize alignment,
+ const VmaAllocationCreateInfo& createInfo,
+ VmaSuballocationType suballocType,
+ size_t allocationCount,
+ VmaAllocation* pAllocations)
+{
+ size_t allocIndex;
+ VkResult res = VK_SUCCESS;
+
+ alignment = VMA_MAX(alignment, m_MinAllocationAlignment);
+
+ if (IsCorruptionDetectionEnabled())
+ {
+ size = VmaAlignUp(size, sizeof(VMA_CORRUPTION_DETECTION_MAGIC_VALUE));
+ alignment = VmaAlignUp(alignment, sizeof(VMA_CORRUPTION_DETECTION_MAGIC_VALUE));
+ }
+
+ {
+ VmaMutexLockWrite lock(m_Mutex, m_hAllocator->m_UseMutex);
+ for (allocIndex = 0; allocIndex < allocationCount; ++allocIndex)
+ {
+ res = AllocatePage(
+ size,
+ alignment,
+ createInfo,
+ suballocType,
+ pAllocations + allocIndex);
+ if (res != VK_SUCCESS)
+ {
+ break;
+ }
+ }
+ }
+
+ if (res != VK_SUCCESS)
+ {
+ // Free all already created allocations.
+ while (allocIndex--)
+ Free(pAllocations[allocIndex]);
+ memset(pAllocations, 0, sizeof(VmaAllocation) * allocationCount);
+ }
+
+ return res;
+}
+
+VkResult VmaBlockVector::AllocatePage(
+ VkDeviceSize size,
+ VkDeviceSize alignment,
+ const VmaAllocationCreateInfo& createInfo,
+ VmaSuballocationType suballocType,
+ VmaAllocation* pAllocation)
+{
+ const bool isUpperAddress = (createInfo.flags & VMA_ALLOCATION_CREATE_UPPER_ADDRESS_BIT) != 0;
+
+ VkDeviceSize freeMemory;
+ {
+ const uint32_t heapIndex = m_hAllocator->MemoryTypeIndexToHeapIndex(m_MemoryTypeIndex);
+ VmaBudget heapBudget = {};
+ m_hAllocator->GetHeapBudgets(&heapBudget, heapIndex, 1);
+ freeMemory = (heapBudget.usage < heapBudget.budget) ? (heapBudget.budget - heapBudget.usage) : 0;
+ }
+
+ const bool canFallbackToDedicated = !HasExplicitBlockSize() &&
+ (createInfo.flags & VMA_ALLOCATION_CREATE_NEVER_ALLOCATE_BIT) == 0;
+ const bool canCreateNewBlock =
+ ((createInfo.flags & VMA_ALLOCATION_CREATE_NEVER_ALLOCATE_BIT) == 0) &&
+ (m_Blocks.size() < m_MaxBlockCount) &&
+ (freeMemory >= size || !canFallbackToDedicated);
+ uint32_t strategy = createInfo.flags & VMA_ALLOCATION_CREATE_STRATEGY_MASK;
+
+ // Upper address can only be used with linear allocator and within single memory block.
+ if (isUpperAddress &&
+ (m_Algorithm != VMA_POOL_CREATE_LINEAR_ALGORITHM_BIT || m_MaxBlockCount > 1))
+ {
+ return VK_ERROR_FEATURE_NOT_PRESENT;
+ }
+
+ // Early reject: requested allocation size is larger than maximum block size for this block vector.
+ if (size + VMA_DEBUG_MARGIN > m_PreferredBlockSize)
+ {
+ return VK_ERROR_OUT_OF_DEVICE_MEMORY;
+ }
+
+ // 1. Search existing allocations. Try to allocate.
+ if (m_Algorithm == VMA_POOL_CREATE_LINEAR_ALGORITHM_BIT)
+ {
+ // Use only last block. 
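+ // (With the linear algorithm, new allocations are only ever attempted in the most recently created block; earlier blocks are never revisited.)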
+ if (!m_Blocks.empty()) + { + VmaDeviceMemoryBlock* const pCurrBlock = m_Blocks.back(); + VMA_ASSERT(pCurrBlock); + VkResult res = AllocateFromBlock( + pCurrBlock, size, alignment, createInfo.flags, createInfo.pUserData, suballocType, strategy, pAllocation); + if (res == VK_SUCCESS) + { + VMA_DEBUG_LOG_FORMAT(" Returned from last block #%" PRIu32, pCurrBlock->GetId()); + IncrementallySortBlocks(); + return VK_SUCCESS; + } + } + } + else + { + if (strategy != VMA_ALLOCATION_CREATE_STRATEGY_MIN_TIME_BIT) // MIN_MEMORY or default + { + const bool isHostVisible = + (m_hAllocator->m_MemProps.memoryTypes[m_MemoryTypeIndex].propertyFlags & VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT) != 0; + if(isHostVisible) + { + const bool isMappingAllowed = (createInfo.flags & + (VMA_ALLOCATION_CREATE_HOST_ACCESS_SEQUENTIAL_WRITE_BIT | VMA_ALLOCATION_CREATE_HOST_ACCESS_RANDOM_BIT)) != 0; + /* + For non-mappable allocations, check blocks that are not mapped first. + For mappable allocations, check blocks that are already mapped first. + This way, having many blocks, we will separate mappable and non-mappable allocations, + hopefully limiting the number of blocks that are mapped, which will help tools like RenderDoc. + */ + for(size_t mappingI = 0; mappingI < 2; ++mappingI) + { + // Forward order in m_Blocks - prefer blocks with smallest amount of free space. + for (size_t blockIndex = 0; blockIndex < m_Blocks.size(); ++blockIndex) + { + VmaDeviceMemoryBlock* const pCurrBlock = m_Blocks[blockIndex]; + VMA_ASSERT(pCurrBlock); + const bool isBlockMapped = pCurrBlock->GetMappedData() != VMA_NULL; + if((mappingI == 0) == (isMappingAllowed == isBlockMapped)) + { + VkResult res = AllocateFromBlock( + pCurrBlock, size, alignment, createInfo.flags, createInfo.pUserData, suballocType, strategy, pAllocation); + if (res == VK_SUCCESS) + { + VMA_DEBUG_LOG_FORMAT(" Returned from existing block #%" PRIu32, pCurrBlock->GetId()); + IncrementallySortBlocks(); + return VK_SUCCESS; + } + } + } + } + } + else + { + // Forward order in m_Blocks - prefer blocks with smallest amount of free space. + for (size_t blockIndex = 0; blockIndex < m_Blocks.size(); ++blockIndex) + { + VmaDeviceMemoryBlock* const pCurrBlock = m_Blocks[blockIndex]; + VMA_ASSERT(pCurrBlock); + VkResult res = AllocateFromBlock( + pCurrBlock, size, alignment, createInfo.flags, createInfo.pUserData, suballocType, strategy, pAllocation); + if (res == VK_SUCCESS) + { + VMA_DEBUG_LOG_FORMAT(" Returned from existing block #%" PRIu32, pCurrBlock->GetId()); + IncrementallySortBlocks(); + return VK_SUCCESS; + } + } + } + } + else // VMA_ALLOCATION_CREATE_STRATEGY_MIN_TIME_BIT + { + // Backward order in m_Blocks - prefer blocks with largest amount of free space. + for (size_t blockIndex = m_Blocks.size(); blockIndex--; ) + { + VmaDeviceMemoryBlock* const pCurrBlock = m_Blocks[blockIndex]; + VMA_ASSERT(pCurrBlock); + VkResult res = AllocateFromBlock(pCurrBlock, size, alignment, createInfo.flags, createInfo.pUserData, suballocType, strategy, pAllocation); + if (res == VK_SUCCESS) + { + VMA_DEBUG_LOG_FORMAT(" Returned from existing block #%" PRIu32, pCurrBlock->GetId()); + IncrementallySortBlocks(); + return VK_SUCCESS; + } + } + } + } + + // 2. Try to create new block. + if (canCreateNewBlock) + { + // Calculate optimal size for new block. + VkDeviceSize newBlockSize = m_PreferredBlockSize; + uint32_t newBlockSizeShift = 0; + const uint32_t NEW_BLOCK_SIZE_SHIFT_MAX = 3; + + if (!m_ExplicitBlockSize) + { + // Allocate 1/8, 1/4, 1/2 as first blocks. 
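+ // E.g. with a 256 MiB preferred block size the first blocks may be created at 32, 64, then 128 MiB, as long as each halved size still fits twice the requested allocation.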
+ const VkDeviceSize maxExistingBlockSize = CalcMaxBlockSize(); + for (uint32_t i = 0; i < NEW_BLOCK_SIZE_SHIFT_MAX; ++i) + { + const VkDeviceSize smallerNewBlockSize = newBlockSize / 2; + if (smallerNewBlockSize > maxExistingBlockSize && smallerNewBlockSize >= size * 2) + { + newBlockSize = smallerNewBlockSize; + ++newBlockSizeShift; + } + else + { + break; + } + } + } + + size_t newBlockIndex = 0; + VkResult res = (newBlockSize <= freeMemory || !canFallbackToDedicated) ? + CreateBlock(newBlockSize, &newBlockIndex) : VK_ERROR_OUT_OF_DEVICE_MEMORY; + // Allocation of this size failed? Try 1/2, 1/4, 1/8 of m_PreferredBlockSize. + if (!m_ExplicitBlockSize) + { + while (res < 0 && newBlockSizeShift < NEW_BLOCK_SIZE_SHIFT_MAX) + { + const VkDeviceSize smallerNewBlockSize = newBlockSize / 2; + if (smallerNewBlockSize >= size) + { + newBlockSize = smallerNewBlockSize; + ++newBlockSizeShift; + res = (newBlockSize <= freeMemory || !canFallbackToDedicated) ? + CreateBlock(newBlockSize, &newBlockIndex) : VK_ERROR_OUT_OF_DEVICE_MEMORY; + } + else + { + break; + } + } + } + + if (res == VK_SUCCESS) + { + VmaDeviceMemoryBlock* const pBlock = m_Blocks[newBlockIndex]; + VMA_ASSERT(pBlock->m_pMetadata->GetSize() >= size); + + res = AllocateFromBlock( + pBlock, size, alignment, createInfo.flags, createInfo.pUserData, suballocType, strategy, pAllocation); + if (res == VK_SUCCESS) + { + VMA_DEBUG_LOG_FORMAT(" Created new block #%" PRIu32 " Size=%" PRIu64, pBlock->GetId(), newBlockSize); + IncrementallySortBlocks(); + return VK_SUCCESS; + } + else + { + // Allocation from new block failed, possibly due to VMA_DEBUG_MARGIN or alignment. + return VK_ERROR_OUT_OF_DEVICE_MEMORY; + } + } + } + + return VK_ERROR_OUT_OF_DEVICE_MEMORY; +} + +void VmaBlockVector::Free(const VmaAllocation hAllocation) +{ + VmaDeviceMemoryBlock* pBlockToDelete = VMA_NULL; + + bool budgetExceeded = false; + { + const uint32_t heapIndex = m_hAllocator->MemoryTypeIndexToHeapIndex(m_MemoryTypeIndex); + VmaBudget heapBudget = {}; + m_hAllocator->GetHeapBudgets(&heapBudget, heapIndex, 1); + budgetExceeded = heapBudget.usage >= heapBudget.budget; + } + + // Scope for lock. + { + VmaMutexLockWrite lock(m_Mutex, m_hAllocator->m_UseMutex); + + VmaDeviceMemoryBlock* pBlock = hAllocation->GetBlock(); + + if (IsCorruptionDetectionEnabled()) + { + VkResult res = pBlock->ValidateMagicValueAfterAllocation(m_hAllocator, hAllocation->GetOffset(), hAllocation->GetSize()); + VMA_ASSERT(res == VK_SUCCESS && "Couldn't map block memory to validate magic value."); + } + + if (hAllocation->IsPersistentMap()) + { + pBlock->Unmap(m_hAllocator, 1); + } + + const bool hadEmptyBlockBeforeFree = HasEmptyBlock(); + pBlock->m_pMetadata->Free(hAllocation->GetAllocHandle()); + pBlock->PostFree(m_hAllocator); + VMA_HEAVY_ASSERT(pBlock->Validate()); + + VMA_DEBUG_LOG_FORMAT(" Freed from MemoryTypeIndex=%" PRIu32, m_MemoryTypeIndex); + + const bool canDeleteBlock = m_Blocks.size() > m_MinBlockCount; + // pBlock became empty after this deallocation. + if (pBlock->m_pMetadata->IsEmpty()) + { + // Already had empty block. We don't want to have two, so delete this one. + if ((hadEmptyBlockBeforeFree || budgetExceeded) && canDeleteBlock) + { + pBlockToDelete = pBlock; + Remove(pBlock); + } + // else: We now have one empty block - leave it. A hysteresis to avoid allocating whole block back and forth. + } + // pBlock didn't become empty, but we have another empty block - find and free that one. + // (This is optional, heuristics.) 
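+ // (Only the last block is checked below; since blocks are kept sorted by free size, an empty block, if any, tends to sit at the back of the vector.)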
+ else if (hadEmptyBlockBeforeFree && canDeleteBlock) + { + VmaDeviceMemoryBlock* pLastBlock = m_Blocks.back(); + if (pLastBlock->m_pMetadata->IsEmpty()) + { + pBlockToDelete = pLastBlock; + m_Blocks.pop_back(); + } + } + + IncrementallySortBlocks(); + + m_hAllocator->m_Budget.RemoveAllocation(m_hAllocator->MemoryTypeIndexToHeapIndex(m_MemoryTypeIndex), hAllocation->GetSize()); + hAllocation->Destroy(m_hAllocator); + m_hAllocator->m_AllocationObjectAllocator.Free(hAllocation); + } + + // Destruction of a free block. Deferred until this point, outside of mutex + // lock, for performance reason. + if (pBlockToDelete != VMA_NULL) + { + VMA_DEBUG_LOG_FORMAT(" Deleted empty block #%" PRIu32, pBlockToDelete->GetId()); + pBlockToDelete->Destroy(m_hAllocator); + vma_delete(m_hAllocator, pBlockToDelete); + } +} + +VkDeviceSize VmaBlockVector::CalcMaxBlockSize() const +{ + VkDeviceSize result = 0; + for (size_t i = m_Blocks.size(); i--; ) + { + result = VMA_MAX(result, m_Blocks[i]->m_pMetadata->GetSize()); + if (result >= m_PreferredBlockSize) + { + break; + } + } + return result; +} + +void VmaBlockVector::Remove(VmaDeviceMemoryBlock* pBlock) +{ + for (uint32_t blockIndex = 0; blockIndex < m_Blocks.size(); ++blockIndex) + { + if (m_Blocks[blockIndex] == pBlock) + { + VmaVectorRemove(m_Blocks, blockIndex); + return; + } + } + VMA_ASSERT(0); +} + +void VmaBlockVector::IncrementallySortBlocks() +{ + if (!m_IncrementalSort) + return; + if (m_Algorithm != VMA_POOL_CREATE_LINEAR_ALGORITHM_BIT) + { + // Bubble sort only until first swap. + for (size_t i = 1; i < m_Blocks.size(); ++i) + { + if (m_Blocks[i - 1]->m_pMetadata->GetSumFreeSize() > m_Blocks[i]->m_pMetadata->GetSumFreeSize()) + { + std::swap(m_Blocks[i - 1], m_Blocks[i]); + return; + } + } + } +} + +void VmaBlockVector::SortByFreeSize() +{ + VMA_SORT(m_Blocks.begin(), m_Blocks.end(), + [](VmaDeviceMemoryBlock* b1, VmaDeviceMemoryBlock* b2) -> bool + { + return b1->m_pMetadata->GetSumFreeSize() < b2->m_pMetadata->GetSumFreeSize(); + }); +} + +VkResult VmaBlockVector::AllocateFromBlock( + VmaDeviceMemoryBlock* pBlock, + VkDeviceSize size, + VkDeviceSize alignment, + VmaAllocationCreateFlags allocFlags, + void* pUserData, + VmaSuballocationType suballocType, + uint32_t strategy, + VmaAllocation* pAllocation) +{ + const bool isUpperAddress = (allocFlags & VMA_ALLOCATION_CREATE_UPPER_ADDRESS_BIT) != 0; + + VmaAllocationRequest currRequest = {}; + if (pBlock->m_pMetadata->CreateAllocationRequest( + size, + alignment, + isUpperAddress, + suballocType, + strategy, + &currRequest)) + { + return CommitAllocationRequest(currRequest, pBlock, alignment, allocFlags, pUserData, suballocType, pAllocation); + } + return VK_ERROR_OUT_OF_DEVICE_MEMORY; +} + +VkResult VmaBlockVector::CommitAllocationRequest( + VmaAllocationRequest& allocRequest, + VmaDeviceMemoryBlock* pBlock, + VkDeviceSize alignment, + VmaAllocationCreateFlags allocFlags, + void* pUserData, + VmaSuballocationType suballocType, + VmaAllocation* pAllocation) +{ + const bool mapped = (allocFlags & VMA_ALLOCATION_CREATE_MAPPED_BIT) != 0; + const bool isUserDataString = (allocFlags & VMA_ALLOCATION_CREATE_USER_DATA_COPY_STRING_BIT) != 0; + const bool isMappingAllowed = (allocFlags & + (VMA_ALLOCATION_CREATE_HOST_ACCESS_SEQUENTIAL_WRITE_BIT | VMA_ALLOCATION_CREATE_HOST_ACCESS_RANDOM_BIT)) != 0; + + pBlock->PostAlloc(m_hAllocator); + // Allocate from pCurrBlock. 
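+ // (For MAPPED allocations the whole block is mapped once here; the per-allocation pointer is later derived from the block mapping plus the allocation offset.)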
+ if (mapped)
+ {
+ VkResult res = pBlock->Map(m_hAllocator, 1, VMA_NULL);
+ if (res != VK_SUCCESS)
+ {
+ return res;
+ }
+ }
+
+ *pAllocation = m_hAllocator->m_AllocationObjectAllocator.Allocate<VmaAllocation_T>(isMappingAllowed);
+ pBlock->m_pMetadata->Alloc(allocRequest, suballocType, *pAllocation);
+ (*pAllocation)->InitBlockAllocation(
+ pBlock,
+ allocRequest.allocHandle,
+ alignment,
+ allocRequest.size, // Not size, as actual allocation size may be larger than requested!
+ m_MemoryTypeIndex,
+ suballocType,
+ mapped);
+ VMA_HEAVY_ASSERT(pBlock->Validate());
+ if (isUserDataString)
+ (*pAllocation)->SetName(m_hAllocator, (const char*)pUserData);
+ else
+ (*pAllocation)->SetUserData(m_hAllocator, pUserData);
+ m_hAllocator->m_Budget.AddAllocation(m_hAllocator->MemoryTypeIndexToHeapIndex(m_MemoryTypeIndex), allocRequest.size);
+ if (VMA_DEBUG_INITIALIZE_ALLOCATIONS)
+ {
+ m_hAllocator->FillAllocation(*pAllocation, VMA_ALLOCATION_FILL_PATTERN_CREATED);
+ }
+ if (IsCorruptionDetectionEnabled())
+ {
+ VkResult res = pBlock->WriteMagicValueAfterAllocation(m_hAllocator, (*pAllocation)->GetOffset(), allocRequest.size);
+ VMA_ASSERT(res == VK_SUCCESS && "Couldn't map block memory to write magic value.");
+ }
+ return VK_SUCCESS;
+}
+
+VkResult VmaBlockVector::CreateBlock(VkDeviceSize blockSize, size_t* pNewBlockIndex)
+{
+ VkMemoryAllocateInfo allocInfo = { VK_STRUCTURE_TYPE_MEMORY_ALLOCATE_INFO };
+ allocInfo.pNext = m_pMemoryAllocateNext;
+ allocInfo.memoryTypeIndex = m_MemoryTypeIndex;
+ allocInfo.allocationSize = blockSize;
+
+#if VMA_BUFFER_DEVICE_ADDRESS
+ // Every standalone block can potentially contain a buffer with VK_BUFFER_USAGE_SHADER_DEVICE_ADDRESS_BIT - always enable the feature.
+ VkMemoryAllocateFlagsInfoKHR allocFlagsInfo = { VK_STRUCTURE_TYPE_MEMORY_ALLOCATE_FLAGS_INFO_KHR };
+ if (m_hAllocator->m_UseKhrBufferDeviceAddress)
+ {
+ allocFlagsInfo.flags = VK_MEMORY_ALLOCATE_DEVICE_ADDRESS_BIT_KHR;
+ VmaPnextChainPushFront(&allocInfo, &allocFlagsInfo);
+ }
+#endif // VMA_BUFFER_DEVICE_ADDRESS
+
+#if VMA_MEMORY_PRIORITY
+ VkMemoryPriorityAllocateInfoEXT priorityInfo = { VK_STRUCTURE_TYPE_MEMORY_PRIORITY_ALLOCATE_INFO_EXT };
+ if (m_hAllocator->m_UseExtMemoryPriority)
+ {
+ VMA_ASSERT(m_Priority >= 0.f && m_Priority <= 1.f);
+ priorityInfo.priority = m_Priority;
+ VmaPnextChainPushFront(&allocInfo, &priorityInfo);
+ }
+#endif // VMA_MEMORY_PRIORITY
+
+#if VMA_EXTERNAL_MEMORY
+ // Attach VkExportMemoryAllocateInfoKHR if necessary.
+ VkExportMemoryAllocateInfoKHR exportMemoryAllocInfo = { VK_STRUCTURE_TYPE_EXPORT_MEMORY_ALLOCATE_INFO_KHR };
+ exportMemoryAllocInfo.handleTypes = m_hAllocator->GetExternalMemoryHandleTypeFlags(m_MemoryTypeIndex);
+ if (exportMemoryAllocInfo.handleTypes != 0)
+ {
+ VmaPnextChainPushFront(&allocInfo, &exportMemoryAllocInfo);
+ }
+#endif // VMA_EXTERNAL_MEMORY
+
+ VkDeviceMemory mem = VK_NULL_HANDLE;
+ VkResult res = m_hAllocator->AllocateVulkanMemory(&allocInfo, &mem);
+ if (res < 0)
+ {
+ return res;
+ }
+
+ // New VkDeviceMemory successfully created.
+
+ // Create new Allocation for it. 
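+ // (A VmaDeviceMemoryBlock pairs the raw VkDeviceMemory handle with the metadata object that tracks suballocations within it.)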
+ VmaDeviceMemoryBlock* const pBlock = vma_new(m_hAllocator, VmaDeviceMemoryBlock)(m_hAllocator); + pBlock->Init( + m_hAllocator, + m_hParentPool, + m_MemoryTypeIndex, + mem, + allocInfo.allocationSize, + m_NextBlockId++, + m_Algorithm, + m_BufferImageGranularity); + + m_Blocks.push_back(pBlock); + if (pNewBlockIndex != VMA_NULL) + { + *pNewBlockIndex = m_Blocks.size() - 1; + } + + return VK_SUCCESS; +} + +bool VmaBlockVector::HasEmptyBlock() +{ + for (size_t index = 0, count = m_Blocks.size(); index < count; ++index) + { + VmaDeviceMemoryBlock* const pBlock = m_Blocks[index]; + if (pBlock->m_pMetadata->IsEmpty()) + { + return true; + } + } + return false; +} + +#if VMA_STATS_STRING_ENABLED +void VmaBlockVector::PrintDetailedMap(class VmaJsonWriter& json) +{ + VmaMutexLockRead lock(m_Mutex, m_hAllocator->m_UseMutex); + + + json.BeginObject(); + for (size_t i = 0; i < m_Blocks.size(); ++i) + { + json.BeginString(); + json.ContinueString(m_Blocks[i]->GetId()); + json.EndString(); + + json.BeginObject(); + json.WriteString("MapRefCount"); + json.WriteNumber(m_Blocks[i]->GetMapRefCount()); + + m_Blocks[i]->m_pMetadata->PrintDetailedMap(json); + json.EndObject(); + } + json.EndObject(); +} +#endif // VMA_STATS_STRING_ENABLED + +VkResult VmaBlockVector::CheckCorruption() +{ + if (!IsCorruptionDetectionEnabled()) + { + return VK_ERROR_FEATURE_NOT_PRESENT; + } + + VmaMutexLockRead lock(m_Mutex, m_hAllocator->m_UseMutex); + for (uint32_t blockIndex = 0; blockIndex < m_Blocks.size(); ++blockIndex) + { + VmaDeviceMemoryBlock* const pBlock = m_Blocks[blockIndex]; + VMA_ASSERT(pBlock); + VkResult res = pBlock->CheckCorruption(m_hAllocator); + if (res != VK_SUCCESS) + { + return res; + } + } + return VK_SUCCESS; +} + +#endif // _VMA_BLOCK_VECTOR_FUNCTIONS + +#ifndef _VMA_DEFRAGMENTATION_CONTEXT_FUNCTIONS +VmaDefragmentationContext_T::VmaDefragmentationContext_T( + VmaAllocator hAllocator, + const VmaDefragmentationInfo& info) + : m_MaxPassBytes(info.maxBytesPerPass == 0 ? VK_WHOLE_SIZE : info.maxBytesPerPass), + m_MaxPassAllocations(info.maxAllocationsPerPass == 0 ? 
UINT32_MAX : info.maxAllocationsPerPass), + m_BreakCallback(info.pfnBreakCallback), + m_BreakCallbackUserData(info.pBreakCallbackUserData), + m_MoveAllocator(hAllocator->GetAllocationCallbacks()), + m_Moves(m_MoveAllocator) +{ + m_Algorithm = info.flags & VMA_DEFRAGMENTATION_FLAG_ALGORITHM_MASK; + + if (info.pool != VMA_NULL) + { + m_BlockVectorCount = 1; + m_PoolBlockVector = &info.pool->m_BlockVector; + m_pBlockVectors = &m_PoolBlockVector; + m_PoolBlockVector->SetIncrementalSort(false); + m_PoolBlockVector->SortByFreeSize(); + } + else + { + m_BlockVectorCount = hAllocator->GetMemoryTypeCount(); + m_PoolBlockVector = VMA_NULL; + m_pBlockVectors = hAllocator->m_pBlockVectors; + for (uint32_t i = 0; i < m_BlockVectorCount; ++i) + { + VmaBlockVector* vector = m_pBlockVectors[i]; + if (vector != VMA_NULL) + { + vector->SetIncrementalSort(false); + vector->SortByFreeSize(); + } + } + } + + switch (m_Algorithm) + { + case 0: // Default algorithm + m_Algorithm = VMA_DEFRAGMENTATION_FLAG_ALGORITHM_BALANCED_BIT; + m_AlgorithmState = vma_new_array(hAllocator, StateBalanced, m_BlockVectorCount); + break; + case VMA_DEFRAGMENTATION_FLAG_ALGORITHM_BALANCED_BIT: + m_AlgorithmState = vma_new_array(hAllocator, StateBalanced, m_BlockVectorCount); + break; + case VMA_DEFRAGMENTATION_FLAG_ALGORITHM_EXTENSIVE_BIT: + if (hAllocator->GetBufferImageGranularity() > 1) + { + m_AlgorithmState = vma_new_array(hAllocator, StateExtensive, m_BlockVectorCount); + } + break; + } +} + +VmaDefragmentationContext_T::~VmaDefragmentationContext_T() +{ + if (m_PoolBlockVector != VMA_NULL) + { + m_PoolBlockVector->SetIncrementalSort(true); + } + else + { + for (uint32_t i = 0; i < m_BlockVectorCount; ++i) + { + VmaBlockVector* vector = m_pBlockVectors[i]; + if (vector != VMA_NULL) + vector->SetIncrementalSort(true); + } + } + + if (m_AlgorithmState) + { + switch (m_Algorithm) + { + case VMA_DEFRAGMENTATION_FLAG_ALGORITHM_BALANCED_BIT: + vma_delete_array(m_MoveAllocator.m_pCallbacks, reinterpret_cast(m_AlgorithmState), m_BlockVectorCount); + break; + case VMA_DEFRAGMENTATION_FLAG_ALGORITHM_EXTENSIVE_BIT: + vma_delete_array(m_MoveAllocator.m_pCallbacks, reinterpret_cast(m_AlgorithmState), m_BlockVectorCount); + break; + default: + VMA_ASSERT(0); + } + } +} + +VkResult VmaDefragmentationContext_T::DefragmentPassBegin(VmaDefragmentationPassMoveInfo& moveInfo) +{ + if (m_PoolBlockVector != VMA_NULL) + { + VmaMutexLockWrite lock(m_PoolBlockVector->GetMutex(), m_PoolBlockVector->GetAllocator()->m_UseMutex); + + if (m_PoolBlockVector->GetBlockCount() > 1) + ComputeDefragmentation(*m_PoolBlockVector, 0); + else if (m_PoolBlockVector->GetBlockCount() == 1) + ReallocWithinBlock(*m_PoolBlockVector, m_PoolBlockVector->GetBlock(0)); + } + else + { + for (uint32_t i = 0; i < m_BlockVectorCount; ++i) + { + if (m_pBlockVectors[i] != VMA_NULL) + { + VmaMutexLockWrite lock(m_pBlockVectors[i]->GetMutex(), m_pBlockVectors[i]->GetAllocator()->m_UseMutex); + + if (m_pBlockVectors[i]->GetBlockCount() > 1) + { + if (ComputeDefragmentation(*m_pBlockVectors[i], i)) + break; + } + else if (m_pBlockVectors[i]->GetBlockCount() == 1) + { + if (ReallocWithinBlock(*m_pBlockVectors[i], m_pBlockVectors[i]->GetBlock(0))) + break; + } + } + } + } + + moveInfo.moveCount = static_cast(m_Moves.size()); + if (moveInfo.moveCount > 0) + { + moveInfo.pMoves = m_Moves.data(); + return VK_INCOMPLETE; + } + + moveInfo.pMoves = VMA_NULL; + return VK_SUCCESS; +} + +VkResult VmaDefragmentationContext_T::DefragmentPassEnd(VmaDefragmentationPassMoveInfo& moveInfo) +{ + 
+    VMA_ASSERT(moveInfo.moveCount > 0 ? moveInfo.pMoves != VMA_NULL : true);
+
+    VkResult result = VK_SUCCESS;
+    VmaStlAllocator<FragmentedBlock> blockAllocator(m_MoveAllocator.m_pCallbacks);
+    VmaVector<FragmentedBlock, VmaStlAllocator<FragmentedBlock>> immovableBlocks(blockAllocator);
+    VmaVector<FragmentedBlock, VmaStlAllocator<FragmentedBlock>> mappedBlocks(blockAllocator);
+
+    VmaAllocator allocator = VMA_NULL;
+    for (uint32_t i = 0; i < moveInfo.moveCount; ++i)
+    {
+        VmaDefragmentationMove& move = moveInfo.pMoves[i];
+        size_t prevCount = 0, currentCount = 0;
+        VkDeviceSize freedBlockSize = 0;
+
+        uint32_t vectorIndex;
+        VmaBlockVector* vector;
+        if (m_PoolBlockVector != VMA_NULL)
+        {
+            vectorIndex = 0;
+            vector = m_PoolBlockVector;
+        }
+        else
+        {
+            vectorIndex = move.srcAllocation->GetMemoryTypeIndex();
+            vector = m_pBlockVectors[vectorIndex];
+            VMA_ASSERT(vector != VMA_NULL);
+        }
+
+        switch (move.operation)
+        {
+        case VMA_DEFRAGMENTATION_MOVE_OPERATION_COPY:
+        {
+            uint8_t mapCount = move.srcAllocation->SwapBlockAllocation(vector->m_hAllocator, move.dstTmpAllocation);
+            if (mapCount > 0)
+            {
+                allocator = vector->m_hAllocator;
+                VmaDeviceMemoryBlock* newMapBlock = move.srcAllocation->GetBlock();
+                bool notPresent = true;
+                for (FragmentedBlock& block : mappedBlocks)
+                {
+                    if (block.block == newMapBlock)
+                    {
+                        notPresent = false;
+                        block.data += mapCount;
+                        break;
+                    }
+                }
+                if (notPresent)
+                    mappedBlocks.push_back({ mapCount, newMapBlock });
+            }
+
+            // Scope for locks, Free has its own lock
+            {
+                VmaMutexLockRead lock(vector->GetMutex(), vector->GetAllocator()->m_UseMutex);
+                prevCount = vector->GetBlockCount();
+                freedBlockSize = move.dstTmpAllocation->GetBlock()->m_pMetadata->GetSize();
+            }
+            vector->Free(move.dstTmpAllocation);
+            {
+                VmaMutexLockRead lock(vector->GetMutex(), vector->GetAllocator()->m_UseMutex);
+                currentCount = vector->GetBlockCount();
+            }
+
+            result = VK_INCOMPLETE;
+            break;
+        }
+        case VMA_DEFRAGMENTATION_MOVE_OPERATION_IGNORE:
+        {
+            m_PassStats.bytesMoved -= move.srcAllocation->GetSize();
+            --m_PassStats.allocationsMoved;
+            vector->Free(move.dstTmpAllocation);
+
+            VmaDeviceMemoryBlock* newBlock = move.srcAllocation->GetBlock();
+            bool notPresent = true;
+            for (const FragmentedBlock& block : immovableBlocks)
+            {
+                if (block.block == newBlock)
+                {
+                    notPresent = false;
+                    break;
+                }
+            }
+            if (notPresent)
+                immovableBlocks.push_back({ vectorIndex, newBlock });
+            break;
+        }
+        case VMA_DEFRAGMENTATION_MOVE_OPERATION_DESTROY:
+        {
+            m_PassStats.bytesMoved -= move.srcAllocation->GetSize();
+            --m_PassStats.allocationsMoved;
+            // Scope for locks, Free has its own lock
+            {
+                VmaMutexLockRead lock(vector->GetMutex(), vector->GetAllocator()->m_UseMutex);
+                prevCount = vector->GetBlockCount();
+                freedBlockSize = move.srcAllocation->GetBlock()->m_pMetadata->GetSize();
+            }
+            vector->Free(move.srcAllocation);
+            {
+                VmaMutexLockRead lock(vector->GetMutex(), vector->GetAllocator()->m_UseMutex);
+                currentCount = vector->GetBlockCount();
+            }
+            freedBlockSize *= prevCount - currentCount;
+
+            VkDeviceSize dstBlockSize;
+            {
+                VmaMutexLockRead lock(vector->GetMutex(), vector->GetAllocator()->m_UseMutex);
+                dstBlockSize = move.dstTmpAllocation->GetBlock()->m_pMetadata->GetSize();
+            }
+            vector->Free(move.dstTmpAllocation);
+            {
+                VmaMutexLockRead lock(vector->GetMutex(), vector->GetAllocator()->m_UseMutex);
+                freedBlockSize += dstBlockSize * (currentCount - vector->GetBlockCount());
+                currentCount = vector->GetBlockCount();
+            }
+
+            result = VK_INCOMPLETE;
+            break;
+        }
+        default:
+            VMA_ASSERT(0);
+        }
+
+        if (prevCount > currentCount)
+        {
+            size_t freedBlocks = prevCount - currentCount;
+            m_PassStats.deviceMemoryBlocksFreed += static_cast<uint32_t>(freedBlocks);
+            m_PassStats.bytesFreed += freedBlockSize;
+        }
+
+        if(m_Algorithm == VMA_DEFRAGMENTATION_FLAG_ALGORITHM_EXTENSIVE_BIT &&
+            m_AlgorithmState != VMA_NULL)
+        {
+            // Avoid unnecessary tries to allocate when new free block is available
+            StateExtensive& state = reinterpret_cast<StateExtensive*>(m_AlgorithmState)[vectorIndex];
+            if (state.firstFreeBlock != SIZE_MAX)
+            {
+                const size_t diff = prevCount - currentCount;
+                if (state.firstFreeBlock >= diff)
+                {
+                    state.firstFreeBlock -= diff;
+                    if (state.firstFreeBlock != 0)
+                        state.firstFreeBlock -= vector->GetBlock(state.firstFreeBlock - 1)->m_pMetadata->IsEmpty();
+                }
+                else
+                    state.firstFreeBlock = 0;
+            }
+        }
+    }
+    moveInfo.moveCount = 0;
+    moveInfo.pMoves = VMA_NULL;
+    m_Moves.clear();
+
+    // Update stats
+    m_GlobalStats.allocationsMoved += m_PassStats.allocationsMoved;
+    m_GlobalStats.bytesFreed += m_PassStats.bytesFreed;
+    m_GlobalStats.bytesMoved += m_PassStats.bytesMoved;
+    m_GlobalStats.deviceMemoryBlocksFreed += m_PassStats.deviceMemoryBlocksFreed;
+    m_PassStats = { 0 };
+
+    // Move blocks with immovable allocations according to algorithm
+    if (immovableBlocks.size() > 0)
+    {
+        do
+        {
+            if(m_Algorithm == VMA_DEFRAGMENTATION_FLAG_ALGORITHM_EXTENSIVE_BIT)
+            {
+                if (m_AlgorithmState != VMA_NULL)
+                {
+                    bool swapped = false;
+                    // Move to the start of free blocks range
+                    for (const FragmentedBlock& block : immovableBlocks)
+                    {
+                        StateExtensive& state = reinterpret_cast<StateExtensive*>(m_AlgorithmState)[block.data];
+                        if (state.operation != StateExtensive::Operation::Cleanup)
+                        {
+                            VmaBlockVector* vector = m_pBlockVectors[block.data];
+                            VmaMutexLockWrite lock(vector->GetMutex(), vector->GetAllocator()->m_UseMutex);
+
+                            for (size_t i = 0, count = vector->GetBlockCount() - m_ImmovableBlockCount; i < count; ++i)
+                            {
+                                if (vector->GetBlock(i) == block.block)
+                                {
+                                    std::swap(vector->m_Blocks[i], vector->m_Blocks[vector->GetBlockCount() - ++m_ImmovableBlockCount]);
+                                    if (state.firstFreeBlock != SIZE_MAX)
+                                    {
+                                        if (i + 1 < state.firstFreeBlock)
+                                        {
+                                            if (state.firstFreeBlock > 1)
+                                                std::swap(vector->m_Blocks[i], vector->m_Blocks[--state.firstFreeBlock]);
+                                            else
+                                                --state.firstFreeBlock;
+                                        }
+                                    }
+                                    swapped = true;
+                                    break;
+                                }
+                            }
+                        }
+                    }
+                    if (swapped)
+                        result = VK_INCOMPLETE;
+                    break;
+                }
+            }
+
+            // Move to the beginning
+            for (const FragmentedBlock& block : immovableBlocks)
+            {
+                VmaBlockVector* vector = m_pBlockVectors[block.data];
+                VmaMutexLockWrite lock(vector->GetMutex(), vector->GetAllocator()->m_UseMutex);
+
+                for (size_t i = m_ImmovableBlockCount; i < vector->GetBlockCount(); ++i)
+                {
+                    if (vector->GetBlock(i) == block.block)
+                    {
+                        std::swap(vector->m_Blocks[i], vector->m_Blocks[m_ImmovableBlockCount++]);
+                        break;
+                    }
+                }
+            }
+        } while (false);
+    }
+
+    // Bulk-map destination blocks
+    for (const FragmentedBlock& block : mappedBlocks)
+    {
+        VkResult res = block.block->Map(allocator, block.data, VMA_NULL);
+        VMA_ASSERT(res == VK_SUCCESS);
+    }
+    return result;
+}
+
+bool VmaDefragmentationContext_T::ComputeDefragmentation(VmaBlockVector& vector, size_t index)
+{
+    switch (m_Algorithm)
+    {
+    case VMA_DEFRAGMENTATION_FLAG_ALGORITHM_FAST_BIT:
+        return ComputeDefragmentation_Fast(vector);
+    case VMA_DEFRAGMENTATION_FLAG_ALGORITHM_BALANCED_BIT:
+        return ComputeDefragmentation_Balanced(vector, index, true);
+    case VMA_DEFRAGMENTATION_FLAG_ALGORITHM_FULL_BIT:
+        return ComputeDefragmentation_Full(vector);
+    case VMA_DEFRAGMENTATION_FLAG_ALGORITHM_EXTENSIVE_BIT:
+        return ComputeDefragmentation_Extensive(vector, index);
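+    // In short: Fast only migrates allocations out of the last blocks into
+    // earlier ones; Balanced additionally reallocates within a block when the
+    // neighboring free regions look large enough (see the heuristic in
+    // ComputeDefragmentation_Balanced); Full always also attempts the in-block
+    // realloc; Extensive empties whole blocks one at a time, segregating
+    // resource types to respect bufferImageGranularity.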
default: + VMA_ASSERT(0); + return ComputeDefragmentation_Balanced(vector, index, true); + } +} + +VmaDefragmentationContext_T::MoveAllocationData VmaDefragmentationContext_T::GetMoveData( + VmaAllocHandle handle, VmaBlockMetadata* metadata) +{ + MoveAllocationData moveData; + moveData.move.srcAllocation = (VmaAllocation)metadata->GetAllocationUserData(handle); + moveData.size = moveData.move.srcAllocation->GetSize(); + moveData.alignment = moveData.move.srcAllocation->GetAlignment(); + moveData.type = moveData.move.srcAllocation->GetSuballocationType(); + moveData.flags = 0; + + if (moveData.move.srcAllocation->IsPersistentMap()) + moveData.flags |= VMA_ALLOCATION_CREATE_MAPPED_BIT; + if (moveData.move.srcAllocation->IsMappingAllowed()) + moveData.flags |= VMA_ALLOCATION_CREATE_HOST_ACCESS_SEQUENTIAL_WRITE_BIT | VMA_ALLOCATION_CREATE_HOST_ACCESS_RANDOM_BIT; + + return moveData; +} + +VmaDefragmentationContext_T::CounterStatus VmaDefragmentationContext_T::CheckCounters(VkDeviceSize bytes) +{ + // Check custom criteria if exists + if (m_BreakCallback && m_BreakCallback(m_BreakCallbackUserData)) + return CounterStatus::End; + + // Ignore allocation if will exceed max size for copy + if (m_PassStats.bytesMoved + bytes > m_MaxPassBytes) + { + if (++m_IgnoredAllocs < MAX_ALLOCS_TO_IGNORE) + return CounterStatus::Ignore; + else + return CounterStatus::End; + } + else + m_IgnoredAllocs = 0; + return CounterStatus::Pass; +} + +bool VmaDefragmentationContext_T::IncrementCounters(VkDeviceSize bytes) +{ + m_PassStats.bytesMoved += bytes; + // Early return when max found + if (++m_PassStats.allocationsMoved >= m_MaxPassAllocations || m_PassStats.bytesMoved >= m_MaxPassBytes) + { + VMA_ASSERT((m_PassStats.allocationsMoved == m_MaxPassAllocations || + m_PassStats.bytesMoved == m_MaxPassBytes) && "Exceeded maximal pass threshold!"); + return true; + } + return false; +} + +bool VmaDefragmentationContext_T::ReallocWithinBlock(VmaBlockVector& vector, VmaDeviceMemoryBlock* block) +{ + VmaBlockMetadata* metadata = block->m_pMetadata; + + for (VmaAllocHandle handle = metadata->GetAllocationListBegin(); + handle != VK_NULL_HANDLE; + handle = metadata->GetNextAllocation(handle)) + { + MoveAllocationData moveData = GetMoveData(handle, metadata); + // Ignore newly created allocations by defragmentation algorithm + if (moveData.move.srcAllocation->GetUserData() == this) + continue; + switch (CheckCounters(moveData.move.srcAllocation->GetSize())) + { + case CounterStatus::Ignore: + continue; + case CounterStatus::End: + return true; + case CounterStatus::Pass: + break; + default: + VMA_ASSERT(0); + } + + VkDeviceSize offset = moveData.move.srcAllocation->GetOffset(); + if (offset != 0 && metadata->GetSumFreeSize() >= moveData.size) + { + VmaAllocationRequest request = {}; + if (metadata->CreateAllocationRequest( + moveData.size, + moveData.alignment, + false, + moveData.type, + VMA_ALLOCATION_CREATE_STRATEGY_MIN_OFFSET_BIT, + &request)) + { + if (metadata->GetAllocationOffset(request.allocHandle) < offset) + { + if (vector.CommitAllocationRequest( + request, + block, + moveData.alignment, + moveData.flags, + this, + moveData.type, + &moveData.move.dstTmpAllocation) == VK_SUCCESS) + { + m_Moves.push_back(moveData.move); + if (IncrementCounters(moveData.size)) + return true; + } + } + } + } + } + return false; +} + +bool VmaDefragmentationContext_T::AllocInOtherBlock(size_t start, size_t end, MoveAllocationData& data, VmaBlockVector& vector) +{ + for (; start < end; ++start) + { + VmaDeviceMemoryBlock* dstBlock = 
vector.GetBlock(start); + if (dstBlock->m_pMetadata->GetSumFreeSize() >= data.size) + { + if (vector.AllocateFromBlock(dstBlock, + data.size, + data.alignment, + data.flags, + this, + data.type, + 0, + &data.move.dstTmpAllocation) == VK_SUCCESS) + { + m_Moves.push_back(data.move); + if (IncrementCounters(data.size)) + return true; + break; + } + } + } + return false; +} + +bool VmaDefragmentationContext_T::ComputeDefragmentation_Fast(VmaBlockVector& vector) +{ + // Move only between blocks + + // Go through allocations in last blocks and try to fit them inside first ones + for (size_t i = vector.GetBlockCount() - 1; i > m_ImmovableBlockCount; --i) + { + VmaBlockMetadata* metadata = vector.GetBlock(i)->m_pMetadata; + + for (VmaAllocHandle handle = metadata->GetAllocationListBegin(); + handle != VK_NULL_HANDLE; + handle = metadata->GetNextAllocation(handle)) + { + MoveAllocationData moveData = GetMoveData(handle, metadata); + // Ignore newly created allocations by defragmentation algorithm + if (moveData.move.srcAllocation->GetUserData() == this) + continue; + switch (CheckCounters(moveData.move.srcAllocation->GetSize())) + { + case CounterStatus::Ignore: + continue; + case CounterStatus::End: + return true; + case CounterStatus::Pass: + break; + default: + VMA_ASSERT(0); + } + + // Check all previous blocks for free space + if (AllocInOtherBlock(0, i, moveData, vector)) + return true; + } + } + return false; +} + +bool VmaDefragmentationContext_T::ComputeDefragmentation_Balanced(VmaBlockVector& vector, size_t index, bool update) +{ + // Go over every allocation and try to fit it in previous blocks at lowest offsets, + // if not possible: realloc within single block to minimize offset (exclude offset == 0), + // but only if there are noticeable gaps between them (some heuristic, ex. 
average size of allocation in block)
+    VMA_ASSERT(m_AlgorithmState != VMA_NULL);
+
+    StateBalanced& vectorState = reinterpret_cast<StateBalanced*>(m_AlgorithmState)[index];
+    if (update && vectorState.avgAllocSize == UINT64_MAX)
+        UpdateVectorStatistics(vector, vectorState);
+
+    const size_t startMoveCount = m_Moves.size();
+    VkDeviceSize minimalFreeRegion = vectorState.avgFreeSize / 2;
+    for (size_t i = vector.GetBlockCount() - 1; i > m_ImmovableBlockCount; --i)
+    {
+        VmaDeviceMemoryBlock* block = vector.GetBlock(i);
+        VmaBlockMetadata* metadata = block->m_pMetadata;
+        VkDeviceSize prevFreeRegionSize = 0;
+
+        for (VmaAllocHandle handle = metadata->GetAllocationListBegin();
+            handle != VK_NULL_HANDLE;
+            handle = metadata->GetNextAllocation(handle))
+        {
+            MoveAllocationData moveData = GetMoveData(handle, metadata);
+            // Ignore newly created allocations by defragmentation algorithm
+            if (moveData.move.srcAllocation->GetUserData() == this)
+                continue;
+            switch (CheckCounters(moveData.move.srcAllocation->GetSize()))
+            {
+            case CounterStatus::Ignore:
+                continue;
+            case CounterStatus::End:
+                return true;
+            case CounterStatus::Pass:
+                break;
+            default:
+                VMA_ASSERT(0);
+            }
+
+            // Check all previous blocks for free space
+            const size_t prevMoveCount = m_Moves.size();
+            if (AllocInOtherBlock(0, i, moveData, vector))
+                return true;
+
+            VkDeviceSize nextFreeRegionSize = metadata->GetNextFreeRegionSize(handle);
+            // If no room found then realloc within block for lower offset
+            VkDeviceSize offset = moveData.move.srcAllocation->GetOffset();
+            if (prevMoveCount == m_Moves.size() && offset != 0 && metadata->GetSumFreeSize() >= moveData.size)
+            {
+                // Check if realloc will make sense
+                if (prevFreeRegionSize >= minimalFreeRegion ||
+                    nextFreeRegionSize >= minimalFreeRegion ||
+                    moveData.size <= vectorState.avgFreeSize ||
+                    moveData.size <= vectorState.avgAllocSize)
+                {
+                    VmaAllocationRequest request = {};
+                    if (metadata->CreateAllocationRequest(
+                        moveData.size,
+                        moveData.alignment,
+                        false,
+                        moveData.type,
+                        VMA_ALLOCATION_CREATE_STRATEGY_MIN_OFFSET_BIT,
+                        &request))
+                    {
+                        if (metadata->GetAllocationOffset(request.allocHandle) < offset)
+                        {
+                            if (vector.CommitAllocationRequest(
+                                request,
+                                block,
+                                moveData.alignment,
+                                moveData.flags,
+                                this,
+                                moveData.type,
+                                &moveData.move.dstTmpAllocation) == VK_SUCCESS)
+                            {
+                                m_Moves.push_back(moveData.move);
+                                if (IncrementCounters(moveData.size))
+                                    return true;
+                            }
+                        }
+                    }
+                }
+            }
+            prevFreeRegionSize = nextFreeRegionSize;
+        }
+    }
+
+    // No moves performed, update statistics to current vector state
+    if (startMoveCount == m_Moves.size() && !update)
+    {
+        vectorState.avgAllocSize = UINT64_MAX;
+        return ComputeDefragmentation_Balanced(vector, index, false);
+    }
+    return false;
+}
+
+bool VmaDefragmentationContext_T::ComputeDefragmentation_Full(VmaBlockVector& vector)
+{
+    // Go over every allocation and try to fit it in previous blocks at lowest offsets,
+    // if not possible: realloc within single block to minimize offset (exclude offset == 0)
+
+    for (size_t i = vector.GetBlockCount() - 1; i > m_ImmovableBlockCount; --i)
+    {
+        VmaDeviceMemoryBlock* block = vector.GetBlock(i);
+        VmaBlockMetadata* metadata = block->m_pMetadata;
+
+        for (VmaAllocHandle handle = metadata->GetAllocationListBegin();
+            handle != VK_NULL_HANDLE;
+            handle = metadata->GetNextAllocation(handle))
+        {
+            MoveAllocationData moveData = GetMoveData(handle, metadata);
+            // Ignore newly created allocations by defragmentation algorithm
+            if (moveData.move.srcAllocation->GetUserData() == this)
+                continue;
+            switch (CheckCounters(moveData.move.srcAllocation->GetSize()))
+            {
+            case CounterStatus::Ignore:
+                continue;
+            case CounterStatus::End:
+                return true;
+            case CounterStatus::Pass:
+                break;
+            default:
+                VMA_ASSERT(0);
+            }
+
+            // Check all previous blocks for free space
+            const size_t prevMoveCount = m_Moves.size();
+            if (AllocInOtherBlock(0, i, moveData, vector))
+                return true;
+
+            // If no room found then realloc within block for lower offset
+            VkDeviceSize offset = moveData.move.srcAllocation->GetOffset();
+            if (prevMoveCount == m_Moves.size() && offset != 0 && metadata->GetSumFreeSize() >= moveData.size)
+            {
+                VmaAllocationRequest request = {};
+                if (metadata->CreateAllocationRequest(
+                    moveData.size,
+                    moveData.alignment,
+                    false,
+                    moveData.type,
+                    VMA_ALLOCATION_CREATE_STRATEGY_MIN_OFFSET_BIT,
+                    &request))
+                {
+                    if (metadata->GetAllocationOffset(request.allocHandle) < offset)
+                    {
+                        if (vector.CommitAllocationRequest(
+                            request,
+                            block,
+                            moveData.alignment,
+                            moveData.flags,
+                            this,
+                            moveData.type,
+                            &moveData.move.dstTmpAllocation) == VK_SUCCESS)
+                        {
+                            m_Moves.push_back(moveData.move);
+                            if (IncrementCounters(moveData.size))
+                                return true;
+                        }
+                    }
+                }
+            }
+        }
+    }
+    return false;
+}
+
+bool VmaDefragmentationContext_T::ComputeDefragmentation_Extensive(VmaBlockVector& vector, size_t index)
+{
+    // First free single block, then populate it to the brim, then free another block, and so on
+
+    // Fallback to previous algorithm since without granularity conflicts it can achieve max packing
+    if (vector.m_BufferImageGranularity == 1)
+        return ComputeDefragmentation_Full(vector);
+
+    VMA_ASSERT(m_AlgorithmState != VMA_NULL);
+
+    StateExtensive& vectorState = reinterpret_cast<StateExtensive*>(m_AlgorithmState)[index];
+
+    bool texturePresent = false, bufferPresent = false, otherPresent = false;
+    switch (vectorState.operation)
+    {
+    case StateExtensive::Operation::Done: // Vector defragmented
+        return false;
+    case StateExtensive::Operation::FindFreeBlockBuffer:
+    case StateExtensive::Operation::FindFreeBlockTexture:
+    case StateExtensive::Operation::FindFreeBlockAll:
+    {
+        // No more blocks to free, just perform fast realloc and move to cleanup
+        if (vectorState.firstFreeBlock == 0)
+        {
+            vectorState.operation = StateExtensive::Operation::Cleanup;
+            return ComputeDefragmentation_Fast(vector);
+        }
+
+        // No free blocks, have to clear last one
+        size_t last = (vectorState.firstFreeBlock == SIZE_MAX ?
vector.GetBlockCount() : vectorState.firstFreeBlock) - 1; + VmaBlockMetadata* freeMetadata = vector.GetBlock(last)->m_pMetadata; + + const size_t prevMoveCount = m_Moves.size(); + for (VmaAllocHandle handle = freeMetadata->GetAllocationListBegin(); + handle != VK_NULL_HANDLE; + handle = freeMetadata->GetNextAllocation(handle)) + { + MoveAllocationData moveData = GetMoveData(handle, freeMetadata); + switch (CheckCounters(moveData.move.srcAllocation->GetSize())) + { + case CounterStatus::Ignore: + continue; + case CounterStatus::End: + return true; + case CounterStatus::Pass: + break; + default: + VMA_ASSERT(0); + } + + // Check all previous blocks for free space + if (AllocInOtherBlock(0, last, moveData, vector)) + { + // Full clear performed already + if (prevMoveCount != m_Moves.size() && freeMetadata->GetNextAllocation(handle) == VK_NULL_HANDLE) + vectorState.firstFreeBlock = last; + return true; + } + } + + if (prevMoveCount == m_Moves.size()) + { + // Cannot perform full clear, have to move data in other blocks around + if (last != 0) + { + for (size_t i = last - 1; i; --i) + { + if (ReallocWithinBlock(vector, vector.GetBlock(i))) + return true; + } + } + + if (prevMoveCount == m_Moves.size()) + { + // No possible reallocs within blocks, try to move them around fast + return ComputeDefragmentation_Fast(vector); + } + } + else + { + switch (vectorState.operation) + { + case StateExtensive::Operation::FindFreeBlockBuffer: + vectorState.operation = StateExtensive::Operation::MoveBuffers; + break; + case StateExtensive::Operation::FindFreeBlockTexture: + vectorState.operation = StateExtensive::Operation::MoveTextures; + break; + case StateExtensive::Operation::FindFreeBlockAll: + vectorState.operation = StateExtensive::Operation::MoveAll; + break; + default: + VMA_ASSERT(0); + vectorState.operation = StateExtensive::Operation::MoveTextures; + } + vectorState.firstFreeBlock = last; + // Nothing done, block found without reallocations, can perform another reallocs in same pass + return ComputeDefragmentation_Extensive(vector, index); + } + break; + } + case StateExtensive::Operation::MoveTextures: + { + if (MoveDataToFreeBlocks(VMA_SUBALLOCATION_TYPE_IMAGE_OPTIMAL, vector, + vectorState.firstFreeBlock, texturePresent, bufferPresent, otherPresent)) + { + if (texturePresent) + { + vectorState.operation = StateExtensive::Operation::FindFreeBlockTexture; + return ComputeDefragmentation_Extensive(vector, index); + } + + if (!bufferPresent && !otherPresent) + { + vectorState.operation = StateExtensive::Operation::Cleanup; + break; + } + + // No more textures to move, check buffers + vectorState.operation = StateExtensive::Operation::MoveBuffers; + bufferPresent = false; + otherPresent = false; + } + else + break; + VMA_FALLTHROUGH; // Fallthrough + } + case StateExtensive::Operation::MoveBuffers: + { + if (MoveDataToFreeBlocks(VMA_SUBALLOCATION_TYPE_BUFFER, vector, + vectorState.firstFreeBlock, texturePresent, bufferPresent, otherPresent)) + { + if (bufferPresent) + { + vectorState.operation = StateExtensive::Operation::FindFreeBlockBuffer; + return ComputeDefragmentation_Extensive(vector, index); + } + + if (!otherPresent) + { + vectorState.operation = StateExtensive::Operation::Cleanup; + break; + } + + // No more buffers to move, check all others + vectorState.operation = StateExtensive::Operation::MoveAll; + otherPresent = false; + } + else + break; + VMA_FALLTHROUGH; // Fallthrough + } + case StateExtensive::Operation::MoveAll: + { + if (MoveDataToFreeBlocks(VMA_SUBALLOCATION_TYPE_FREE, 
vector, + vectorState.firstFreeBlock, texturePresent, bufferPresent, otherPresent)) + { + if (otherPresent) + { + vectorState.operation = StateExtensive::Operation::FindFreeBlockBuffer; + return ComputeDefragmentation_Extensive(vector, index); + } + // Everything moved + vectorState.operation = StateExtensive::Operation::Cleanup; + } + break; + } + case StateExtensive::Operation::Cleanup: + // Cleanup is handled below so that other operations may reuse the cleanup code. This case is here to prevent the unhandled enum value warning (C4062). + break; + } + + if (vectorState.operation == StateExtensive::Operation::Cleanup) + { + // All other work done, pack data in blocks even tighter if possible + const size_t prevMoveCount = m_Moves.size(); + for (size_t i = 0; i < vector.GetBlockCount(); ++i) + { + if (ReallocWithinBlock(vector, vector.GetBlock(i))) + return true; + } + + if (prevMoveCount == m_Moves.size()) + vectorState.operation = StateExtensive::Operation::Done; + } + return false; +} + +void VmaDefragmentationContext_T::UpdateVectorStatistics(VmaBlockVector& vector, StateBalanced& state) +{ + size_t allocCount = 0; + size_t freeCount = 0; + state.avgFreeSize = 0; + state.avgAllocSize = 0; + + for (size_t i = 0; i < vector.GetBlockCount(); ++i) + { + VmaBlockMetadata* metadata = vector.GetBlock(i)->m_pMetadata; + + allocCount += metadata->GetAllocationCount(); + freeCount += metadata->GetFreeRegionsCount(); + state.avgFreeSize += metadata->GetSumFreeSize(); + state.avgAllocSize += metadata->GetSize(); + } + + state.avgAllocSize = (state.avgAllocSize - state.avgFreeSize) / allocCount; + state.avgFreeSize /= freeCount; +} + +bool VmaDefragmentationContext_T::MoveDataToFreeBlocks(VmaSuballocationType currentType, + VmaBlockVector& vector, size_t firstFreeBlock, + bool& texturePresent, bool& bufferPresent, bool& otherPresent) +{ + const size_t prevMoveCount = m_Moves.size(); + for (size_t i = firstFreeBlock ; i;) + { + VmaDeviceMemoryBlock* block = vector.GetBlock(--i); + VmaBlockMetadata* metadata = block->m_pMetadata; + + for (VmaAllocHandle handle = metadata->GetAllocationListBegin(); + handle != VK_NULL_HANDLE; + handle = metadata->GetNextAllocation(handle)) + { + MoveAllocationData moveData = GetMoveData(handle, metadata); + // Ignore newly created allocations by defragmentation algorithm + if (moveData.move.srcAllocation->GetUserData() == this) + continue; + switch (CheckCounters(moveData.move.srcAllocation->GetSize())) + { + case CounterStatus::Ignore: + continue; + case CounterStatus::End: + return true; + case CounterStatus::Pass: + break; + default: + VMA_ASSERT(0); + } + + // Move only single type of resources at once + if (!VmaIsBufferImageGranularityConflict(moveData.type, currentType)) + { + // Try to fit allocation into free blocks + if (AllocInOtherBlock(firstFreeBlock, vector.GetBlockCount(), moveData, vector)) + return false; + } + + if (!VmaIsBufferImageGranularityConflict(moveData.type, VMA_SUBALLOCATION_TYPE_IMAGE_OPTIMAL)) + texturePresent = true; + else if (!VmaIsBufferImageGranularityConflict(moveData.type, VMA_SUBALLOCATION_TYPE_BUFFER)) + bufferPresent = true; + else + otherPresent = true; + } + } + return prevMoveCount == m_Moves.size(); +} +#endif // _VMA_DEFRAGMENTATION_CONTEXT_FUNCTIONS + +#ifndef _VMA_POOL_T_FUNCTIONS +VmaPool_T::VmaPool_T( + VmaAllocator hAllocator, + const VmaPoolCreateInfo& createInfo, + VkDeviceSize preferredBlockSize) + : m_BlockVector( + hAllocator, + this, // hParentPool + createInfo.memoryTypeIndex, + createInfo.blockSize != 0 ? 
createInfo.blockSize : preferredBlockSize,
+        createInfo.minBlockCount,
+        createInfo.maxBlockCount,
+        (createInfo.flags & VMA_POOL_CREATE_IGNORE_BUFFER_IMAGE_GRANULARITY_BIT) != 0 ? 1 : hAllocator->GetBufferImageGranularity(),
+        createInfo.blockSize != 0, // explicitBlockSize
+        createInfo.flags & VMA_POOL_CREATE_ALGORITHM_MASK, // algorithm
+        createInfo.priority,
+        VMA_MAX(hAllocator->GetMemoryTypeMinAlignment(createInfo.memoryTypeIndex), createInfo.minAllocationAlignment),
+        createInfo.pMemoryAllocateNext),
+    m_Id(0),
+    m_Name(VMA_NULL) {}
+
+VmaPool_T::~VmaPool_T()
+{
+    VMA_ASSERT(m_PrevPool == VMA_NULL && m_NextPool == VMA_NULL);
+
+    const VkAllocationCallbacks* allocs = m_BlockVector.GetAllocator()->GetAllocationCallbacks();
+    VmaFreeString(allocs, m_Name);
+}
+
+void VmaPool_T::SetName(const char* pName)
+{
+    const VkAllocationCallbacks* allocs = m_BlockVector.GetAllocator()->GetAllocationCallbacks();
+    VmaFreeString(allocs, m_Name);
+
+    if (pName != VMA_NULL)
+    {
+        m_Name = VmaCreateStringCopy(allocs, pName);
+    }
+    else
+    {
+        m_Name = VMA_NULL;
+    }
+}
+#endif // _VMA_POOL_T_FUNCTIONS
+
+#ifndef _VMA_ALLOCATOR_T_FUNCTIONS
+VmaAllocator_T::VmaAllocator_T(const VmaAllocatorCreateInfo* pCreateInfo) :
+    m_UseMutex((pCreateInfo->flags & VMA_ALLOCATOR_CREATE_EXTERNALLY_SYNCHRONIZED_BIT) == 0),
+    m_VulkanApiVersion(pCreateInfo->vulkanApiVersion != 0 ? pCreateInfo->vulkanApiVersion : VK_API_VERSION_1_0),
+    m_UseKhrDedicatedAllocation((pCreateInfo->flags & VMA_ALLOCATOR_CREATE_KHR_DEDICATED_ALLOCATION_BIT) != 0),
+    m_UseKhrBindMemory2((pCreateInfo->flags & VMA_ALLOCATOR_CREATE_KHR_BIND_MEMORY2_BIT) != 0),
+    m_UseExtMemoryBudget((pCreateInfo->flags & VMA_ALLOCATOR_CREATE_EXT_MEMORY_BUDGET_BIT) != 0),
+    m_UseAmdDeviceCoherentMemory((pCreateInfo->flags & VMA_ALLOCATOR_CREATE_AMD_DEVICE_COHERENT_MEMORY_BIT) != 0),
+    m_UseKhrBufferDeviceAddress((pCreateInfo->flags & VMA_ALLOCATOR_CREATE_BUFFER_DEVICE_ADDRESS_BIT) != 0),
+    m_UseExtMemoryPriority((pCreateInfo->flags & VMA_ALLOCATOR_CREATE_EXT_MEMORY_PRIORITY_BIT) != 0),
+    m_UseKhrMaintenance4((pCreateInfo->flags & VMA_ALLOCATOR_CREATE_KHR_MAINTENANCE4_BIT) != 0),
+    m_UseKhrMaintenance5((pCreateInfo->flags & VMA_ALLOCATOR_CREATE_KHR_MAINTENANCE5_BIT) != 0),
+    m_UseKhrExternalMemoryWin32((pCreateInfo->flags & VMA_ALLOCATOR_CREATE_KHR_EXTERNAL_MEMORY_WIN32_BIT) != 0),
+    m_hDevice(pCreateInfo->device),
+    m_hInstance(pCreateInfo->instance),
+    m_AllocationCallbacksSpecified(pCreateInfo->pAllocationCallbacks != VMA_NULL),
+    m_AllocationCallbacks(pCreateInfo->pAllocationCallbacks ?
+        *pCreateInfo->pAllocationCallbacks : VmaEmptyAllocationCallbacks),
+    m_AllocationObjectAllocator(&m_AllocationCallbacks),
+    m_HeapSizeLimitMask(0),
+    m_DeviceMemoryCount(0),
+    m_PreferredLargeHeapBlockSize(0),
+    m_PhysicalDevice(pCreateInfo->physicalDevice),
+    m_GpuDefragmentationMemoryTypeBits(UINT32_MAX),
+    m_NextPoolId(0),
+    m_GlobalMemoryTypeBits(UINT32_MAX)
+{
+    if(m_VulkanApiVersion >= VK_MAKE_VERSION(1, 1, 0))
+    {
+        m_UseKhrDedicatedAllocation = false;
+        m_UseKhrBindMemory2 = false;
+    }
+
+    if(VMA_DEBUG_DETECT_CORRUPTION)
+    {
+        // Needs to be a multiple of uint32_t size because we are going to write VMA_CORRUPTION_DETECTION_MAGIC_VALUE to it.
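+        // For example, compiling with
+        //     #define VMA_DEBUG_MARGIN 16
+        //     #define VMA_DEBUG_DETECT_CORRUPTION 1
+        // before including this header puts 16-byte margins filled with
+        // VMA_CORRUPTION_DETECTION_MAGIC_VALUE around allocations, which
+        // vmaCheckCorruption() / vmaCheckPoolCorruption() can later validate;
+        // 16 works because it is a multiple of sizeof(uint32_t).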
+        VMA_ASSERT(VMA_DEBUG_MARGIN % sizeof(uint32_t) == 0);
+    }
+
+    VMA_ASSERT(pCreateInfo->physicalDevice && pCreateInfo->device && pCreateInfo->instance);
+
+    if(m_VulkanApiVersion < VK_MAKE_VERSION(1, 1, 0))
+    {
+#if !(VMA_DEDICATED_ALLOCATION)
+        if((pCreateInfo->flags & VMA_ALLOCATOR_CREATE_KHR_DEDICATED_ALLOCATION_BIT) != 0)
+        {
+            VMA_ASSERT(0 && "VMA_ALLOCATOR_CREATE_KHR_DEDICATED_ALLOCATION_BIT set but required extensions are disabled by preprocessor macros.");
+        }
+#endif
+#if !(VMA_BIND_MEMORY2)
+        if((pCreateInfo->flags & VMA_ALLOCATOR_CREATE_KHR_BIND_MEMORY2_BIT) != 0)
+        {
+            VMA_ASSERT(0 && "VMA_ALLOCATOR_CREATE_KHR_BIND_MEMORY2_BIT set but required extension is disabled by preprocessor macros.");
+        }
+#endif
+    }
+#if !(VMA_MEMORY_BUDGET)
+    if((pCreateInfo->flags & VMA_ALLOCATOR_CREATE_EXT_MEMORY_BUDGET_BIT) != 0)
+    {
+        VMA_ASSERT(0 && "VMA_ALLOCATOR_CREATE_EXT_MEMORY_BUDGET_BIT set but required extension is disabled by preprocessor macros.");
+    }
+#endif
+#if !(VMA_BUFFER_DEVICE_ADDRESS)
+    if(m_UseKhrBufferDeviceAddress)
+    {
+        VMA_ASSERT(0 && "VMA_ALLOCATOR_CREATE_BUFFER_DEVICE_ADDRESS_BIT is set but required extension or Vulkan 1.2 is not available in your Vulkan header or its support in VMA has been disabled by a preprocessor macro.");
+    }
+#endif
+#if VMA_VULKAN_VERSION < 1004000
+    VMA_ASSERT(m_VulkanApiVersion < VK_MAKE_VERSION(1, 4, 0) && "vulkanApiVersion >= VK_API_VERSION_1_4 but required Vulkan version is disabled by preprocessor macros.");
+#endif
+#if VMA_VULKAN_VERSION < 1003000
+    VMA_ASSERT(m_VulkanApiVersion < VK_MAKE_VERSION(1, 3, 0) && "vulkanApiVersion >= VK_API_VERSION_1_3 but required Vulkan version is disabled by preprocessor macros.");
+#endif
+#if VMA_VULKAN_VERSION < 1002000
+    VMA_ASSERT(m_VulkanApiVersion < VK_MAKE_VERSION(1, 2, 0) && "vulkanApiVersion >= VK_API_VERSION_1_2 but required Vulkan version is disabled by preprocessor macros.");
+#endif
+#if VMA_VULKAN_VERSION < 1001000
+    VMA_ASSERT(m_VulkanApiVersion < VK_MAKE_VERSION(1, 1, 0) && "vulkanApiVersion >= VK_API_VERSION_1_1 but required Vulkan version is disabled by preprocessor macros.");
+#endif
+#if !(VMA_MEMORY_PRIORITY)
+    if(m_UseExtMemoryPriority)
+    {
+        VMA_ASSERT(0 && "VMA_ALLOCATOR_CREATE_EXT_MEMORY_PRIORITY_BIT is set but required extension is not available in your Vulkan header or its support in VMA has been disabled by a preprocessor macro.");
+    }
+#endif
+#if !(VMA_KHR_MAINTENANCE4)
+    if(m_UseKhrMaintenance4)
+    {
+        VMA_ASSERT(0 && "VMA_ALLOCATOR_CREATE_KHR_MAINTENANCE4_BIT is set but required extension is not available in your Vulkan header or its support in VMA has been disabled by a preprocessor macro.");
+    }
+#endif
+#if !(VMA_KHR_MAINTENANCE5)
+    if(m_UseKhrMaintenance5)
+    {
+        VMA_ASSERT(0 && "VMA_ALLOCATOR_CREATE_KHR_MAINTENANCE5_BIT is set but required extension is not available in your Vulkan header or its support in VMA has been disabled by a preprocessor macro.");
+    }
+#endif
+
+#if !(VMA_EXTERNAL_MEMORY_WIN32)
+    if(m_UseKhrExternalMemoryWin32)
+    {
+        VMA_ASSERT(0 && "VMA_ALLOCATOR_CREATE_KHR_EXTERNAL_MEMORY_WIN32_BIT is set but required extension is not available in your Vulkan header or its support in VMA has been disabled by a preprocessor macro.");
+    }
+#endif
+
+    memset(&m_DeviceMemoryCallbacks, 0, sizeof(m_DeviceMemoryCallbacks));
+    memset(&m_PhysicalDeviceProperties, 0, sizeof(m_PhysicalDeviceProperties));
+    memset(&m_MemProps, 0, sizeof(m_MemProps));
+
+    memset(&m_pBlockVectors, 0, sizeof(m_pBlockVectors));
+    memset(&m_VulkanFunctions, 0, sizeof(m_VulkanFunctions));
+
+#if VMA_EXTERNAL_MEMORY
+    memset(&m_TypeExternalMemoryHandleTypes, 0, sizeof(m_TypeExternalMemoryHandleTypes));
+#endif // #if VMA_EXTERNAL_MEMORY
+
+    if(pCreateInfo->pDeviceMemoryCallbacks != VMA_NULL)
+    {
+        m_DeviceMemoryCallbacks.pUserData = pCreateInfo->pDeviceMemoryCallbacks->pUserData;
+        m_DeviceMemoryCallbacks.pfnAllocate = pCreateInfo->pDeviceMemoryCallbacks->pfnAllocate;
+        m_DeviceMemoryCallbacks.pfnFree = pCreateInfo->pDeviceMemoryCallbacks->pfnFree;
+    }
+
+    ImportVulkanFunctions(pCreateInfo->pVulkanFunctions);
+
+    (*m_VulkanFunctions.vkGetPhysicalDeviceProperties)(m_PhysicalDevice, &m_PhysicalDeviceProperties);
+    (*m_VulkanFunctions.vkGetPhysicalDeviceMemoryProperties)(m_PhysicalDevice, &m_MemProps);
+
+    VMA_ASSERT(VmaIsPow2(VMA_MIN_ALIGNMENT));
+    VMA_ASSERT(VmaIsPow2(VMA_DEBUG_MIN_BUFFER_IMAGE_GRANULARITY));
+    VMA_ASSERT(VmaIsPow2(m_PhysicalDeviceProperties.limits.bufferImageGranularity));
+    VMA_ASSERT(VmaIsPow2(m_PhysicalDeviceProperties.limits.nonCoherentAtomSize));
+
+    m_PreferredLargeHeapBlockSize = (pCreateInfo->preferredLargeHeapBlockSize != 0) ?
+        pCreateInfo->preferredLargeHeapBlockSize : static_cast<VkDeviceSize>(VMA_DEFAULT_LARGE_HEAP_BLOCK_SIZE);
+
+    m_GlobalMemoryTypeBits = CalculateGlobalMemoryTypeBits();
+
+#if VMA_EXTERNAL_MEMORY
+    if(pCreateInfo->pTypeExternalMemoryHandleTypes != VMA_NULL)
+    {
+        memcpy(m_TypeExternalMemoryHandleTypes, pCreateInfo->pTypeExternalMemoryHandleTypes,
+            sizeof(VkExternalMemoryHandleTypeFlagsKHR) * GetMemoryTypeCount());
+    }
+#endif // #if VMA_EXTERNAL_MEMORY
+
+    if(pCreateInfo->pHeapSizeLimit != VMA_NULL)
+    {
+        for(uint32_t heapIndex = 0; heapIndex < GetMemoryHeapCount(); ++heapIndex)
+        {
+            const VkDeviceSize limit = pCreateInfo->pHeapSizeLimit[heapIndex];
+            if(limit != VK_WHOLE_SIZE)
+            {
+                m_HeapSizeLimitMask |= 1u << heapIndex;
+                if(limit < m_MemProps.memoryHeaps[heapIndex].size)
+                {
+                    m_MemProps.memoryHeaps[heapIndex].size = limit;
+                }
+            }
+        }
+    }
+
+    for(uint32_t memTypeIndex = 0; memTypeIndex < GetMemoryTypeCount(); ++memTypeIndex)
+    {
+        // Create only supported types
+        if((m_GlobalMemoryTypeBits & (1u << memTypeIndex)) != 0)
+        {
+            const VkDeviceSize preferredBlockSize = CalcPreferredBlockSize(memTypeIndex);
+            m_pBlockVectors[memTypeIndex] = vma_new(this, VmaBlockVector)(
+                this,
+                VK_NULL_HANDLE, // hParentPool
+                memTypeIndex,
+                preferredBlockSize,
+                0,
+                SIZE_MAX,
+                GetBufferImageGranularity(),
+                false, // explicitBlockSize
+                0, // algorithm
+                0.5f, // priority (0.5 is the default per Vulkan spec)
+                GetMemoryTypeMinAlignment(memTypeIndex), // minAllocationAlignment
+                VMA_NULL); // pMemoryAllocateNext
+            // No need to call m_pBlockVectors[memTypeIndex][blockVectorTypeIndex]->CreateMinBlocks here,
+            // because minBlockCount is 0.
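+            // Each supported memory type thus gets an unbounded default block
+            // vector (minBlockCount 0, maxBlockCount SIZE_MAX) whose preferred
+            // block size comes from CalcPreferredBlockSize() below: heapSize / 8
+            // for heaps up to VMA_SMALL_HEAP_MAX_SIZE, otherwise
+            // m_PreferredLargeHeapBlockSize.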
+ } + } +} + +VkResult VmaAllocator_T::Init(const VmaAllocatorCreateInfo* pCreateInfo) +{ + VkResult res = VK_SUCCESS; + +#if VMA_MEMORY_BUDGET + if(m_UseExtMemoryBudget) + { + UpdateVulkanBudget(); + } +#endif // #if VMA_MEMORY_BUDGET + + return res; +} + +VmaAllocator_T::~VmaAllocator_T() +{ + VMA_ASSERT(m_Pools.IsEmpty()); + + for(size_t memTypeIndex = GetMemoryTypeCount(); memTypeIndex--; ) + { + vma_delete(this, m_pBlockVectors[memTypeIndex]); + } +} + +void VmaAllocator_T::ImportVulkanFunctions(const VmaVulkanFunctions* pVulkanFunctions) +{ +#if VMA_STATIC_VULKAN_FUNCTIONS == 1 + ImportVulkanFunctions_Static(); +#endif + + if(pVulkanFunctions != VMA_NULL) + { + ImportVulkanFunctions_Custom(pVulkanFunctions); + } + +#if VMA_DYNAMIC_VULKAN_FUNCTIONS == 1 + ImportVulkanFunctions_Dynamic(); +#endif + + ValidateVulkanFunctions(); +} + +#if VMA_STATIC_VULKAN_FUNCTIONS == 1 + +void VmaAllocator_T::ImportVulkanFunctions_Static() +{ + // Vulkan 1.0 + m_VulkanFunctions.vkGetInstanceProcAddr = (PFN_vkGetInstanceProcAddr)vkGetInstanceProcAddr; + m_VulkanFunctions.vkGetDeviceProcAddr = (PFN_vkGetDeviceProcAddr)vkGetDeviceProcAddr; + m_VulkanFunctions.vkGetPhysicalDeviceProperties = (PFN_vkGetPhysicalDeviceProperties)vkGetPhysicalDeviceProperties; + m_VulkanFunctions.vkGetPhysicalDeviceMemoryProperties = (PFN_vkGetPhysicalDeviceMemoryProperties)vkGetPhysicalDeviceMemoryProperties; + m_VulkanFunctions.vkAllocateMemory = (PFN_vkAllocateMemory)vkAllocateMemory; + m_VulkanFunctions.vkFreeMemory = (PFN_vkFreeMemory)vkFreeMemory; + m_VulkanFunctions.vkMapMemory = (PFN_vkMapMemory)vkMapMemory; + m_VulkanFunctions.vkUnmapMemory = (PFN_vkUnmapMemory)vkUnmapMemory; + m_VulkanFunctions.vkFlushMappedMemoryRanges = (PFN_vkFlushMappedMemoryRanges)vkFlushMappedMemoryRanges; + m_VulkanFunctions.vkInvalidateMappedMemoryRanges = (PFN_vkInvalidateMappedMemoryRanges)vkInvalidateMappedMemoryRanges; + m_VulkanFunctions.vkBindBufferMemory = (PFN_vkBindBufferMemory)vkBindBufferMemory; + m_VulkanFunctions.vkBindImageMemory = (PFN_vkBindImageMemory)vkBindImageMemory; + m_VulkanFunctions.vkGetBufferMemoryRequirements = (PFN_vkGetBufferMemoryRequirements)vkGetBufferMemoryRequirements; + m_VulkanFunctions.vkGetImageMemoryRequirements = (PFN_vkGetImageMemoryRequirements)vkGetImageMemoryRequirements; + m_VulkanFunctions.vkCreateBuffer = (PFN_vkCreateBuffer)vkCreateBuffer; + m_VulkanFunctions.vkDestroyBuffer = (PFN_vkDestroyBuffer)vkDestroyBuffer; + m_VulkanFunctions.vkCreateImage = (PFN_vkCreateImage)vkCreateImage; + m_VulkanFunctions.vkDestroyImage = (PFN_vkDestroyImage)vkDestroyImage; + m_VulkanFunctions.vkCmdCopyBuffer = (PFN_vkCmdCopyBuffer)vkCmdCopyBuffer; + + // Vulkan 1.1 +#if VMA_VULKAN_VERSION >= 1001000 + if(m_VulkanApiVersion >= VK_MAKE_VERSION(1, 1, 0)) + { + m_VulkanFunctions.vkGetBufferMemoryRequirements2KHR = (PFN_vkGetBufferMemoryRequirements2)vkGetBufferMemoryRequirements2; + m_VulkanFunctions.vkGetImageMemoryRequirements2KHR = (PFN_vkGetImageMemoryRequirements2)vkGetImageMemoryRequirements2; + m_VulkanFunctions.vkBindBufferMemory2KHR = (PFN_vkBindBufferMemory2)vkBindBufferMemory2; + m_VulkanFunctions.vkBindImageMemory2KHR = (PFN_vkBindImageMemory2)vkBindImageMemory2; + } +#endif + +#if VMA_VULKAN_VERSION >= 1001000 + if(m_VulkanApiVersion >= VK_MAKE_VERSION(1, 1, 0)) + { + m_VulkanFunctions.vkGetPhysicalDeviceMemoryProperties2KHR = (PFN_vkGetPhysicalDeviceMemoryProperties2)vkGetPhysicalDeviceMemoryProperties2; + } +#endif + +#if VMA_VULKAN_VERSION >= 1003000 + if(m_VulkanApiVersion >= VK_MAKE_VERSION(1, 3, 0)) 
+ { + m_VulkanFunctions.vkGetDeviceBufferMemoryRequirements = (PFN_vkGetDeviceBufferMemoryRequirements)vkGetDeviceBufferMemoryRequirements; + m_VulkanFunctions.vkGetDeviceImageMemoryRequirements = (PFN_vkGetDeviceImageMemoryRequirements)vkGetDeviceImageMemoryRequirements; + } +#endif +} + +#endif // VMA_STATIC_VULKAN_FUNCTIONS == 1 + +void VmaAllocator_T::ImportVulkanFunctions_Custom(const VmaVulkanFunctions* pVulkanFunctions) +{ + VMA_ASSERT(pVulkanFunctions != VMA_NULL); + +#define VMA_COPY_IF_NOT_NULL(funcName) \ + if(pVulkanFunctions->funcName != VMA_NULL) m_VulkanFunctions.funcName = pVulkanFunctions->funcName; + + VMA_COPY_IF_NOT_NULL(vkGetInstanceProcAddr); + VMA_COPY_IF_NOT_NULL(vkGetDeviceProcAddr); + VMA_COPY_IF_NOT_NULL(vkGetPhysicalDeviceProperties); + VMA_COPY_IF_NOT_NULL(vkGetPhysicalDeviceMemoryProperties); + VMA_COPY_IF_NOT_NULL(vkAllocateMemory); + VMA_COPY_IF_NOT_NULL(vkFreeMemory); + VMA_COPY_IF_NOT_NULL(vkMapMemory); + VMA_COPY_IF_NOT_NULL(vkUnmapMemory); + VMA_COPY_IF_NOT_NULL(vkFlushMappedMemoryRanges); + VMA_COPY_IF_NOT_NULL(vkInvalidateMappedMemoryRanges); + VMA_COPY_IF_NOT_NULL(vkBindBufferMemory); + VMA_COPY_IF_NOT_NULL(vkBindImageMemory); + VMA_COPY_IF_NOT_NULL(vkGetBufferMemoryRequirements); + VMA_COPY_IF_NOT_NULL(vkGetImageMemoryRequirements); + VMA_COPY_IF_NOT_NULL(vkCreateBuffer); + VMA_COPY_IF_NOT_NULL(vkDestroyBuffer); + VMA_COPY_IF_NOT_NULL(vkCreateImage); + VMA_COPY_IF_NOT_NULL(vkDestroyImage); + VMA_COPY_IF_NOT_NULL(vkCmdCopyBuffer); + +#if VMA_DEDICATED_ALLOCATION || VMA_VULKAN_VERSION >= 1001000 + VMA_COPY_IF_NOT_NULL(vkGetBufferMemoryRequirements2KHR); + VMA_COPY_IF_NOT_NULL(vkGetImageMemoryRequirements2KHR); +#endif + +#if VMA_BIND_MEMORY2 || VMA_VULKAN_VERSION >= 1001000 + VMA_COPY_IF_NOT_NULL(vkBindBufferMemory2KHR); + VMA_COPY_IF_NOT_NULL(vkBindImageMemory2KHR); +#endif + +#if VMA_MEMORY_BUDGET || VMA_VULKAN_VERSION >= 1001000 + VMA_COPY_IF_NOT_NULL(vkGetPhysicalDeviceMemoryProperties2KHR); +#endif + +#if VMA_KHR_MAINTENANCE4 || VMA_VULKAN_VERSION >= 1003000 + VMA_COPY_IF_NOT_NULL(vkGetDeviceBufferMemoryRequirements); + VMA_COPY_IF_NOT_NULL(vkGetDeviceImageMemoryRequirements); +#endif +#if VMA_EXTERNAL_MEMORY_WIN32 + VMA_COPY_IF_NOT_NULL(vkGetMemoryWin32HandleKHR); +#endif +#undef VMA_COPY_IF_NOT_NULL +} + +#if VMA_DYNAMIC_VULKAN_FUNCTIONS == 1 + +void VmaAllocator_T::ImportVulkanFunctions_Dynamic() +{ + VMA_ASSERT(m_VulkanFunctions.vkGetInstanceProcAddr && m_VulkanFunctions.vkGetDeviceProcAddr && + "To use VMA_DYNAMIC_VULKAN_FUNCTIONS in new versions of VMA you now have to pass " + "VmaVulkanFunctions::vkGetInstanceProcAddr and vkGetDeviceProcAddr as VmaAllocatorCreateInfo::pVulkanFunctions. 
" + "Other members can be null."); + +#define VMA_FETCH_INSTANCE_FUNC(memberName, functionPointerType, functionNameString) \ + if(m_VulkanFunctions.memberName == VMA_NULL) \ + m_VulkanFunctions.memberName = \ + (functionPointerType)m_VulkanFunctions.vkGetInstanceProcAddr(m_hInstance, functionNameString); +#define VMA_FETCH_DEVICE_FUNC(memberName, functionPointerType, functionNameString) \ + if(m_VulkanFunctions.memberName == VMA_NULL) \ + m_VulkanFunctions.memberName = \ + (functionPointerType)m_VulkanFunctions.vkGetDeviceProcAddr(m_hDevice, functionNameString); + + VMA_FETCH_INSTANCE_FUNC(vkGetPhysicalDeviceProperties, PFN_vkGetPhysicalDeviceProperties, "vkGetPhysicalDeviceProperties"); + VMA_FETCH_INSTANCE_FUNC(vkGetPhysicalDeviceMemoryProperties, PFN_vkGetPhysicalDeviceMemoryProperties, "vkGetPhysicalDeviceMemoryProperties"); + VMA_FETCH_DEVICE_FUNC(vkAllocateMemory, PFN_vkAllocateMemory, "vkAllocateMemory"); + VMA_FETCH_DEVICE_FUNC(vkFreeMemory, PFN_vkFreeMemory, "vkFreeMemory"); + VMA_FETCH_DEVICE_FUNC(vkMapMemory, PFN_vkMapMemory, "vkMapMemory"); + VMA_FETCH_DEVICE_FUNC(vkUnmapMemory, PFN_vkUnmapMemory, "vkUnmapMemory"); + VMA_FETCH_DEVICE_FUNC(vkFlushMappedMemoryRanges, PFN_vkFlushMappedMemoryRanges, "vkFlushMappedMemoryRanges"); + VMA_FETCH_DEVICE_FUNC(vkInvalidateMappedMemoryRanges, PFN_vkInvalidateMappedMemoryRanges, "vkInvalidateMappedMemoryRanges"); + VMA_FETCH_DEVICE_FUNC(vkBindBufferMemory, PFN_vkBindBufferMemory, "vkBindBufferMemory"); + VMA_FETCH_DEVICE_FUNC(vkBindImageMemory, PFN_vkBindImageMemory, "vkBindImageMemory"); + VMA_FETCH_DEVICE_FUNC(vkGetBufferMemoryRequirements, PFN_vkGetBufferMemoryRequirements, "vkGetBufferMemoryRequirements"); + VMA_FETCH_DEVICE_FUNC(vkGetImageMemoryRequirements, PFN_vkGetImageMemoryRequirements, "vkGetImageMemoryRequirements"); + VMA_FETCH_DEVICE_FUNC(vkCreateBuffer, PFN_vkCreateBuffer, "vkCreateBuffer"); + VMA_FETCH_DEVICE_FUNC(vkDestroyBuffer, PFN_vkDestroyBuffer, "vkDestroyBuffer"); + VMA_FETCH_DEVICE_FUNC(vkCreateImage, PFN_vkCreateImage, "vkCreateImage"); + VMA_FETCH_DEVICE_FUNC(vkDestroyImage, PFN_vkDestroyImage, "vkDestroyImage"); + VMA_FETCH_DEVICE_FUNC(vkCmdCopyBuffer, PFN_vkCmdCopyBuffer, "vkCmdCopyBuffer"); + +#if VMA_VULKAN_VERSION >= 1001000 + if(m_VulkanApiVersion >= VK_MAKE_VERSION(1, 1, 0)) + { + VMA_FETCH_DEVICE_FUNC(vkGetBufferMemoryRequirements2KHR, PFN_vkGetBufferMemoryRequirements2, "vkGetBufferMemoryRequirements2"); + VMA_FETCH_DEVICE_FUNC(vkGetImageMemoryRequirements2KHR, PFN_vkGetImageMemoryRequirements2, "vkGetImageMemoryRequirements2"); + VMA_FETCH_DEVICE_FUNC(vkBindBufferMemory2KHR, PFN_vkBindBufferMemory2, "vkBindBufferMemory2"); + VMA_FETCH_DEVICE_FUNC(vkBindImageMemory2KHR, PFN_vkBindImageMemory2, "vkBindImageMemory2"); + } +#endif + +#if VMA_MEMORY_BUDGET || VMA_VULKAN_VERSION >= 1001000 + if(m_VulkanApiVersion >= VK_MAKE_VERSION(1, 1, 0)) + { + VMA_FETCH_INSTANCE_FUNC(vkGetPhysicalDeviceMemoryProperties2KHR, PFN_vkGetPhysicalDeviceMemoryProperties2KHR, "vkGetPhysicalDeviceMemoryProperties2"); + // Try to fetch the pointer from the other name, based on suspected driver bug - see issue #410. 
+ VMA_FETCH_INSTANCE_FUNC(vkGetPhysicalDeviceMemoryProperties2KHR, PFN_vkGetPhysicalDeviceMemoryProperties2KHR, "vkGetPhysicalDeviceMemoryProperties2KHR"); + } + else if(m_UseExtMemoryBudget) + { + VMA_FETCH_INSTANCE_FUNC(vkGetPhysicalDeviceMemoryProperties2KHR, PFN_vkGetPhysicalDeviceMemoryProperties2KHR, "vkGetPhysicalDeviceMemoryProperties2KHR"); + // Try to fetch the pointer from the other name, based on suspected driver bug - see issue #410. + VMA_FETCH_INSTANCE_FUNC(vkGetPhysicalDeviceMemoryProperties2KHR, PFN_vkGetPhysicalDeviceMemoryProperties2KHR, "vkGetPhysicalDeviceMemoryProperties2"); + } +#endif + +#if VMA_DEDICATED_ALLOCATION + if(m_UseKhrDedicatedAllocation) + { + VMA_FETCH_DEVICE_FUNC(vkGetBufferMemoryRequirements2KHR, PFN_vkGetBufferMemoryRequirements2KHR, "vkGetBufferMemoryRequirements2KHR"); + VMA_FETCH_DEVICE_FUNC(vkGetImageMemoryRequirements2KHR, PFN_vkGetImageMemoryRequirements2KHR, "vkGetImageMemoryRequirements2KHR"); + } +#endif + +#if VMA_BIND_MEMORY2 + if(m_UseKhrBindMemory2) + { + VMA_FETCH_DEVICE_FUNC(vkBindBufferMemory2KHR, PFN_vkBindBufferMemory2KHR, "vkBindBufferMemory2KHR"); + VMA_FETCH_DEVICE_FUNC(vkBindImageMemory2KHR, PFN_vkBindImageMemory2KHR, "vkBindImageMemory2KHR"); + } +#endif // #if VMA_BIND_MEMORY2 + +#if VMA_MEMORY_BUDGET || VMA_VULKAN_VERSION >= 1001000 + if(m_VulkanApiVersion >= VK_MAKE_VERSION(1, 1, 0)) + { + VMA_FETCH_INSTANCE_FUNC(vkGetPhysicalDeviceMemoryProperties2KHR, PFN_vkGetPhysicalDeviceMemoryProperties2KHR, "vkGetPhysicalDeviceMemoryProperties2"); + } + else if(m_UseExtMemoryBudget) + { + VMA_FETCH_INSTANCE_FUNC(vkGetPhysicalDeviceMemoryProperties2KHR, PFN_vkGetPhysicalDeviceMemoryProperties2KHR, "vkGetPhysicalDeviceMemoryProperties2KHR"); + } +#endif // #if VMA_MEMORY_BUDGET + +#if VMA_VULKAN_VERSION >= 1003000 + if(m_VulkanApiVersion >= VK_MAKE_VERSION(1, 3, 0)) + { + VMA_FETCH_DEVICE_FUNC(vkGetDeviceBufferMemoryRequirements, PFN_vkGetDeviceBufferMemoryRequirements, "vkGetDeviceBufferMemoryRequirements"); + VMA_FETCH_DEVICE_FUNC(vkGetDeviceImageMemoryRequirements, PFN_vkGetDeviceImageMemoryRequirements, "vkGetDeviceImageMemoryRequirements"); + } +#endif +#if VMA_KHR_MAINTENANCE4 + if(m_UseKhrMaintenance4) + { + VMA_FETCH_DEVICE_FUNC(vkGetDeviceBufferMemoryRequirements, PFN_vkGetDeviceBufferMemoryRequirementsKHR, "vkGetDeviceBufferMemoryRequirementsKHR"); + VMA_FETCH_DEVICE_FUNC(vkGetDeviceImageMemoryRequirements, PFN_vkGetDeviceImageMemoryRequirementsKHR, "vkGetDeviceImageMemoryRequirementsKHR"); + } +#endif +#if VMA_EXTERNAL_MEMORY_WIN32 + if (m_UseKhrExternalMemoryWin32) + { + VMA_FETCH_DEVICE_FUNC(vkGetMemoryWin32HandleKHR, PFN_vkGetMemoryWin32HandleKHR, "vkGetMemoryWin32HandleKHR"); + } +#endif +#undef VMA_FETCH_DEVICE_FUNC +#undef VMA_FETCH_INSTANCE_FUNC +} + +#endif // VMA_DYNAMIC_VULKAN_FUNCTIONS == 1 + +void VmaAllocator_T::ValidateVulkanFunctions() +{ + VMA_ASSERT(m_VulkanFunctions.vkGetPhysicalDeviceProperties != VMA_NULL); + VMA_ASSERT(m_VulkanFunctions.vkGetPhysicalDeviceMemoryProperties != VMA_NULL); + VMA_ASSERT(m_VulkanFunctions.vkAllocateMemory != VMA_NULL); + VMA_ASSERT(m_VulkanFunctions.vkFreeMemory != VMA_NULL); + VMA_ASSERT(m_VulkanFunctions.vkMapMemory != VMA_NULL); + VMA_ASSERT(m_VulkanFunctions.vkUnmapMemory != VMA_NULL); + VMA_ASSERT(m_VulkanFunctions.vkFlushMappedMemoryRanges != VMA_NULL); + VMA_ASSERT(m_VulkanFunctions.vkInvalidateMappedMemoryRanges != VMA_NULL); + VMA_ASSERT(m_VulkanFunctions.vkBindBufferMemory != VMA_NULL); + VMA_ASSERT(m_VulkanFunctions.vkBindImageMemory != VMA_NULL); + 
VMA_ASSERT(m_VulkanFunctions.vkGetBufferMemoryRequirements != VMA_NULL); + VMA_ASSERT(m_VulkanFunctions.vkGetImageMemoryRequirements != VMA_NULL); + VMA_ASSERT(m_VulkanFunctions.vkCreateBuffer != VMA_NULL); + VMA_ASSERT(m_VulkanFunctions.vkDestroyBuffer != VMA_NULL); + VMA_ASSERT(m_VulkanFunctions.vkCreateImage != VMA_NULL); + VMA_ASSERT(m_VulkanFunctions.vkDestroyImage != VMA_NULL); + VMA_ASSERT(m_VulkanFunctions.vkCmdCopyBuffer != VMA_NULL); + +#if VMA_DEDICATED_ALLOCATION || VMA_VULKAN_VERSION >= 1001000 + if(m_VulkanApiVersion >= VK_MAKE_VERSION(1, 1, 0) || m_UseKhrDedicatedAllocation) + { + VMA_ASSERT(m_VulkanFunctions.vkGetBufferMemoryRequirements2KHR != VMA_NULL); + VMA_ASSERT(m_VulkanFunctions.vkGetImageMemoryRequirements2KHR != VMA_NULL); + } +#endif + +#if VMA_BIND_MEMORY2 || VMA_VULKAN_VERSION >= 1001000 + if(m_VulkanApiVersion >= VK_MAKE_VERSION(1, 1, 0) || m_UseKhrBindMemory2) + { + VMA_ASSERT(m_VulkanFunctions.vkBindBufferMemory2KHR != VMA_NULL); + VMA_ASSERT(m_VulkanFunctions.vkBindImageMemory2KHR != VMA_NULL); + } +#endif + +#if VMA_MEMORY_BUDGET || VMA_VULKAN_VERSION >= 1001000 + if(m_UseExtMemoryBudget || m_VulkanApiVersion >= VK_MAKE_VERSION(1, 1, 0)) + { + VMA_ASSERT(m_VulkanFunctions.vkGetPhysicalDeviceMemoryProperties2KHR != VMA_NULL); + } +#endif +#if VMA_EXTERNAL_MEMORY_WIN32 + if (m_UseKhrExternalMemoryWin32) + { + VMA_ASSERT(m_VulkanFunctions.vkGetMemoryWin32HandleKHR != VMA_NULL); + } +#endif + + // Not validating these due to suspected driver bugs with these function + // pointers being null despite correct extension or Vulkan version is enabled. + // See issue #397. Their usage in VMA is optional anyway. + // + // VMA_ASSERT(m_VulkanFunctions.vkGetDeviceBufferMemoryRequirements != VMA_NULL); + // VMA_ASSERT(m_VulkanFunctions.vkGetDeviceImageMemoryRequirements != VMA_NULL); +} + +VkDeviceSize VmaAllocator_T::CalcPreferredBlockSize(uint32_t memTypeIndex) +{ + const uint32_t heapIndex = MemoryTypeIndexToHeapIndex(memTypeIndex); + const VkDeviceSize heapSize = m_MemProps.memoryHeaps[heapIndex].size; + const bool isSmallHeap = heapSize <= VMA_SMALL_HEAP_MAX_SIZE; + return VmaAlignUp(isSmallHeap ? 
(heapSize / 8) : m_PreferredLargeHeapBlockSize, (VkDeviceSize)32);
+}
+
+VkResult VmaAllocator_T::AllocateMemoryOfType(
+    VmaPool pool,
+    VkDeviceSize size,
+    VkDeviceSize alignment,
+    bool dedicatedPreferred,
+    VkBuffer dedicatedBuffer,
+    VkImage dedicatedImage,
+    VmaBufferImageUsage dedicatedBufferImageUsage,
+    const VmaAllocationCreateInfo& createInfo,
+    uint32_t memTypeIndex,
+    VmaSuballocationType suballocType,
+    VmaDedicatedAllocationList& dedicatedAllocations,
+    VmaBlockVector& blockVector,
+    size_t allocationCount,
+    VmaAllocation* pAllocations)
+{
+    VMA_ASSERT(pAllocations != VMA_NULL);
+    VMA_DEBUG_LOG_FORMAT(" AllocateMemory: MemoryTypeIndex=%" PRIu32 ", AllocationCount=%zu, Size=%" PRIu64, memTypeIndex, allocationCount, size);
+
+    VmaAllocationCreateInfo finalCreateInfo = createInfo;
+    VkResult res = CalcMemTypeParams(
+        finalCreateInfo,
+        memTypeIndex,
+        size,
+        allocationCount);
+    if(res != VK_SUCCESS)
+        return res;
+
+    if((finalCreateInfo.flags & VMA_ALLOCATION_CREATE_DEDICATED_MEMORY_BIT) != 0)
+    {
+        return AllocateDedicatedMemory(
+            pool,
+            size,
+            suballocType,
+            dedicatedAllocations,
+            memTypeIndex,
+            (finalCreateInfo.flags & VMA_ALLOCATION_CREATE_MAPPED_BIT) != 0,
+            (finalCreateInfo.flags & VMA_ALLOCATION_CREATE_USER_DATA_COPY_STRING_BIT) != 0,
+            (finalCreateInfo.flags &
+                (VMA_ALLOCATION_CREATE_HOST_ACCESS_SEQUENTIAL_WRITE_BIT | VMA_ALLOCATION_CREATE_HOST_ACCESS_RANDOM_BIT)) != 0,
+            (finalCreateInfo.flags & VMA_ALLOCATION_CREATE_CAN_ALIAS_BIT) != 0,
+            finalCreateInfo.pUserData,
+            finalCreateInfo.priority,
+            dedicatedBuffer,
+            dedicatedImage,
+            dedicatedBufferImageUsage,
+            allocationCount,
+            pAllocations,
+            blockVector.GetAllocationNextPtr());
+    }
+    else
+    {
+        const bool canAllocateDedicated =
+            (finalCreateInfo.flags & VMA_ALLOCATION_CREATE_NEVER_ALLOCATE_BIT) == 0 &&
+            (pool == VK_NULL_HANDLE || !blockVector.HasExplicitBlockSize());
+
+        if(canAllocateDedicated)
+        {
+            // Heuristics: Allocate dedicated memory if requested size is greater than half of preferred block size.
+            if(size > blockVector.GetPreferredBlockSize() / 2)
+            {
+                dedicatedPreferred = true;
+            }
+            // Protection against creating each allocation as dedicated when we reach or exceed heap size/budget,
+            // which can quickly deplete maxMemoryAllocationCount: Don't prefer dedicated allocations when above
+            // 3/4 of the maximum allocation count.
+            if(m_PhysicalDeviceProperties.limits.maxMemoryAllocationCount < UINT32_MAX / 4 &&
+                m_DeviceMemoryCount.load() > m_PhysicalDeviceProperties.limits.maxMemoryAllocationCount * 3 / 4)
+            {
+                dedicatedPreferred = false;
+            }
+
+            if(dedicatedPreferred)
+            {
+                res = AllocateDedicatedMemory(
+                    pool,
+                    size,
+                    suballocType,
+                    dedicatedAllocations,
+                    memTypeIndex,
+                    (finalCreateInfo.flags & VMA_ALLOCATION_CREATE_MAPPED_BIT) != 0,
+                    (finalCreateInfo.flags & VMA_ALLOCATION_CREATE_USER_DATA_COPY_STRING_BIT) != 0,
+                    (finalCreateInfo.flags &
+                        (VMA_ALLOCATION_CREATE_HOST_ACCESS_SEQUENTIAL_WRITE_BIT | VMA_ALLOCATION_CREATE_HOST_ACCESS_RANDOM_BIT)) != 0,
+                    (finalCreateInfo.flags & VMA_ALLOCATION_CREATE_CAN_ALIAS_BIT) != 0,
+                    finalCreateInfo.pUserData,
+                    finalCreateInfo.priority,
+                    dedicatedBuffer,
+                    dedicatedImage,
+                    dedicatedBufferImageUsage,
+                    allocationCount,
+                    pAllocations,
+                    blockVector.GetAllocationNextPtr());
+                if(res == VK_SUCCESS)
+                {
+                    // Succeeded: AllocateDedicatedMemory function already filled pMemory, nothing more to do here.
+ VMA_DEBUG_LOG(" Allocated as DedicatedMemory"); + return VK_SUCCESS; + } + } + } + + res = blockVector.Allocate( + size, + alignment, + finalCreateInfo, + suballocType, + allocationCount, + pAllocations); + if(res == VK_SUCCESS) + return VK_SUCCESS; + + // Try dedicated memory. + if(canAllocateDedicated && !dedicatedPreferred) + { + res = AllocateDedicatedMemory( + pool, + size, + suballocType, + dedicatedAllocations, + memTypeIndex, + (finalCreateInfo.flags & VMA_ALLOCATION_CREATE_MAPPED_BIT) != 0, + (finalCreateInfo.flags & VMA_ALLOCATION_CREATE_USER_DATA_COPY_STRING_BIT) != 0, + (finalCreateInfo.flags & + (VMA_ALLOCATION_CREATE_HOST_ACCESS_SEQUENTIAL_WRITE_BIT | VMA_ALLOCATION_CREATE_HOST_ACCESS_RANDOM_BIT)) != 0, + (finalCreateInfo.flags & VMA_ALLOCATION_CREATE_CAN_ALIAS_BIT) != 0, + finalCreateInfo.pUserData, + finalCreateInfo.priority, + dedicatedBuffer, + dedicatedImage, + dedicatedBufferImageUsage, + allocationCount, + pAllocations, + blockVector.GetAllocationNextPtr()); + if(res == VK_SUCCESS) + { + // Succeeded: AllocateDedicatedMemory function already filled pMemory, nothing more to do here. + VMA_DEBUG_LOG(" Allocated as DedicatedMemory"); + return VK_SUCCESS; + } + } + // Everything failed: Return error code. + VMA_DEBUG_LOG(" vkAllocateMemory FAILED"); + return res; + } +} + +VkResult VmaAllocator_T::AllocateDedicatedMemory( + VmaPool pool, + VkDeviceSize size, + VmaSuballocationType suballocType, + VmaDedicatedAllocationList& dedicatedAllocations, + uint32_t memTypeIndex, + bool map, + bool isUserDataString, + bool isMappingAllowed, + bool canAliasMemory, + void* pUserData, + float priority, + VkBuffer dedicatedBuffer, + VkImage dedicatedImage, + VmaBufferImageUsage dedicatedBufferImageUsage, + size_t allocationCount, + VmaAllocation* pAllocations, + const void* pNextChain) +{ + VMA_ASSERT(allocationCount > 0 && pAllocations); + + VkMemoryAllocateInfo allocInfo = { VK_STRUCTURE_TYPE_MEMORY_ALLOCATE_INFO }; + allocInfo.memoryTypeIndex = memTypeIndex; + allocInfo.allocationSize = size; + allocInfo.pNext = pNextChain; + +#if VMA_DEDICATED_ALLOCATION || VMA_VULKAN_VERSION >= 1001000 + VkMemoryDedicatedAllocateInfoKHR dedicatedAllocInfo = { VK_STRUCTURE_TYPE_MEMORY_DEDICATED_ALLOCATE_INFO_KHR }; + if(!canAliasMemory) + { + if(m_UseKhrDedicatedAllocation || m_VulkanApiVersion >= VK_MAKE_VERSION(1, 1, 0)) + { + if(dedicatedBuffer != VK_NULL_HANDLE) + { + VMA_ASSERT(dedicatedImage == VK_NULL_HANDLE); + dedicatedAllocInfo.buffer = dedicatedBuffer; + VmaPnextChainPushFront(&allocInfo, &dedicatedAllocInfo); + } + else if(dedicatedImage != VK_NULL_HANDLE) + { + dedicatedAllocInfo.image = dedicatedImage; + VmaPnextChainPushFront(&allocInfo, &dedicatedAllocInfo); + } + } + } +#endif // #if VMA_DEDICATED_ALLOCATION || VMA_VULKAN_VERSION >= 1001000 + +#if VMA_BUFFER_DEVICE_ADDRESS + VkMemoryAllocateFlagsInfoKHR allocFlagsInfo = { VK_STRUCTURE_TYPE_MEMORY_ALLOCATE_FLAGS_INFO_KHR }; + if(m_UseKhrBufferDeviceAddress) + { + bool canContainBufferWithDeviceAddress = true; + if(dedicatedBuffer != VK_NULL_HANDLE) + { + canContainBufferWithDeviceAddress = dedicatedBufferImageUsage == VmaBufferImageUsage::UNKNOWN || + dedicatedBufferImageUsage.Contains(VK_BUFFER_USAGE_SHADER_DEVICE_ADDRESS_BIT_EXT); + } + else if(dedicatedImage != VK_NULL_HANDLE) + { + canContainBufferWithDeviceAddress = false; + } + if(canContainBufferWithDeviceAddress) + { + allocFlagsInfo.flags = VK_MEMORY_ALLOCATE_DEVICE_ADDRESS_BIT_KHR; + VmaPnextChainPushFront(&allocInfo, &allocFlagsInfo); + } + } +#endif // #if 
VMA_BUFFER_DEVICE_ADDRESS + +#if VMA_MEMORY_PRIORITY + VkMemoryPriorityAllocateInfoEXT priorityInfo = { VK_STRUCTURE_TYPE_MEMORY_PRIORITY_ALLOCATE_INFO_EXT }; + if(m_UseExtMemoryPriority) + { + VMA_ASSERT(priority >= 0.f && priority <= 1.f); + priorityInfo.priority = priority; + VmaPnextChainPushFront(&allocInfo, &priorityInfo); + } +#endif // #if VMA_MEMORY_PRIORITY + +#if VMA_EXTERNAL_MEMORY + // Attach VkExportMemoryAllocateInfoKHR if necessary. + VkExportMemoryAllocateInfoKHR exportMemoryAllocInfo = { VK_STRUCTURE_TYPE_EXPORT_MEMORY_ALLOCATE_INFO_KHR }; + exportMemoryAllocInfo.handleTypes = GetExternalMemoryHandleTypeFlags(memTypeIndex); + if(exportMemoryAllocInfo.handleTypes != 0) + { + VmaPnextChainPushFront(&allocInfo, &exportMemoryAllocInfo); + } +#endif // #if VMA_EXTERNAL_MEMORY + + size_t allocIndex; + VkResult res = VK_SUCCESS; + for(allocIndex = 0; allocIndex < allocationCount; ++allocIndex) + { + res = AllocateDedicatedMemoryPage( + pool, + size, + suballocType, + memTypeIndex, + allocInfo, + map, + isUserDataString, + isMappingAllowed, + pUserData, + pAllocations + allocIndex); + if(res != VK_SUCCESS) + { + break; + } + } + + if(res == VK_SUCCESS) + { + for (allocIndex = 0; allocIndex < allocationCount; ++allocIndex) + { + dedicatedAllocations.Register(pAllocations[allocIndex]); + } + VMA_DEBUG_LOG_FORMAT(" Allocated DedicatedMemory Count=%zu, MemoryTypeIndex=#%" PRIu32, allocationCount, memTypeIndex); + } + else + { + // Free all already created allocations. + while(allocIndex--) + { + VmaAllocation currAlloc = pAllocations[allocIndex]; + VkDeviceMemory hMemory = currAlloc->GetMemory(); + + /* + There is no need to call this, because Vulkan spec allows to skip vkUnmapMemory + before vkFreeMemory. + + if(currAlloc->GetMappedData() != VMA_NULL) + { + (*m_VulkanFunctions.vkUnmapMemory)(m_hDevice, hMemory); + } + */ + + FreeVulkanMemory(memTypeIndex, currAlloc->GetSize(), hMemory); + m_Budget.RemoveAllocation(MemoryTypeIndexToHeapIndex(memTypeIndex), currAlloc->GetSize()); + m_AllocationObjectAllocator.Free(currAlloc); + } + + memset(pAllocations, 0, sizeof(VmaAllocation) * allocationCount); + } + + return res; +} + +VkResult VmaAllocator_T::AllocateDedicatedMemoryPage( + VmaPool pool, + VkDeviceSize size, + VmaSuballocationType suballocType, + uint32_t memTypeIndex, + const VkMemoryAllocateInfo& allocInfo, + bool map, + bool isUserDataString, + bool isMappingAllowed, + void* pUserData, + VmaAllocation* pAllocation) +{ + VkDeviceMemory hMemory = VK_NULL_HANDLE; + VkResult res = AllocateVulkanMemory(&allocInfo, &hMemory); + if(res < 0) + { + VMA_DEBUG_LOG(" vkAllocateMemory FAILED"); + return res; + } + + void* pMappedData = VMA_NULL; + if(map) + { + res = (*m_VulkanFunctions.vkMapMemory)( + m_hDevice, + hMemory, + 0, + VK_WHOLE_SIZE, + 0, + &pMappedData); + if(res < 0) + { + VMA_DEBUG_LOG(" vkMapMemory FAILED"); + FreeVulkanMemory(memTypeIndex, size, hMemory); + return res; + } + } + + *pAllocation = m_AllocationObjectAllocator.Allocate(isMappingAllowed); + (*pAllocation)->InitDedicatedAllocation(this, pool, memTypeIndex, hMemory, suballocType, pMappedData, size); + if (isUserDataString) + (*pAllocation)->SetName(this, (const char*)pUserData); + else + (*pAllocation)->SetUserData(this, pUserData); + m_Budget.AddAllocation(MemoryTypeIndexToHeapIndex(memTypeIndex), size); + if(VMA_DEBUG_INITIALIZE_ALLOCATIONS) + { + FillAllocation(*pAllocation, VMA_ALLOCATION_FILL_PATTERN_CREATED); + } + + return VK_SUCCESS; +} + +void VmaAllocator_T::GetBufferMemoryRequirements( + VkBuffer 
hBuffer, + VkMemoryRequirements& memReq, + bool& requiresDedicatedAllocation, + bool& prefersDedicatedAllocation) const +{ +#if VMA_DEDICATED_ALLOCATION || VMA_VULKAN_VERSION >= 1001000 + if(m_UseKhrDedicatedAllocation || m_VulkanApiVersion >= VK_MAKE_VERSION(1, 1, 0)) + { + VkBufferMemoryRequirementsInfo2KHR memReqInfo = { VK_STRUCTURE_TYPE_BUFFER_MEMORY_REQUIREMENTS_INFO_2_KHR }; + memReqInfo.buffer = hBuffer; + + VkMemoryDedicatedRequirementsKHR memDedicatedReq = { VK_STRUCTURE_TYPE_MEMORY_DEDICATED_REQUIREMENTS_KHR }; + + VkMemoryRequirements2KHR memReq2 = { VK_STRUCTURE_TYPE_MEMORY_REQUIREMENTS_2_KHR }; + VmaPnextChainPushFront(&memReq2, &memDedicatedReq); + + (*m_VulkanFunctions.vkGetBufferMemoryRequirements2KHR)(m_hDevice, &memReqInfo, &memReq2); + + memReq = memReq2.memoryRequirements; + requiresDedicatedAllocation = (memDedicatedReq.requiresDedicatedAllocation != VK_FALSE); + prefersDedicatedAllocation = (memDedicatedReq.prefersDedicatedAllocation != VK_FALSE); + } + else +#endif // #if VMA_DEDICATED_ALLOCATION || VMA_VULKAN_VERSION >= 1001000 + { + (*m_VulkanFunctions.vkGetBufferMemoryRequirements)(m_hDevice, hBuffer, &memReq); + requiresDedicatedAllocation = false; + prefersDedicatedAllocation = false; + } +} + +void VmaAllocator_T::GetImageMemoryRequirements( + VkImage hImage, + VkMemoryRequirements& memReq, + bool& requiresDedicatedAllocation, + bool& prefersDedicatedAllocation) const +{ +#if VMA_DEDICATED_ALLOCATION || VMA_VULKAN_VERSION >= 1001000 + if(m_UseKhrDedicatedAllocation || m_VulkanApiVersion >= VK_MAKE_VERSION(1, 1, 0)) + { + VkImageMemoryRequirementsInfo2KHR memReqInfo = { VK_STRUCTURE_TYPE_IMAGE_MEMORY_REQUIREMENTS_INFO_2_KHR }; + memReqInfo.image = hImage; + + VkMemoryDedicatedRequirementsKHR memDedicatedReq = { VK_STRUCTURE_TYPE_MEMORY_DEDICATED_REQUIREMENTS_KHR }; + + VkMemoryRequirements2KHR memReq2 = { VK_STRUCTURE_TYPE_MEMORY_REQUIREMENTS_2_KHR }; + VmaPnextChainPushFront(&memReq2, &memDedicatedReq); + + (*m_VulkanFunctions.vkGetImageMemoryRequirements2KHR)(m_hDevice, &memReqInfo, &memReq2); + + memReq = memReq2.memoryRequirements; + requiresDedicatedAllocation = (memDedicatedReq.requiresDedicatedAllocation != VK_FALSE); + prefersDedicatedAllocation = (memDedicatedReq.prefersDedicatedAllocation != VK_FALSE); + } + else +#endif // #if VMA_DEDICATED_ALLOCATION || VMA_VULKAN_VERSION >= 1001000 + { + (*m_VulkanFunctions.vkGetImageMemoryRequirements)(m_hDevice, hImage, &memReq); + requiresDedicatedAllocation = false; + prefersDedicatedAllocation = false; + } +} + +VkResult VmaAllocator_T::FindMemoryTypeIndex( + uint32_t memoryTypeBits, + const VmaAllocationCreateInfo* pAllocationCreateInfo, + VmaBufferImageUsage bufImgUsage, + uint32_t* pMemoryTypeIndex) const +{ + memoryTypeBits &= GetGlobalMemoryTypeBits(); + + if(pAllocationCreateInfo->memoryTypeBits != 0) + { + memoryTypeBits &= pAllocationCreateInfo->memoryTypeBits; + } + + VkMemoryPropertyFlags requiredFlags = 0, preferredFlags = 0, notPreferredFlags = 0; + if(!FindMemoryPreferences( + IsIntegratedGpu(), + *pAllocationCreateInfo, + bufImgUsage, + requiredFlags, preferredFlags, notPreferredFlags)) + { + return VK_ERROR_FEATURE_NOT_PRESENT; + } + + *pMemoryTypeIndex = UINT32_MAX; + uint32_t minCost = UINT32_MAX; + for(uint32_t memTypeIndex = 0, memTypeBit = 1; + memTypeIndex < GetMemoryTypeCount(); + ++memTypeIndex, memTypeBit <<= 1) + { + // This memory type is acceptable according to memoryTypeBits bitmask. 
+ if((memTypeBit & memoryTypeBits) != 0) + { + const VkMemoryPropertyFlags currFlags = + m_MemProps.memoryTypes[memTypeIndex].propertyFlags; + // This memory type contains requiredFlags. + if((requiredFlags & ~currFlags) == 0) + { + // Calculate cost as number of bits from preferredFlags not present in this memory type. + uint32_t currCost = VMA_COUNT_BITS_SET(preferredFlags & ~currFlags) + + VMA_COUNT_BITS_SET(currFlags & notPreferredFlags); + // Remember memory type with lowest cost. + if(currCost < minCost) + { + *pMemoryTypeIndex = memTypeIndex; + if(currCost == 0) + { + return VK_SUCCESS; + } + minCost = currCost; + } + } + } + } + return (*pMemoryTypeIndex != UINT32_MAX) ? VK_SUCCESS : VK_ERROR_FEATURE_NOT_PRESENT; +} + +VkResult VmaAllocator_T::CalcMemTypeParams( + VmaAllocationCreateInfo& inoutCreateInfo, + uint32_t memTypeIndex, + VkDeviceSize size, + size_t allocationCount) +{ + // If memory type is not HOST_VISIBLE, disable MAPPED. + if((inoutCreateInfo.flags & VMA_ALLOCATION_CREATE_MAPPED_BIT) != 0 && + (m_MemProps.memoryTypes[memTypeIndex].propertyFlags & VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT) == 0) + { + inoutCreateInfo.flags &= ~VMA_ALLOCATION_CREATE_MAPPED_BIT; + } + + if((inoutCreateInfo.flags & VMA_ALLOCATION_CREATE_DEDICATED_MEMORY_BIT) != 0 && + (inoutCreateInfo.flags & VMA_ALLOCATION_CREATE_WITHIN_BUDGET_BIT) != 0) + { + const uint32_t heapIndex = MemoryTypeIndexToHeapIndex(memTypeIndex); + VmaBudget heapBudget = {}; + GetHeapBudgets(&heapBudget, heapIndex, 1); + if(heapBudget.usage + size * allocationCount > heapBudget.budget) + { + return VK_ERROR_OUT_OF_DEVICE_MEMORY; + } + } + return VK_SUCCESS; +} + +VkResult VmaAllocator_T::CalcAllocationParams( + VmaAllocationCreateInfo& inoutCreateInfo, + bool dedicatedRequired, + bool dedicatedPreferred) +{ + VMA_ASSERT((inoutCreateInfo.flags & + (VMA_ALLOCATION_CREATE_HOST_ACCESS_SEQUENTIAL_WRITE_BIT | VMA_ALLOCATION_CREATE_HOST_ACCESS_RANDOM_BIT)) != + (VMA_ALLOCATION_CREATE_HOST_ACCESS_SEQUENTIAL_WRITE_BIT | VMA_ALLOCATION_CREATE_HOST_ACCESS_RANDOM_BIT) && + "Specifying both flags VMA_ALLOCATION_CREATE_HOST_ACCESS_SEQUENTIAL_WRITE_BIT and VMA_ALLOCATION_CREATE_HOST_ACCESS_RANDOM_BIT is incorrect."); + VMA_ASSERT((((inoutCreateInfo.flags & VMA_ALLOCATION_CREATE_HOST_ACCESS_ALLOW_TRANSFER_INSTEAD_BIT) == 0 || + (inoutCreateInfo.flags & (VMA_ALLOCATION_CREATE_HOST_ACCESS_SEQUENTIAL_WRITE_BIT | VMA_ALLOCATION_CREATE_HOST_ACCESS_RANDOM_BIT)) != 0)) && + "Specifying VMA_ALLOCATION_CREATE_HOST_ACCESS_ALLOW_TRANSFER_INSTEAD_BIT requires also VMA_ALLOCATION_CREATE_HOST_ACCESS_SEQUENTIAL_WRITE_BIT or VMA_ALLOCATION_CREATE_HOST_ACCESS_RANDOM_BIT."); + if(inoutCreateInfo.usage == VMA_MEMORY_USAGE_AUTO || inoutCreateInfo.usage == VMA_MEMORY_USAGE_AUTO_PREFER_DEVICE || inoutCreateInfo.usage == VMA_MEMORY_USAGE_AUTO_PREFER_HOST) + { + if((inoutCreateInfo.flags & VMA_ALLOCATION_CREATE_MAPPED_BIT) != 0) + { + VMA_ASSERT((inoutCreateInfo.flags & (VMA_ALLOCATION_CREATE_HOST_ACCESS_SEQUENTIAL_WRITE_BIT | VMA_ALLOCATION_CREATE_HOST_ACCESS_RANDOM_BIT)) != 0 && + "When using VMA_ALLOCATION_CREATE_MAPPED_BIT and usage = VMA_MEMORY_USAGE_AUTO*, you must also specify VMA_ALLOCATION_CREATE_HOST_ACCESS_SEQUENTIAL_WRITE_BIT or VMA_ALLOCATION_CREATE_HOST_ACCESS_RANDOM_BIT."); + } + } + + // If memory is lazily allocated, it should be always dedicated. 
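+    // (VMA_MEMORY_USAGE_GPU_LAZILY_ALLOCATED targets transient attachments, which are not suballocated from blocks.)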
+ if(dedicatedRequired || + inoutCreateInfo.usage == VMA_MEMORY_USAGE_GPU_LAZILY_ALLOCATED) + { + inoutCreateInfo.flags |= VMA_ALLOCATION_CREATE_DEDICATED_MEMORY_BIT; + } + + if(inoutCreateInfo.pool != VK_NULL_HANDLE) + { + if(inoutCreateInfo.pool->m_BlockVector.HasExplicitBlockSize() && + (inoutCreateInfo.flags & VMA_ALLOCATION_CREATE_DEDICATED_MEMORY_BIT) != 0) + { + VMA_ASSERT(0 && "Specifying VMA_ALLOCATION_CREATE_DEDICATED_MEMORY_BIT while current custom pool doesn't support dedicated allocations."); + return VK_ERROR_FEATURE_NOT_PRESENT; + } + inoutCreateInfo.priority = inoutCreateInfo.pool->m_BlockVector.GetPriority(); + } + + if((inoutCreateInfo.flags & VMA_ALLOCATION_CREATE_DEDICATED_MEMORY_BIT) != 0 && + (inoutCreateInfo.flags & VMA_ALLOCATION_CREATE_NEVER_ALLOCATE_BIT) != 0) + { + VMA_ASSERT(0 && "Specifying VMA_ALLOCATION_CREATE_DEDICATED_MEMORY_BIT together with VMA_ALLOCATION_CREATE_NEVER_ALLOCATE_BIT makes no sense."); + return VK_ERROR_FEATURE_NOT_PRESENT; + } + + if(VMA_DEBUG_ALWAYS_DEDICATED_MEMORY && + (inoutCreateInfo.flags & VMA_ALLOCATION_CREATE_NEVER_ALLOCATE_BIT) != 0) + { + inoutCreateInfo.flags |= VMA_ALLOCATION_CREATE_DEDICATED_MEMORY_BIT; + } + + // Non-auto USAGE values imply HOST_ACCESS flags. + // And so does VMA_MEMORY_USAGE_UNKNOWN because it is used with custom pools. + // Which specific flag is used doesn't matter. They change things only when used with VMA_MEMORY_USAGE_AUTO*. + // Otherwise they just protect from assert on mapping. + if(inoutCreateInfo.usage != VMA_MEMORY_USAGE_AUTO && + inoutCreateInfo.usage != VMA_MEMORY_USAGE_AUTO_PREFER_DEVICE && + inoutCreateInfo.usage != VMA_MEMORY_USAGE_AUTO_PREFER_HOST) + { + if((inoutCreateInfo.flags & (VMA_ALLOCATION_CREATE_HOST_ACCESS_SEQUENTIAL_WRITE_BIT | VMA_ALLOCATION_CREATE_HOST_ACCESS_RANDOM_BIT)) == 0) + { + inoutCreateInfo.flags |= VMA_ALLOCATION_CREATE_HOST_ACCESS_RANDOM_BIT; + } + } + + return VK_SUCCESS; +} + +VkResult VmaAllocator_T::AllocateMemory( + const VkMemoryRequirements& vkMemReq, + bool requiresDedicatedAllocation, + bool prefersDedicatedAllocation, + VkBuffer dedicatedBuffer, + VkImage dedicatedImage, + VmaBufferImageUsage dedicatedBufferImageUsage, + const VmaAllocationCreateInfo& createInfo, + VmaSuballocationType suballocType, + size_t allocationCount, + VmaAllocation* pAllocations) +{ + memset(pAllocations, 0, sizeof(VmaAllocation) * allocationCount); + + VMA_ASSERT(VmaIsPow2(vkMemReq.alignment)); + + if(vkMemReq.size == 0) + { + return VK_ERROR_INITIALIZATION_FAILED; + } + + VmaAllocationCreateInfo createInfoFinal = createInfo; + VkResult res = CalcAllocationParams(createInfoFinal, requiresDedicatedAllocation, prefersDedicatedAllocation); + if(res != VK_SUCCESS) + return res; + + if(createInfoFinal.pool != VK_NULL_HANDLE) + { + VmaBlockVector& blockVector = createInfoFinal.pool->m_BlockVector; + return AllocateMemoryOfType( + createInfoFinal.pool, + vkMemReq.size, + vkMemReq.alignment, + prefersDedicatedAllocation, + dedicatedBuffer, + dedicatedImage, + dedicatedBufferImageUsage, + createInfoFinal, + blockVector.GetMemoryTypeIndex(), + suballocType, + createInfoFinal.pool->m_DedicatedAllocations, + blockVector, + allocationCount, + pAllocations); + } + else + { + // Bit mask of memory Vulkan types acceptable for this allocation. + uint32_t memoryTypeBits = vkMemReq.memoryTypeBits; + uint32_t memTypeIndex = UINT32_MAX; + res = FindMemoryTypeIndex(memoryTypeBits, &createInfoFinal, dedicatedBufferImageUsage, &memTypeIndex); + // Can't find any single memory type matching requirements. 
res is VK_ERROR_FEATURE_NOT_PRESENT. + if(res != VK_SUCCESS) + return res; + do + { + VmaBlockVector* blockVector = m_pBlockVectors[memTypeIndex]; + VMA_ASSERT(blockVector && "Trying to use unsupported memory type!"); + res = AllocateMemoryOfType( + VK_NULL_HANDLE, + vkMemReq.size, + vkMemReq.alignment, + requiresDedicatedAllocation || prefersDedicatedAllocation, + dedicatedBuffer, + dedicatedImage, + dedicatedBufferImageUsage, + createInfoFinal, + memTypeIndex, + suballocType, + m_DedicatedAllocations[memTypeIndex], + *blockVector, + allocationCount, + pAllocations); + // Allocation succeeded + if(res == VK_SUCCESS) + return VK_SUCCESS; + + // Remove old memTypeIndex from list of possibilities. + memoryTypeBits &= ~(1u << memTypeIndex); + // Find alternative memTypeIndex. + res = FindMemoryTypeIndex(memoryTypeBits, &createInfoFinal, dedicatedBufferImageUsage, &memTypeIndex); + } while(res == VK_SUCCESS); + + // No other matching memory type index could be found. + // Not returning res, which is VK_ERROR_FEATURE_NOT_PRESENT, because we already failed to allocate once. + return VK_ERROR_OUT_OF_DEVICE_MEMORY; + } +} + +void VmaAllocator_T::FreeMemory( + size_t allocationCount, + const VmaAllocation* pAllocations) +{ + VMA_ASSERT(pAllocations); + + for(size_t allocIndex = allocationCount; allocIndex--; ) + { + VmaAllocation allocation = pAllocations[allocIndex]; + + if(allocation != VK_NULL_HANDLE) + { + if(VMA_DEBUG_INITIALIZE_ALLOCATIONS) + { + FillAllocation(allocation, VMA_ALLOCATION_FILL_PATTERN_DESTROYED); + } + + switch(allocation->GetType()) + { + case VmaAllocation_T::ALLOCATION_TYPE_BLOCK: + { + VmaBlockVector* pBlockVector = VMA_NULL; + VmaPool hPool = allocation->GetParentPool(); + if(hPool != VK_NULL_HANDLE) + { + pBlockVector = &hPool->m_BlockVector; + } + else + { + const uint32_t memTypeIndex = allocation->GetMemoryTypeIndex(); + pBlockVector = m_pBlockVectors[memTypeIndex]; + VMA_ASSERT(pBlockVector && "Trying to free memory of unsupported type!"); + } + pBlockVector->Free(allocation); + } + break; + case VmaAllocation_T::ALLOCATION_TYPE_DEDICATED: + FreeDedicatedMemory(allocation); + break; + default: + VMA_ASSERT(0); + } + } + } +} + +void VmaAllocator_T::CalculateStatistics(VmaTotalStatistics* pStats) +{ + // Initialize. + VmaClearDetailedStatistics(pStats->total); + for(uint32_t i = 0; i < VK_MAX_MEMORY_TYPES; ++i) + VmaClearDetailedStatistics(pStats->memoryType[i]); + for(uint32_t i = 0; i < VK_MAX_MEMORY_HEAPS; ++i) + VmaClearDetailedStatistics(pStats->memoryHeap[i]); + + // Process default pools. + for(uint32_t memTypeIndex = 0; memTypeIndex < GetMemoryTypeCount(); ++memTypeIndex) + { + VmaBlockVector* const pBlockVector = m_pBlockVectors[memTypeIndex]; + if (pBlockVector != VMA_NULL) + pBlockVector->AddDetailedStatistics(pStats->memoryType[memTypeIndex]); + } + + // Process custom pools. + { + VmaMutexLockRead lock(m_PoolsMutex, m_UseMutex); + for(VmaPool pool = m_Pools.Front(); pool != VMA_NULL; pool = m_Pools.GetNext(pool)) + { + VmaBlockVector& blockVector = pool->m_BlockVector; + const uint32_t memTypeIndex = blockVector.GetMemoryTypeIndex(); + blockVector.AddDetailedStatistics(pStats->memoryType[memTypeIndex]); + pool->m_DedicatedAllocations.AddDetailedStatistics(pStats->memoryType[memTypeIndex]); + } + } + + // Process dedicated allocations. 
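+    // (Dedicated allocations live outside the block vectors, so they are accumulated separately here.)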
+ for(uint32_t memTypeIndex = 0; memTypeIndex < GetMemoryTypeCount(); ++memTypeIndex) + { + m_DedicatedAllocations[memTypeIndex].AddDetailedStatistics(pStats->memoryType[memTypeIndex]); + } + + // Sum from memory types to memory heaps. + for(uint32_t memTypeIndex = 0; memTypeIndex < GetMemoryTypeCount(); ++memTypeIndex) + { + const uint32_t memHeapIndex = m_MemProps.memoryTypes[memTypeIndex].heapIndex; + VmaAddDetailedStatistics(pStats->memoryHeap[memHeapIndex], pStats->memoryType[memTypeIndex]); + } + + // Sum from memory heaps to total. + for(uint32_t memHeapIndex = 0; memHeapIndex < GetMemoryHeapCount(); ++memHeapIndex) + VmaAddDetailedStatistics(pStats->total, pStats->memoryHeap[memHeapIndex]); + + VMA_ASSERT(pStats->total.statistics.allocationCount == 0 || + pStats->total.allocationSizeMax >= pStats->total.allocationSizeMin); + VMA_ASSERT(pStats->total.unusedRangeCount == 0 || + pStats->total.unusedRangeSizeMax >= pStats->total.unusedRangeSizeMin); +} + +void VmaAllocator_T::GetHeapBudgets(VmaBudget* outBudgets, uint32_t firstHeap, uint32_t heapCount) +{ +#if VMA_MEMORY_BUDGET + if(m_UseExtMemoryBudget) + { + if(m_Budget.m_OperationsSinceBudgetFetch < 30) + { + VmaMutexLockRead lockRead(m_Budget.m_BudgetMutex, m_UseMutex); + for(uint32_t i = 0; i < heapCount; ++i, ++outBudgets) + { + const uint32_t heapIndex = firstHeap + i; + + outBudgets->statistics.blockCount = m_Budget.m_BlockCount[heapIndex]; + outBudgets->statistics.allocationCount = m_Budget.m_AllocationCount[heapIndex]; + outBudgets->statistics.blockBytes = m_Budget.m_BlockBytes[heapIndex]; + outBudgets->statistics.allocationBytes = m_Budget.m_AllocationBytes[heapIndex]; + + if(m_Budget.m_VulkanUsage[heapIndex] + outBudgets->statistics.blockBytes > m_Budget.m_BlockBytesAtBudgetFetch[heapIndex]) + { + outBudgets->usage = m_Budget.m_VulkanUsage[heapIndex] + + outBudgets->statistics.blockBytes - m_Budget.m_BlockBytesAtBudgetFetch[heapIndex]; + } + else + { + outBudgets->usage = 0; + } + + // Have to take MIN with heap size because explicit HeapSizeLimit is included in it. + outBudgets->budget = VMA_MIN( + m_Budget.m_VulkanBudget[heapIndex], m_MemProps.memoryHeaps[heapIndex].size); + } + } + else + { + UpdateVulkanBudget(); // Outside of mutex lock + GetHeapBudgets(outBudgets, firstHeap, heapCount); // Recursion + } + } + else +#endif + { + for(uint32_t i = 0; i < heapCount; ++i, ++outBudgets) + { + const uint32_t heapIndex = firstHeap + i; + + outBudgets->statistics.blockCount = m_Budget.m_BlockCount[heapIndex]; + outBudgets->statistics.allocationCount = m_Budget.m_AllocationCount[heapIndex]; + outBudgets->statistics.blockBytes = m_Budget.m_BlockBytes[heapIndex]; + outBudgets->statistics.allocationBytes = m_Budget.m_AllocationBytes[heapIndex]; + + outBudgets->usage = outBudgets->statistics.blockBytes; + outBudgets->budget = m_MemProps.memoryHeaps[heapIndex].size * 8 / 10; // 80% heuristics. 
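+            // Example: with this fallback an 8 GiB heap is assumed to have about 6.4 GiB of usable budget.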
+ } + } +} + +void VmaAllocator_T::GetAllocationInfo(VmaAllocation hAllocation, VmaAllocationInfo* pAllocationInfo) +{ + pAllocationInfo->memoryType = hAllocation->GetMemoryTypeIndex(); + pAllocationInfo->deviceMemory = hAllocation->GetMemory(); + pAllocationInfo->offset = hAllocation->GetOffset(); + pAllocationInfo->size = hAllocation->GetSize(); + pAllocationInfo->pMappedData = hAllocation->GetMappedData(); + pAllocationInfo->pUserData = hAllocation->GetUserData(); + pAllocationInfo->pName = hAllocation->GetName(); +} + +void VmaAllocator_T::GetAllocationInfo2(VmaAllocation hAllocation, VmaAllocationInfo2* pAllocationInfo) +{ + GetAllocationInfo(hAllocation, &pAllocationInfo->allocationInfo); + + switch (hAllocation->GetType()) + { + case VmaAllocation_T::ALLOCATION_TYPE_BLOCK: + pAllocationInfo->blockSize = hAllocation->GetBlock()->m_pMetadata->GetSize(); + pAllocationInfo->dedicatedMemory = VK_FALSE; + break; + case VmaAllocation_T::ALLOCATION_TYPE_DEDICATED: + pAllocationInfo->blockSize = pAllocationInfo->allocationInfo.size; + pAllocationInfo->dedicatedMemory = VK_TRUE; + break; + default: + VMA_ASSERT(0); + } +} + +VkResult VmaAllocator_T::CreatePool(const VmaPoolCreateInfo* pCreateInfo, VmaPool* pPool) +{ + VMA_DEBUG_LOG_FORMAT(" CreatePool: MemoryTypeIndex=%" PRIu32 ", flags=%" PRIu32, pCreateInfo->memoryTypeIndex, pCreateInfo->flags); + + VmaPoolCreateInfo newCreateInfo = *pCreateInfo; + + // Protection against uninitialized new structure member. If garbage data are left there, this pointer dereference would crash. + if(pCreateInfo->pMemoryAllocateNext) + { + VMA_ASSERT(((const VkBaseInStructure*)pCreateInfo->pMemoryAllocateNext)->sType != 0); + } + + if(newCreateInfo.maxBlockCount == 0) + { + newCreateInfo.maxBlockCount = SIZE_MAX; + } + if(newCreateInfo.minBlockCount > newCreateInfo.maxBlockCount) + { + return VK_ERROR_INITIALIZATION_FAILED; + } + // Memory type index out of range or forbidden. + if(pCreateInfo->memoryTypeIndex >= GetMemoryTypeCount() || + ((1u << pCreateInfo->memoryTypeIndex) & m_GlobalMemoryTypeBits) == 0) + { + return VK_ERROR_FEATURE_NOT_PRESENT; + } + if(newCreateInfo.minAllocationAlignment > 0) + { + VMA_ASSERT(VmaIsPow2(newCreateInfo.minAllocationAlignment)); + } + + const VkDeviceSize preferredBlockSize = CalcPreferredBlockSize(newCreateInfo.memoryTypeIndex); + + *pPool = vma_new(this, VmaPool_T)(this, newCreateInfo, preferredBlockSize); + + VkResult res = (*pPool)->m_BlockVector.CreateMinBlocks(); + if(res != VK_SUCCESS) + { + vma_delete(this, *pPool); + *pPool = VMA_NULL; + return res; + } + + // Add to m_Pools. + { + VmaMutexLockWrite lock(m_PoolsMutex, m_UseMutex); + (*pPool)->SetId(m_NextPoolId++); + m_Pools.PushBack(*pPool); + } + + return VK_SUCCESS; +} + +void VmaAllocator_T::DestroyPool(VmaPool pool) +{ + // Remove from m_Pools. 
+    {
+        VmaMutexLockWrite lock(m_PoolsMutex, m_UseMutex);
+        m_Pools.Remove(pool);
+    }
+
+    vma_delete(this, pool);
+}
+
+void VmaAllocator_T::GetPoolStatistics(VmaPool pool, VmaStatistics* pPoolStats)
+{
+    VmaClearStatistics(*pPoolStats);
+    pool->m_BlockVector.AddStatistics(*pPoolStats);
+    pool->m_DedicatedAllocations.AddStatistics(*pPoolStats);
+}
+
+void VmaAllocator_T::CalculatePoolStatistics(VmaPool pool, VmaDetailedStatistics* pPoolStats)
+{
+    VmaClearDetailedStatistics(*pPoolStats);
+    pool->m_BlockVector.AddDetailedStatistics(*pPoolStats);
+    pool->m_DedicatedAllocations.AddDetailedStatistics(*pPoolStats);
+}
+
+void VmaAllocator_T::SetCurrentFrameIndex(uint32_t frameIndex)
+{
+    m_CurrentFrameIndex.store(frameIndex);
+
+#if VMA_MEMORY_BUDGET
+    if(m_UseExtMemoryBudget)
+    {
+        UpdateVulkanBudget();
+    }
+#endif // #if VMA_MEMORY_BUDGET
+}
+
+VkResult VmaAllocator_T::CheckPoolCorruption(VmaPool hPool)
+{
+    return hPool->m_BlockVector.CheckCorruption();
+}
+
+VkResult VmaAllocator_T::CheckCorruption(uint32_t memoryTypeBits)
+{
+    VkResult finalRes = VK_ERROR_FEATURE_NOT_PRESENT;
+
+    // Process default pools.
+    for(uint32_t memTypeIndex = 0; memTypeIndex < GetMemoryTypeCount(); ++memTypeIndex)
+    {
+        VmaBlockVector* const pBlockVector = m_pBlockVectors[memTypeIndex];
+        if(pBlockVector != VMA_NULL)
+        {
+            VkResult localRes = pBlockVector->CheckCorruption();
+            switch(localRes)
+            {
+            case VK_ERROR_FEATURE_NOT_PRESENT:
+                break;
+            case VK_SUCCESS:
+                finalRes = VK_SUCCESS;
+                break;
+            default:
+                return localRes;
+            }
+        }
+    }
+
+    // Process custom pools.
+    {
+        VmaMutexLockRead lock(m_PoolsMutex, m_UseMutex);
+        for(VmaPool pool = m_Pools.Front(); pool != VMA_NULL; pool = m_Pools.GetNext(pool))
+        {
+            if(((1u << pool->m_BlockVector.GetMemoryTypeIndex()) & memoryTypeBits) != 0)
+            {
+                VkResult localRes = pool->m_BlockVector.CheckCorruption();
+                switch(localRes)
+                {
+                case VK_ERROR_FEATURE_NOT_PRESENT:
+                    break;
+                case VK_SUCCESS:
+                    finalRes = VK_SUCCESS;
+                    break;
+                default:
+                    return localRes;
+                }
+            }
+        }
+    }
+
+    return finalRes;
+}
+
+VkResult VmaAllocator_T::AllocateVulkanMemory(const VkMemoryAllocateInfo* pAllocateInfo, VkDeviceMemory* pMemory)
+{
+    AtomicTransactionalIncrement<VMA_ATOMIC_UINT32> deviceMemoryCountIncrement;
+    const uint64_t prevDeviceMemoryCount = deviceMemoryCountIncrement.Increment(&m_DeviceMemoryCount);
+#if VMA_DEBUG_DONT_EXCEED_MAX_MEMORY_ALLOCATION_COUNT
+    if(prevDeviceMemoryCount >= m_PhysicalDeviceProperties.limits.maxMemoryAllocationCount)
+    {
+        return VK_ERROR_TOO_MANY_OBJECTS;
+    }
+#endif
+
+    const uint32_t heapIndex = MemoryTypeIndexToHeapIndex(pAllocateInfo->memoryTypeIndex);
+
+    // HeapSizeLimit is in effect for this heap.
+    if((m_HeapSizeLimitMask & (1u << heapIndex)) != 0)
+    {
+        const VkDeviceSize heapSize = m_MemProps.memoryHeaps[heapIndex].size;
+        VkDeviceSize blockBytes = m_Budget.m_BlockBytes[heapIndex];
+        for(;;)
+        {
+            const VkDeviceSize blockBytesAfterAllocation = blockBytes + pAllocateInfo->allocationSize;
+            if(blockBytesAfterAllocation > heapSize)
+            {
+                return VK_ERROR_OUT_OF_DEVICE_MEMORY;
+            }
+            if(m_Budget.m_BlockBytes[heapIndex].compare_exchange_strong(blockBytes, blockBytesAfterAllocation))
+            {
+                break;
+            }
+        }
+    }
+    else
+    {
+        m_Budget.m_BlockBytes[heapIndex] += pAllocateInfo->allocationSize;
+    }
+    ++m_Budget.m_BlockCount[heapIndex];
+
+    // VULKAN CALL vkAllocateMemory.
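+    // The budget counters above were incremented optimistically; they are rolled back below if the driver call fails.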
+ VkResult res = (*m_VulkanFunctions.vkAllocateMemory)(m_hDevice, pAllocateInfo, GetAllocationCallbacks(), pMemory); + + if(res == VK_SUCCESS) + { +#if VMA_MEMORY_BUDGET + ++m_Budget.m_OperationsSinceBudgetFetch; +#endif + + // Informative callback. + if(m_DeviceMemoryCallbacks.pfnAllocate != VMA_NULL) + { + (*m_DeviceMemoryCallbacks.pfnAllocate)(this, pAllocateInfo->memoryTypeIndex, *pMemory, pAllocateInfo->allocationSize, m_DeviceMemoryCallbacks.pUserData); + } + + deviceMemoryCountIncrement.Commit(); + } + else + { + --m_Budget.m_BlockCount[heapIndex]; + m_Budget.m_BlockBytes[heapIndex] -= pAllocateInfo->allocationSize; + } + + return res; +} + +void VmaAllocator_T::FreeVulkanMemory(uint32_t memoryType, VkDeviceSize size, VkDeviceMemory hMemory) +{ + // Informative callback. + if(m_DeviceMemoryCallbacks.pfnFree != VMA_NULL) + { + (*m_DeviceMemoryCallbacks.pfnFree)(this, memoryType, hMemory, size, m_DeviceMemoryCallbacks.pUserData); + } + + // VULKAN CALL vkFreeMemory. + (*m_VulkanFunctions.vkFreeMemory)(m_hDevice, hMemory, GetAllocationCallbacks()); + + const uint32_t heapIndex = MemoryTypeIndexToHeapIndex(memoryType); + --m_Budget.m_BlockCount[heapIndex]; + m_Budget.m_BlockBytes[heapIndex] -= size; + + --m_DeviceMemoryCount; +} + +VkResult VmaAllocator_T::BindVulkanBuffer( + VkDeviceMemory memory, + VkDeviceSize memoryOffset, + VkBuffer buffer, + const void* pNext) +{ + if(pNext != VMA_NULL) + { +#if VMA_VULKAN_VERSION >= 1001000 || VMA_BIND_MEMORY2 + if((m_UseKhrBindMemory2 || m_VulkanApiVersion >= VK_MAKE_VERSION(1, 1, 0)) && + m_VulkanFunctions.vkBindBufferMemory2KHR != VMA_NULL) + { + VkBindBufferMemoryInfoKHR bindBufferMemoryInfo = { VK_STRUCTURE_TYPE_BIND_BUFFER_MEMORY_INFO_KHR }; + bindBufferMemoryInfo.pNext = pNext; + bindBufferMemoryInfo.buffer = buffer; + bindBufferMemoryInfo.memory = memory; + bindBufferMemoryInfo.memoryOffset = memoryOffset; + return (*m_VulkanFunctions.vkBindBufferMemory2KHR)(m_hDevice, 1, &bindBufferMemoryInfo); + } + else +#endif // #if VMA_VULKAN_VERSION >= 1001000 || VMA_BIND_MEMORY2 + { + return VK_ERROR_EXTENSION_NOT_PRESENT; + } + } + else + { + return (*m_VulkanFunctions.vkBindBufferMemory)(m_hDevice, buffer, memory, memoryOffset); + } +} + +VkResult VmaAllocator_T::BindVulkanImage( + VkDeviceMemory memory, + VkDeviceSize memoryOffset, + VkImage image, + const void* pNext) +{ + if(pNext != VMA_NULL) + { +#if VMA_VULKAN_VERSION >= 1001000 || VMA_BIND_MEMORY2 + if((m_UseKhrBindMemory2 || m_VulkanApiVersion >= VK_MAKE_VERSION(1, 1, 0)) && + m_VulkanFunctions.vkBindImageMemory2KHR != VMA_NULL) + { + VkBindImageMemoryInfoKHR bindBufferMemoryInfo = { VK_STRUCTURE_TYPE_BIND_IMAGE_MEMORY_INFO_KHR }; + bindBufferMemoryInfo.pNext = pNext; + bindBufferMemoryInfo.image = image; + bindBufferMemoryInfo.memory = memory; + bindBufferMemoryInfo.memoryOffset = memoryOffset; + return (*m_VulkanFunctions.vkBindImageMemory2KHR)(m_hDevice, 1, &bindBufferMemoryInfo); + } + else +#endif // #if VMA_BIND_MEMORY2 + { + return VK_ERROR_EXTENSION_NOT_PRESENT; + } + } + else + { + return (*m_VulkanFunctions.vkBindImageMemory)(m_hDevice, image, memory, memoryOffset); + } +} + +VkResult VmaAllocator_T::Map(VmaAllocation hAllocation, void** ppData) +{ + switch(hAllocation->GetType()) + { + case VmaAllocation_T::ALLOCATION_TYPE_BLOCK: + { + VmaDeviceMemoryBlock* const pBlock = hAllocation->GetBlock(); + char *pBytes = VMA_NULL; + VkResult res = pBlock->Map(this, 1, (void**)&pBytes); + if(res == VK_SUCCESS) + { + *ppData = pBytes + (ptrdiff_t)hAllocation->GetOffset(); + 
hAllocation->BlockAllocMap();
+        }
+        return res;
+    }
+    case VmaAllocation_T::ALLOCATION_TYPE_DEDICATED:
+        return hAllocation->DedicatedAllocMap(this, ppData);
+    default:
+        VMA_ASSERT(0);
+        return VK_ERROR_MEMORY_MAP_FAILED;
+    }
+}
+
+void VmaAllocator_T::Unmap(VmaAllocation hAllocation)
+{
+    switch(hAllocation->GetType())
+    {
+    case VmaAllocation_T::ALLOCATION_TYPE_BLOCK:
+    {
+        VmaDeviceMemoryBlock* const pBlock = hAllocation->GetBlock();
+        hAllocation->BlockAllocUnmap();
+        pBlock->Unmap(this, 1);
+    }
+    break;
+    case VmaAllocation_T::ALLOCATION_TYPE_DEDICATED:
+        hAllocation->DedicatedAllocUnmap(this);
+        break;
+    default:
+        VMA_ASSERT(0);
+    }
+}
+
+VkResult VmaAllocator_T::BindBufferMemory(
+    VmaAllocation hAllocation,
+    VkDeviceSize allocationLocalOffset,
+    VkBuffer hBuffer,
+    const void* pNext)
+{
+    VkResult res = VK_ERROR_UNKNOWN_COPY;
+    switch(hAllocation->GetType())
+    {
+    case VmaAllocation_T::ALLOCATION_TYPE_DEDICATED:
+        res = BindVulkanBuffer(hAllocation->GetMemory(), allocationLocalOffset, hBuffer, pNext);
+        break;
+    case VmaAllocation_T::ALLOCATION_TYPE_BLOCK:
+    {
+        VmaDeviceMemoryBlock* const pBlock = hAllocation->GetBlock();
+        VMA_ASSERT(pBlock && "Binding buffer to allocation that doesn't belong to any block.");
+        res = pBlock->BindBufferMemory(this, hAllocation, allocationLocalOffset, hBuffer, pNext);
+        break;
+    }
+    default:
+        VMA_ASSERT(0);
+    }
+    return res;
+}
+
+VkResult VmaAllocator_T::BindImageMemory(
+    VmaAllocation hAllocation,
+    VkDeviceSize allocationLocalOffset,
+    VkImage hImage,
+    const void* pNext)
+{
+    VkResult res = VK_ERROR_UNKNOWN_COPY;
+    switch(hAllocation->GetType())
+    {
+    case VmaAllocation_T::ALLOCATION_TYPE_DEDICATED:
+        res = BindVulkanImage(hAllocation->GetMemory(), allocationLocalOffset, hImage, pNext);
+        break;
+    case VmaAllocation_T::ALLOCATION_TYPE_BLOCK:
+    {
+        VmaDeviceMemoryBlock* pBlock = hAllocation->GetBlock();
+        VMA_ASSERT(pBlock && "Binding image to allocation that doesn't belong to any block.");
+        res = pBlock->BindImageMemory(this, hAllocation, allocationLocalOffset, hImage, pNext);
+        break;
+    }
+    default:
+        VMA_ASSERT(0);
+    }
+    return res;
+}
+
+VkResult VmaAllocator_T::FlushOrInvalidateAllocation(
+    VmaAllocation hAllocation,
+    VkDeviceSize offset, VkDeviceSize size,
+    VMA_CACHE_OPERATION op)
+{
+    VkResult res = VK_SUCCESS;
+
+    VkMappedMemoryRange memRange = {};
+    if(GetFlushOrInvalidateRange(hAllocation, offset, size, memRange))
+    {
+        switch(op)
+        {
+        case VMA_CACHE_FLUSH:
+            res = (*GetVulkanFunctions().vkFlushMappedMemoryRanges)(m_hDevice, 1, &memRange);
+            break;
+        case VMA_CACHE_INVALIDATE:
+            res = (*GetVulkanFunctions().vkInvalidateMappedMemoryRanges)(m_hDevice, 1, &memRange);
+            break;
+        default:
+            VMA_ASSERT(0);
+        }
+    }
+    // else: Just ignore this call.
+    return res;
+}
+
+VkResult VmaAllocator_T::FlushOrInvalidateAllocations(
+    uint32_t allocationCount,
+    const VmaAllocation* allocations,
+    const VkDeviceSize* offsets, const VkDeviceSize* sizes,
+    VMA_CACHE_OPERATION op)
+{
+    typedef VmaStlAllocator<VkMappedMemoryRange> RangeAllocator;
+    typedef VmaSmallVector<VkMappedMemoryRange, RangeAllocator, 16> RangeVector;
+    RangeVector ranges = RangeVector(RangeAllocator(GetAllocationCallbacks()));
+
+    for(uint32_t allocIndex = 0; allocIndex < allocationCount; ++allocIndex)
+    {
+        const VmaAllocation alloc = allocations[allocIndex];
+        const VkDeviceSize offset = offsets != VMA_NULL ? offsets[allocIndex] : 0;
+        const VkDeviceSize size = sizes != VMA_NULL ?
sizes[allocIndex] : VK_WHOLE_SIZE; + VkMappedMemoryRange newRange; + if(GetFlushOrInvalidateRange(alloc, offset, size, newRange)) + { + ranges.push_back(newRange); + } + } + + VkResult res = VK_SUCCESS; + if(!ranges.empty()) + { + switch(op) + { + case VMA_CACHE_FLUSH: + res = (*GetVulkanFunctions().vkFlushMappedMemoryRanges)(m_hDevice, (uint32_t)ranges.size(), ranges.data()); + break; + case VMA_CACHE_INVALIDATE: + res = (*GetVulkanFunctions().vkInvalidateMappedMemoryRanges)(m_hDevice, (uint32_t)ranges.size(), ranges.data()); + break; + default: + VMA_ASSERT(0); + } + } + // else: Just ignore this call. + return res; +} + +VkResult VmaAllocator_T::CopyMemoryToAllocation( + const void* pSrcHostPointer, + VmaAllocation dstAllocation, + VkDeviceSize dstAllocationLocalOffset, + VkDeviceSize size) +{ + void* dstMappedData = VMA_NULL; + VkResult res = Map(dstAllocation, &dstMappedData); + if(res == VK_SUCCESS) + { + memcpy((char*)dstMappedData + dstAllocationLocalOffset, pSrcHostPointer, (size_t)size); + Unmap(dstAllocation); + res = FlushOrInvalidateAllocation(dstAllocation, dstAllocationLocalOffset, size, VMA_CACHE_FLUSH); + } + return res; +} + +VkResult VmaAllocator_T::CopyAllocationToMemory( + VmaAllocation srcAllocation, + VkDeviceSize srcAllocationLocalOffset, + void* pDstHostPointer, + VkDeviceSize size) +{ + void* srcMappedData = VMA_NULL; + VkResult res = Map(srcAllocation, &srcMappedData); + if(res == VK_SUCCESS) + { + res = FlushOrInvalidateAllocation(srcAllocation, srcAllocationLocalOffset, size, VMA_CACHE_INVALIDATE); + if(res == VK_SUCCESS) + { + memcpy(pDstHostPointer, (const char*)srcMappedData + srcAllocationLocalOffset, (size_t)size); + Unmap(srcAllocation); + } + } + return res; +} + +void VmaAllocator_T::FreeDedicatedMemory(const VmaAllocation allocation) +{ + VMA_ASSERT(allocation && allocation->GetType() == VmaAllocation_T::ALLOCATION_TYPE_DEDICATED); + + const uint32_t memTypeIndex = allocation->GetMemoryTypeIndex(); + VmaPool parentPool = allocation->GetParentPool(); + if(parentPool == VK_NULL_HANDLE) + { + // Default pool + m_DedicatedAllocations[memTypeIndex].Unregister(allocation); + } + else + { + // Custom pool + parentPool->m_DedicatedAllocations.Unregister(allocation); + } + + VkDeviceMemory hMemory = allocation->GetMemory(); + + /* + There is no need to call this, because Vulkan spec allows to skip vkUnmapMemory + before vkFreeMemory. + + if(allocation->GetMappedData() != VMA_NULL) + { + (*m_VulkanFunctions.vkUnmapMemory)(m_hDevice, hMemory); + } + */ + + FreeVulkanMemory(memTypeIndex, allocation->GetSize(), hMemory); + + m_Budget.RemoveAllocation(MemoryTypeIndexToHeapIndex(allocation->GetMemoryTypeIndex()), allocation->GetSize()); + allocation->Destroy(this); + m_AllocationObjectAllocator.Free(allocation); + + VMA_DEBUG_LOG_FORMAT(" Freed DedicatedMemory MemoryTypeIndex=%" PRIu32, memTypeIndex); +} + +uint32_t VmaAllocator_T::CalculateGpuDefragmentationMemoryTypeBits() const +{ + VkBufferCreateInfo dummyBufCreateInfo; + VmaFillGpuDefragmentationBufferCreateInfo(dummyBufCreateInfo); + + uint32_t memoryTypeBits = 0; + + // Create buffer. + VkBuffer buf = VK_NULL_HANDLE; + VkResult res = (*GetVulkanFunctions().vkCreateBuffer)( + m_hDevice, &dummyBufCreateInfo, GetAllocationCallbacks(), &buf); + if(res == VK_SUCCESS) + { + // Query for supported memory types. + VkMemoryRequirements memReq; + (*GetVulkanFunctions().vkGetBufferMemoryRequirements)(m_hDevice, buf, &memReq); + memoryTypeBits = memReq.memoryTypeBits; + + // Destroy buffer. 
+ (*GetVulkanFunctions().vkDestroyBuffer)(m_hDevice, buf, GetAllocationCallbacks()); + } + + return memoryTypeBits; +} + +uint32_t VmaAllocator_T::CalculateGlobalMemoryTypeBits() const +{ + // Make sure memory information is already fetched. + VMA_ASSERT(GetMemoryTypeCount() > 0); + + uint32_t memoryTypeBits = UINT32_MAX; + + if(!m_UseAmdDeviceCoherentMemory) + { + // Exclude memory types that have VK_MEMORY_PROPERTY_DEVICE_COHERENT_BIT_AMD. + for(uint32_t memTypeIndex = 0; memTypeIndex < GetMemoryTypeCount(); ++memTypeIndex) + { + if((m_MemProps.memoryTypes[memTypeIndex].propertyFlags & VK_MEMORY_PROPERTY_DEVICE_COHERENT_BIT_AMD_COPY) != 0) + { + memoryTypeBits &= ~(1u << memTypeIndex); + } + } + } + + return memoryTypeBits; +} + +bool VmaAllocator_T::GetFlushOrInvalidateRange( + VmaAllocation allocation, + VkDeviceSize offset, VkDeviceSize size, + VkMappedMemoryRange& outRange) const +{ + const uint32_t memTypeIndex = allocation->GetMemoryTypeIndex(); + if(size > 0 && IsMemoryTypeNonCoherent(memTypeIndex)) + { + const VkDeviceSize nonCoherentAtomSize = m_PhysicalDeviceProperties.limits.nonCoherentAtomSize; + const VkDeviceSize allocationSize = allocation->GetSize(); + VMA_ASSERT(offset <= allocationSize); + + outRange.sType = VK_STRUCTURE_TYPE_MAPPED_MEMORY_RANGE; + outRange.pNext = VMA_NULL; + outRange.memory = allocation->GetMemory(); + + switch(allocation->GetType()) + { + case VmaAllocation_T::ALLOCATION_TYPE_DEDICATED: + outRange.offset = VmaAlignDown(offset, nonCoherentAtomSize); + if(size == VK_WHOLE_SIZE) + { + outRange.size = allocationSize - outRange.offset; + } + else + { + VMA_ASSERT(offset + size <= allocationSize); + outRange.size = VMA_MIN( + VmaAlignUp(size + (offset - outRange.offset), nonCoherentAtomSize), + allocationSize - outRange.offset); + } + break; + case VmaAllocation_T::ALLOCATION_TYPE_BLOCK: + { + // 1. Still within this allocation. + outRange.offset = VmaAlignDown(offset, nonCoherentAtomSize); + if(size == VK_WHOLE_SIZE) + { + size = allocationSize - offset; + } + else + { + VMA_ASSERT(offset + size <= allocationSize); + } + outRange.size = VmaAlignUp(size + (offset - outRange.offset), nonCoherentAtomSize); + + // 2. Adjust to whole block. 
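+            // The suballocation offset inside the VkDeviceMemory block is added and the range
+            // clamped to the block size: flush/invalidate ranges are specified relative to the
+            // VkDeviceMemory object, not to the suballocation.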
+ const VkDeviceSize allocationOffset = allocation->GetOffset(); + VMA_ASSERT(allocationOffset % nonCoherentAtomSize == 0); + const VkDeviceSize blockSize = allocation->GetBlock()->m_pMetadata->GetSize(); + outRange.offset += allocationOffset; + outRange.size = VMA_MIN(outRange.size, blockSize - outRange.offset); + + break; + } + default: + VMA_ASSERT(0); + } + return true; + } + return false; +} + +#if VMA_MEMORY_BUDGET +void VmaAllocator_T::UpdateVulkanBudget() +{ + VMA_ASSERT(m_UseExtMemoryBudget); + + VkPhysicalDeviceMemoryProperties2KHR memProps = { VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_MEMORY_PROPERTIES_2_KHR }; + + VkPhysicalDeviceMemoryBudgetPropertiesEXT budgetProps = { VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_MEMORY_BUDGET_PROPERTIES_EXT }; + VmaPnextChainPushFront(&memProps, &budgetProps); + + GetVulkanFunctions().vkGetPhysicalDeviceMemoryProperties2KHR(m_PhysicalDevice, &memProps); + + { + VmaMutexLockWrite lockWrite(m_Budget.m_BudgetMutex, m_UseMutex); + + for(uint32_t heapIndex = 0; heapIndex < GetMemoryHeapCount(); ++heapIndex) + { + m_Budget.m_VulkanUsage[heapIndex] = budgetProps.heapUsage[heapIndex]; + m_Budget.m_VulkanBudget[heapIndex] = budgetProps.heapBudget[heapIndex]; + m_Budget.m_BlockBytesAtBudgetFetch[heapIndex] = m_Budget.m_BlockBytes[heapIndex].load(); + + // Some bugged drivers return the budget incorrectly, e.g. 0 or much bigger than heap size. + if(m_Budget.m_VulkanBudget[heapIndex] == 0) + { + m_Budget.m_VulkanBudget[heapIndex] = m_MemProps.memoryHeaps[heapIndex].size * 8 / 10; // 80% heuristics. + } + else if(m_Budget.m_VulkanBudget[heapIndex] > m_MemProps.memoryHeaps[heapIndex].size) + { + m_Budget.m_VulkanBudget[heapIndex] = m_MemProps.memoryHeaps[heapIndex].size; + } + if(m_Budget.m_VulkanUsage[heapIndex] == 0 && m_Budget.m_BlockBytesAtBudgetFetch[heapIndex] > 0) + { + m_Budget.m_VulkanUsage[heapIndex] = m_Budget.m_BlockBytesAtBudgetFetch[heapIndex]; + } + } + m_Budget.m_OperationsSinceBudgetFetch = 0; + } +} +#endif // VMA_MEMORY_BUDGET + +void VmaAllocator_T::FillAllocation(const VmaAllocation hAllocation, uint8_t pattern) +{ + if(VMA_DEBUG_INITIALIZE_ALLOCATIONS && + hAllocation->IsMappingAllowed() && + (m_MemProps.memoryTypes[hAllocation->GetMemoryTypeIndex()].propertyFlags & VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT) != 0) + { + void* pData = VMA_NULL; + VkResult res = Map(hAllocation, &pData); + if(res == VK_SUCCESS) + { + memset(pData, (int)pattern, (size_t)hAllocation->GetSize()); + FlushOrInvalidateAllocation(hAllocation, 0, VK_WHOLE_SIZE, VMA_CACHE_FLUSH); + Unmap(hAllocation); + } + else + { + VMA_ASSERT(0 && "VMA_DEBUG_INITIALIZE_ALLOCATIONS is enabled, but couldn't map memory to fill allocation."); + } + } +} + +uint32_t VmaAllocator_T::GetGpuDefragmentationMemoryTypeBits() +{ + uint32_t memoryTypeBits = m_GpuDefragmentationMemoryTypeBits.load(); + if(memoryTypeBits == UINT32_MAX) + { + memoryTypeBits = CalculateGpuDefragmentationMemoryTypeBits(); + m_GpuDefragmentationMemoryTypeBits.store(memoryTypeBits); + } + return memoryTypeBits; +} + +#if VMA_STATS_STRING_ENABLED +void VmaAllocator_T::PrintDetailedMap(VmaJsonWriter& json) +{ + json.WriteString("DefaultPools"); + json.BeginObject(); + { + for (uint32_t memTypeIndex = 0; memTypeIndex < GetMemoryTypeCount(); ++memTypeIndex) + { + VmaBlockVector* pBlockVector = m_pBlockVectors[memTypeIndex]; + VmaDedicatedAllocationList& dedicatedAllocList = m_DedicatedAllocations[memTypeIndex]; + if (pBlockVector != VMA_NULL) + { + json.BeginString("Type "); + json.ContinueString(memTypeIndex); + json.EndString(); + 
json.BeginObject(); + { + json.WriteString("PreferredBlockSize"); + json.WriteNumber(pBlockVector->GetPreferredBlockSize()); + + json.WriteString("Blocks"); + pBlockVector->PrintDetailedMap(json); + + json.WriteString("DedicatedAllocations"); + dedicatedAllocList.BuildStatsString(json); + } + json.EndObject(); + } + } + } + json.EndObject(); + + json.WriteString("CustomPools"); + json.BeginObject(); + { + VmaMutexLockRead lock(m_PoolsMutex, m_UseMutex); + if (!m_Pools.IsEmpty()) + { + for (uint32_t memTypeIndex = 0; memTypeIndex < GetMemoryTypeCount(); ++memTypeIndex) + { + bool displayType = true; + size_t index = 0; + for (VmaPool pool = m_Pools.Front(); pool != VMA_NULL; pool = m_Pools.GetNext(pool)) + { + VmaBlockVector& blockVector = pool->m_BlockVector; + if (blockVector.GetMemoryTypeIndex() == memTypeIndex) + { + if (displayType) + { + json.BeginString("Type "); + json.ContinueString(memTypeIndex); + json.EndString(); + json.BeginArray(); + displayType = false; + } + + json.BeginObject(); + { + json.WriteString("Name"); + json.BeginString(); + json.ContinueString((uint64_t)index++); + if (pool->GetName()) + { + json.ContinueString(" - "); + json.ContinueString(pool->GetName()); + } + json.EndString(); + + json.WriteString("PreferredBlockSize"); + json.WriteNumber(blockVector.GetPreferredBlockSize()); + + json.WriteString("Blocks"); + blockVector.PrintDetailedMap(json); + + json.WriteString("DedicatedAllocations"); + pool->m_DedicatedAllocations.BuildStatsString(json); + } + json.EndObject(); + } + } + + if (!displayType) + json.EndArray(); + } + } + } + json.EndObject(); +} +#endif // VMA_STATS_STRING_ENABLED +#endif // _VMA_ALLOCATOR_T_FUNCTIONS + + +#ifndef _VMA_PUBLIC_INTERFACE +VMA_CALL_PRE VkResult VMA_CALL_POST vmaCreateAllocator( + const VmaAllocatorCreateInfo* pCreateInfo, + VmaAllocator* pAllocator) +{ + VMA_ASSERT(pCreateInfo && pAllocator); + VMA_ASSERT(pCreateInfo->vulkanApiVersion == 0 || + (VK_VERSION_MAJOR(pCreateInfo->vulkanApiVersion) == 1 && VK_VERSION_MINOR(pCreateInfo->vulkanApiVersion) <= 4)); + VMA_DEBUG_LOG("vmaCreateAllocator"); + *pAllocator = vma_new(pCreateInfo->pAllocationCallbacks, VmaAllocator_T)(pCreateInfo); + VkResult result = (*pAllocator)->Init(pCreateInfo); + if(result < 0) + { + vma_delete(pCreateInfo->pAllocationCallbacks, *pAllocator); + *pAllocator = VK_NULL_HANDLE; + } + return result; +} + +VMA_CALL_PRE void VMA_CALL_POST vmaDestroyAllocator( + VmaAllocator allocator) +{ + if(allocator != VK_NULL_HANDLE) + { + VMA_DEBUG_LOG("vmaDestroyAllocator"); + VkAllocationCallbacks allocationCallbacks = allocator->m_AllocationCallbacks; // Have to copy the callbacks when destroying. 
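+        // The allocator object itself is freed through these callbacks, so the copy above must outlive vma_delete.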
+        vma_delete(&allocationCallbacks, allocator);
+    }
+}
+
+VMA_CALL_PRE void VMA_CALL_POST vmaGetAllocatorInfo(VmaAllocator allocator, VmaAllocatorInfo* pAllocatorInfo)
+{
+    VMA_ASSERT(allocator && pAllocatorInfo);
+    pAllocatorInfo->instance = allocator->m_hInstance;
+    pAllocatorInfo->physicalDevice = allocator->GetPhysicalDevice();
+    pAllocatorInfo->device = allocator->m_hDevice;
+}
+
+VMA_CALL_PRE void VMA_CALL_POST vmaGetPhysicalDeviceProperties(
+    VmaAllocator allocator,
+    const VkPhysicalDeviceProperties **ppPhysicalDeviceProperties)
+{
+    VMA_ASSERT(allocator && ppPhysicalDeviceProperties);
+    *ppPhysicalDeviceProperties = &allocator->m_PhysicalDeviceProperties;
+}
+
+VMA_CALL_PRE void VMA_CALL_POST vmaGetMemoryProperties(
+    VmaAllocator allocator,
+    const VkPhysicalDeviceMemoryProperties** ppPhysicalDeviceMemoryProperties)
+{
+    VMA_ASSERT(allocator && ppPhysicalDeviceMemoryProperties);
+    *ppPhysicalDeviceMemoryProperties = &allocator->m_MemProps;
+}
+
+VMA_CALL_PRE void VMA_CALL_POST vmaGetMemoryTypeProperties(
+    VmaAllocator allocator,
+    uint32_t memoryTypeIndex,
+    VkMemoryPropertyFlags* pFlags)
+{
+    VMA_ASSERT(allocator && pFlags);
+    VMA_ASSERT(memoryTypeIndex < allocator->GetMemoryTypeCount());
+    *pFlags = allocator->m_MemProps.memoryTypes[memoryTypeIndex].propertyFlags;
+}
+
+VMA_CALL_PRE void VMA_CALL_POST vmaSetCurrentFrameIndex(
+    VmaAllocator allocator,
+    uint32_t frameIndex)
+{
+    VMA_ASSERT(allocator);
+
+    VMA_DEBUG_GLOBAL_MUTEX_LOCK
+
+    allocator->SetCurrentFrameIndex(frameIndex);
+}
+
+VMA_CALL_PRE void VMA_CALL_POST vmaCalculateStatistics(
+    VmaAllocator allocator,
+    VmaTotalStatistics* pStats)
+{
+    VMA_ASSERT(allocator && pStats);
+    VMA_DEBUG_GLOBAL_MUTEX_LOCK
+    allocator->CalculateStatistics(pStats);
+}
+
+VMA_CALL_PRE void VMA_CALL_POST vmaGetHeapBudgets(
+    VmaAllocator allocator,
+    VmaBudget* pBudgets)
+{
+    VMA_ASSERT(allocator && pBudgets);
+    VMA_DEBUG_GLOBAL_MUTEX_LOCK
+    allocator->GetHeapBudgets(pBudgets, 0, allocator->GetMemoryHeapCount());
+}
+
+#if VMA_STATS_STRING_ENABLED
+
+VMA_CALL_PRE void VMA_CALL_POST vmaBuildStatsString(
+    VmaAllocator allocator,
+    char** ppStatsString,
+    VkBool32 detailedMap)
+{
+    VMA_ASSERT(allocator && ppStatsString);
+    VMA_DEBUG_GLOBAL_MUTEX_LOCK
+
+    VmaStringBuilder sb(allocator->GetAllocationCallbacks());
+    {
+        VmaBudget budgets[VK_MAX_MEMORY_HEAPS];
+        allocator->GetHeapBudgets(budgets, 0, allocator->GetMemoryHeapCount());
+
+        VmaTotalStatistics stats;
+        allocator->CalculateStatistics(&stats);
+
+        VmaJsonWriter json(allocator->GetAllocationCallbacks(), sb);
+        json.BeginObject();
+        {
+            json.WriteString("General");
+            json.BeginObject();
+            {
+                const VkPhysicalDeviceProperties& deviceProperties = allocator->m_PhysicalDeviceProperties;
+                const VkPhysicalDeviceMemoryProperties& memoryProperties = allocator->m_MemProps;
+
+                json.WriteString("API");
+                json.WriteString("Vulkan");
+
+                json.WriteString("apiVersion");
+                json.BeginString();
+                json.ContinueString(VK_VERSION_MAJOR(deviceProperties.apiVersion));
+                json.ContinueString(".");
+                json.ContinueString(VK_VERSION_MINOR(deviceProperties.apiVersion));
+                json.ContinueString(".");
+                json.ContinueString(VK_VERSION_PATCH(deviceProperties.apiVersion));
+                json.EndString();
+
+                json.WriteString("GPU");
+                json.WriteString(deviceProperties.deviceName);
+                json.WriteString("deviceType");
+                json.WriteNumber(static_cast<uint32_t>(deviceProperties.deviceType));
+
+                json.WriteString("maxMemoryAllocationCount");
+                json.WriteNumber(deviceProperties.limits.maxMemoryAllocationCount);
+
json.WriteString("bufferImageGranularity"); + json.WriteNumber(deviceProperties.limits.bufferImageGranularity); + json.WriteString("nonCoherentAtomSize"); + json.WriteNumber(deviceProperties.limits.nonCoherentAtomSize); + + json.WriteString("memoryHeapCount"); + json.WriteNumber(memoryProperties.memoryHeapCount); + json.WriteString("memoryTypeCount"); + json.WriteNumber(memoryProperties.memoryTypeCount); + } + json.EndObject(); + } + { + json.WriteString("Total"); + VmaPrintDetailedStatistics(json, stats.total); + } + { + json.WriteString("MemoryInfo"); + json.BeginObject(); + { + for (uint32_t heapIndex = 0; heapIndex < allocator->GetMemoryHeapCount(); ++heapIndex) + { + json.BeginString("Heap "); + json.ContinueString(heapIndex); + json.EndString(); + json.BeginObject(); + { + const VkMemoryHeap& heapInfo = allocator->m_MemProps.memoryHeaps[heapIndex]; + json.WriteString("Flags"); + json.BeginArray(true); + { + if (heapInfo.flags & VK_MEMORY_HEAP_DEVICE_LOCAL_BIT) + json.WriteString("DEVICE_LOCAL"); + #if VMA_VULKAN_VERSION >= 1001000 + if (heapInfo.flags & VK_MEMORY_HEAP_MULTI_INSTANCE_BIT) + json.WriteString("MULTI_INSTANCE"); + #endif + + VkMemoryHeapFlags flags = heapInfo.flags & + ~(VK_MEMORY_HEAP_DEVICE_LOCAL_BIT + #if VMA_VULKAN_VERSION >= 1001000 + | VK_MEMORY_HEAP_MULTI_INSTANCE_BIT + #endif + ); + if (flags != 0) + json.WriteNumber(flags); + } + json.EndArray(); + + json.WriteString("Size"); + json.WriteNumber(heapInfo.size); + + json.WriteString("Budget"); + json.BeginObject(); + { + json.WriteString("BudgetBytes"); + json.WriteNumber(budgets[heapIndex].budget); + json.WriteString("UsageBytes"); + json.WriteNumber(budgets[heapIndex].usage); + } + json.EndObject(); + + json.WriteString("Stats"); + VmaPrintDetailedStatistics(json, stats.memoryHeap[heapIndex]); + + json.WriteString("MemoryPools"); + json.BeginObject(); + { + for (uint32_t typeIndex = 0; typeIndex < allocator->GetMemoryTypeCount(); ++typeIndex) + { + if (allocator->MemoryTypeIndexToHeapIndex(typeIndex) == heapIndex) + { + json.BeginString("Type "); + json.ContinueString(typeIndex); + json.EndString(); + json.BeginObject(); + { + json.WriteString("Flags"); + json.BeginArray(true); + { + VkMemoryPropertyFlags flags = allocator->m_MemProps.memoryTypes[typeIndex].propertyFlags; + if (flags & VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT) + json.WriteString("DEVICE_LOCAL"); + if (flags & VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT) + json.WriteString("HOST_VISIBLE"); + if (flags & VK_MEMORY_PROPERTY_HOST_COHERENT_BIT) + json.WriteString("HOST_COHERENT"); + if (flags & VK_MEMORY_PROPERTY_HOST_CACHED_BIT) + json.WriteString("HOST_CACHED"); + if (flags & VK_MEMORY_PROPERTY_LAZILY_ALLOCATED_BIT) + json.WriteString("LAZILY_ALLOCATED"); + #if VMA_VULKAN_VERSION >= 1001000 + if (flags & VK_MEMORY_PROPERTY_PROTECTED_BIT) + json.WriteString("PROTECTED"); + #endif + #if VK_AMD_device_coherent_memory + if (flags & VK_MEMORY_PROPERTY_DEVICE_COHERENT_BIT_AMD_COPY) + json.WriteString("DEVICE_COHERENT_AMD"); + if (flags & VK_MEMORY_PROPERTY_DEVICE_UNCACHED_BIT_AMD_COPY) + json.WriteString("DEVICE_UNCACHED_AMD"); + #endif + + flags &= ~(VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT + #if VMA_VULKAN_VERSION >= 1001000 + | VK_MEMORY_PROPERTY_LAZILY_ALLOCATED_BIT + #endif + #if VK_AMD_device_coherent_memory + | VK_MEMORY_PROPERTY_DEVICE_COHERENT_BIT_AMD_COPY + | VK_MEMORY_PROPERTY_DEVICE_UNCACHED_BIT_AMD_COPY + #endif + | VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT + | VK_MEMORY_PROPERTY_HOST_COHERENT_BIT + | VK_MEMORY_PROPERTY_HOST_CACHED_BIT); + if (flags != 0) + 
json.WriteNumber(flags); + } + json.EndArray(); + + json.WriteString("Stats"); + VmaPrintDetailedStatistics(json, stats.memoryType[typeIndex]); + } + json.EndObject(); + } + } + + } + json.EndObject(); + } + json.EndObject(); + } + } + json.EndObject(); + } + + if (detailedMap == VK_TRUE) + allocator->PrintDetailedMap(json); + + json.EndObject(); + } + + *ppStatsString = VmaCreateStringCopy(allocator->GetAllocationCallbacks(), sb.GetData(), sb.GetLength()); +} + +VMA_CALL_PRE void VMA_CALL_POST vmaFreeStatsString( + VmaAllocator allocator, + char* pStatsString) +{ + if(pStatsString != VMA_NULL) + { + VMA_ASSERT(allocator); + VmaFreeString(allocator->GetAllocationCallbacks(), pStatsString); + } +} + +#endif // VMA_STATS_STRING_ENABLED + +/* +This function is not protected by any mutex because it just reads immutable data. +*/ +VMA_CALL_PRE VkResult VMA_CALL_POST vmaFindMemoryTypeIndex( + VmaAllocator allocator, + uint32_t memoryTypeBits, + const VmaAllocationCreateInfo* pAllocationCreateInfo, + uint32_t* pMemoryTypeIndex) +{ + VMA_ASSERT(allocator != VK_NULL_HANDLE); + VMA_ASSERT(pAllocationCreateInfo != VMA_NULL); + VMA_ASSERT(pMemoryTypeIndex != VMA_NULL); + + return allocator->FindMemoryTypeIndex(memoryTypeBits, pAllocationCreateInfo, VmaBufferImageUsage::UNKNOWN, pMemoryTypeIndex); +} + +VMA_CALL_PRE VkResult VMA_CALL_POST vmaFindMemoryTypeIndexForBufferInfo( + VmaAllocator allocator, + const VkBufferCreateInfo* pBufferCreateInfo, + const VmaAllocationCreateInfo* pAllocationCreateInfo, + uint32_t* pMemoryTypeIndex) +{ + VMA_ASSERT(allocator != VK_NULL_HANDLE); + VMA_ASSERT(pBufferCreateInfo != VMA_NULL); + VMA_ASSERT(pAllocationCreateInfo != VMA_NULL); + VMA_ASSERT(pMemoryTypeIndex != VMA_NULL); + + const VkDevice hDev = allocator->m_hDevice; + const VmaVulkanFunctions* funcs = &allocator->GetVulkanFunctions(); + VkResult res; + +#if VMA_KHR_MAINTENANCE4 || VMA_VULKAN_VERSION >= 1003000 + if(funcs->vkGetDeviceBufferMemoryRequirements) + { + // Can query straight from VkBufferCreateInfo :) + VkDeviceBufferMemoryRequirementsKHR devBufMemReq = {VK_STRUCTURE_TYPE_DEVICE_BUFFER_MEMORY_REQUIREMENTS_KHR}; + devBufMemReq.pCreateInfo = pBufferCreateInfo; + + VkMemoryRequirements2 memReq = {VK_STRUCTURE_TYPE_MEMORY_REQUIREMENTS_2}; + (*funcs->vkGetDeviceBufferMemoryRequirements)(hDev, &devBufMemReq, &memReq); + + res = allocator->FindMemoryTypeIndex( + memReq.memoryRequirements.memoryTypeBits, pAllocationCreateInfo, + VmaBufferImageUsage(*pBufferCreateInfo, allocator->m_UseKhrMaintenance5), pMemoryTypeIndex); + } + else +#endif // VMA_KHR_MAINTENANCE4 || VMA_VULKAN_VERSION >= 1003000 + { + // Must create a dummy buffer to query :( + VkBuffer hBuffer = VK_NULL_HANDLE; + res = funcs->vkCreateBuffer( + hDev, pBufferCreateInfo, allocator->GetAllocationCallbacks(), &hBuffer); + if(res == VK_SUCCESS) + { + VkMemoryRequirements memReq = {}; + funcs->vkGetBufferMemoryRequirements(hDev, hBuffer, &memReq); + + res = allocator->FindMemoryTypeIndex( + memReq.memoryTypeBits, pAllocationCreateInfo, + VmaBufferImageUsage(*pBufferCreateInfo, allocator->m_UseKhrMaintenance5), pMemoryTypeIndex); + + funcs->vkDestroyBuffer( + hDev, hBuffer, allocator->GetAllocationCallbacks()); + } + } + return res; +} + +VMA_CALL_PRE VkResult VMA_CALL_POST vmaFindMemoryTypeIndexForImageInfo( + VmaAllocator allocator, + const VkImageCreateInfo* pImageCreateInfo, + const VmaAllocationCreateInfo* pAllocationCreateInfo, + uint32_t* pMemoryTypeIndex) +{ + VMA_ASSERT(allocator != VK_NULL_HANDLE); + VMA_ASSERT(pImageCreateInfo != 
VMA_NULL); + VMA_ASSERT(pAllocationCreateInfo != VMA_NULL); + VMA_ASSERT(pMemoryTypeIndex != VMA_NULL); + + const VkDevice hDev = allocator->m_hDevice; + const VmaVulkanFunctions* funcs = &allocator->GetVulkanFunctions(); + VkResult res; + +#if VMA_KHR_MAINTENANCE4 || VMA_VULKAN_VERSION >= 1003000 + if(funcs->vkGetDeviceImageMemoryRequirements) + { + // Can query straight from VkImageCreateInfo :) + VkDeviceImageMemoryRequirementsKHR devImgMemReq = {VK_STRUCTURE_TYPE_DEVICE_IMAGE_MEMORY_REQUIREMENTS_KHR}; + devImgMemReq.pCreateInfo = pImageCreateInfo; + VMA_ASSERT(pImageCreateInfo->tiling != VK_IMAGE_TILING_DRM_FORMAT_MODIFIER_EXT_COPY && (pImageCreateInfo->flags & VK_IMAGE_CREATE_DISJOINT_BIT_COPY) == 0 && + "Cannot use this VkImageCreateInfo with vmaFindMemoryTypeIndexForImageInfo as I don't know what to pass as VkDeviceImageMemoryRequirements::planeAspect."); + + VkMemoryRequirements2 memReq = {VK_STRUCTURE_TYPE_MEMORY_REQUIREMENTS_2}; + (*funcs->vkGetDeviceImageMemoryRequirements)(hDev, &devImgMemReq, &memReq); + + res = allocator->FindMemoryTypeIndex( + memReq.memoryRequirements.memoryTypeBits, pAllocationCreateInfo, + VmaBufferImageUsage(*pImageCreateInfo), pMemoryTypeIndex); + } + else +#endif // VMA_KHR_MAINTENANCE4 || VMA_VULKAN_VERSION >= 1003000 + { + // Must create a dummy image to query :( + VkImage hImage = VK_NULL_HANDLE; + res = funcs->vkCreateImage( + hDev, pImageCreateInfo, allocator->GetAllocationCallbacks(), &hImage); + if(res == VK_SUCCESS) + { + VkMemoryRequirements memReq = {}; + funcs->vkGetImageMemoryRequirements(hDev, hImage, &memReq); + + res = allocator->FindMemoryTypeIndex( + memReq.memoryTypeBits, pAllocationCreateInfo, + VmaBufferImageUsage(*pImageCreateInfo), pMemoryTypeIndex); + + funcs->vkDestroyImage( + hDev, hImage, allocator->GetAllocationCallbacks()); + } + } + return res; +} + +VMA_CALL_PRE VkResult VMA_CALL_POST vmaCreatePool( + VmaAllocator allocator, + const VmaPoolCreateInfo* pCreateInfo, + VmaPool* pPool) +{ + VMA_ASSERT(allocator && pCreateInfo && pPool); + + VMA_DEBUG_LOG("vmaCreatePool"); + + VMA_DEBUG_GLOBAL_MUTEX_LOCK + + return allocator->CreatePool(pCreateInfo, pPool); +} + +VMA_CALL_PRE void VMA_CALL_POST vmaDestroyPool( + VmaAllocator allocator, + VmaPool pool) +{ + VMA_ASSERT(allocator); + + if(pool == VK_NULL_HANDLE) + { + return; + } + + VMA_DEBUG_LOG("vmaDestroyPool"); + + VMA_DEBUG_GLOBAL_MUTEX_LOCK + + allocator->DestroyPool(pool); +} + +VMA_CALL_PRE void VMA_CALL_POST vmaGetPoolStatistics( + VmaAllocator allocator, + VmaPool pool, + VmaStatistics* pPoolStats) +{ + VMA_ASSERT(allocator && pool && pPoolStats); + + VMA_DEBUG_GLOBAL_MUTEX_LOCK + + allocator->GetPoolStatistics(pool, pPoolStats); +} + +VMA_CALL_PRE void VMA_CALL_POST vmaCalculatePoolStatistics( + VmaAllocator allocator, + VmaPool pool, + VmaDetailedStatistics* pPoolStats) +{ + VMA_ASSERT(allocator && pool && pPoolStats); + + VMA_DEBUG_GLOBAL_MUTEX_LOCK + + allocator->CalculatePoolStatistics(pool, pPoolStats); +} + +VMA_CALL_PRE VkResult VMA_CALL_POST vmaCheckPoolCorruption(VmaAllocator allocator, VmaPool pool) +{ + VMA_ASSERT(allocator && pool); + + VMA_DEBUG_GLOBAL_MUTEX_LOCK + + VMA_DEBUG_LOG("vmaCheckPoolCorruption"); + + return allocator->CheckPoolCorruption(pool); +} + +VMA_CALL_PRE void VMA_CALL_POST vmaGetPoolName( + VmaAllocator allocator, + VmaPool pool, + const char** ppName) +{ + VMA_ASSERT(allocator && pool && ppName); + + VMA_DEBUG_LOG("vmaGetPoolName"); + + VMA_DEBUG_GLOBAL_MUTEX_LOCK + + *ppName = pool->GetName(); +} + +VMA_CALL_PRE void VMA_CALL_POST 
vmaSetPoolName( + VmaAllocator allocator, + VmaPool pool, + const char* pName) +{ + VMA_ASSERT(allocator && pool); + + VMA_DEBUG_LOG("vmaSetPoolName"); + + VMA_DEBUG_GLOBAL_MUTEX_LOCK + + pool->SetName(pName); +} + +VMA_CALL_PRE VkResult VMA_CALL_POST vmaAllocateMemory( + VmaAllocator allocator, + const VkMemoryRequirements* pVkMemoryRequirements, + const VmaAllocationCreateInfo* pCreateInfo, + VmaAllocation* pAllocation, + VmaAllocationInfo* pAllocationInfo) +{ + VMA_ASSERT(allocator && pVkMemoryRequirements && pCreateInfo && pAllocation); + + VMA_DEBUG_LOG("vmaAllocateMemory"); + + VMA_DEBUG_GLOBAL_MUTEX_LOCK + + VkResult result = allocator->AllocateMemory( + *pVkMemoryRequirements, + false, // requiresDedicatedAllocation + false, // prefersDedicatedAllocation + VK_NULL_HANDLE, // dedicatedBuffer + VK_NULL_HANDLE, // dedicatedImage + VmaBufferImageUsage::UNKNOWN, // dedicatedBufferImageUsage + *pCreateInfo, + VMA_SUBALLOCATION_TYPE_UNKNOWN, + 1, // allocationCount + pAllocation); + + if(pAllocationInfo != VMA_NULL && result == VK_SUCCESS) + { + allocator->GetAllocationInfo(*pAllocation, pAllocationInfo); + } + + return result; +} + +VMA_CALL_PRE VkResult VMA_CALL_POST vmaAllocateMemoryPages( + VmaAllocator allocator, + const VkMemoryRequirements* pVkMemoryRequirements, + const VmaAllocationCreateInfo* pCreateInfo, + size_t allocationCount, + VmaAllocation* pAllocations, + VmaAllocationInfo* pAllocationInfo) +{ + if(allocationCount == 0) + { + return VK_SUCCESS; + } + + VMA_ASSERT(allocator && pVkMemoryRequirements && pCreateInfo && pAllocations); + + VMA_DEBUG_LOG("vmaAllocateMemoryPages"); + + VMA_DEBUG_GLOBAL_MUTEX_LOCK + + VkResult result = allocator->AllocateMemory( + *pVkMemoryRequirements, + false, // requiresDedicatedAllocation + false, // prefersDedicatedAllocation + VK_NULL_HANDLE, // dedicatedBuffer + VK_NULL_HANDLE, // dedicatedImage + VmaBufferImageUsage::UNKNOWN, // dedicatedBufferImageUsage + *pCreateInfo, + VMA_SUBALLOCATION_TYPE_UNKNOWN, + allocationCount, + pAllocations); + + if(pAllocationInfo != VMA_NULL && result == VK_SUCCESS) + { + for(size_t i = 0; i < allocationCount; ++i) + { + allocator->GetAllocationInfo(pAllocations[i], pAllocationInfo + i); + } + } + + return result; +} + +VMA_CALL_PRE VkResult VMA_CALL_POST vmaAllocateMemoryForBuffer( + VmaAllocator allocator, + VkBuffer buffer, + const VmaAllocationCreateInfo* pCreateInfo, + VmaAllocation* pAllocation, + VmaAllocationInfo* pAllocationInfo) +{ + VMA_ASSERT(allocator && buffer != VK_NULL_HANDLE && pCreateInfo && pAllocation); + + VMA_DEBUG_LOG("vmaAllocateMemoryForBuffer"); + + VMA_DEBUG_GLOBAL_MUTEX_LOCK + + VkMemoryRequirements vkMemReq = {}; + bool requiresDedicatedAllocation = false; + bool prefersDedicatedAllocation = false; + allocator->GetBufferMemoryRequirements(buffer, vkMemReq, + requiresDedicatedAllocation, + prefersDedicatedAllocation); + + VkResult result = allocator->AllocateMemory( + vkMemReq, + requiresDedicatedAllocation, + prefersDedicatedAllocation, + buffer, // dedicatedBuffer + VK_NULL_HANDLE, // dedicatedImage + VmaBufferImageUsage::UNKNOWN, // dedicatedBufferImageUsage + *pCreateInfo, + VMA_SUBALLOCATION_TYPE_BUFFER, + 1, // allocationCount + pAllocation); + + if(pAllocationInfo && result == VK_SUCCESS) + { + allocator->GetAllocationInfo(*pAllocation, pAllocationInfo); + } + + return result; +} + +VMA_CALL_PRE VkResult VMA_CALL_POST vmaAllocateMemoryForImage( + VmaAllocator allocator, + VkImage image, + const VmaAllocationCreateInfo* pCreateInfo, + VmaAllocation* pAllocation, + 
VmaAllocationInfo* pAllocationInfo) +{ + VMA_ASSERT(allocator && image != VK_NULL_HANDLE && pCreateInfo && pAllocation); + + VMA_DEBUG_LOG("vmaAllocateMemoryForImage"); + + VMA_DEBUG_GLOBAL_MUTEX_LOCK + + VkMemoryRequirements vkMemReq = {}; + bool requiresDedicatedAllocation = false; + bool prefersDedicatedAllocation = false; + allocator->GetImageMemoryRequirements(image, vkMemReq, + requiresDedicatedAllocation, prefersDedicatedAllocation); + + VkResult result = allocator->AllocateMemory( + vkMemReq, + requiresDedicatedAllocation, + prefersDedicatedAllocation, + VK_NULL_HANDLE, // dedicatedBuffer + image, // dedicatedImage + VmaBufferImageUsage::UNKNOWN, // dedicatedBufferImageUsage + *pCreateInfo, + VMA_SUBALLOCATION_TYPE_IMAGE_UNKNOWN, + 1, // allocationCount + pAllocation); + + if(pAllocationInfo && result == VK_SUCCESS) + { + allocator->GetAllocationInfo(*pAllocation, pAllocationInfo); + } + + return result; +} + +VMA_CALL_PRE void VMA_CALL_POST vmaFreeMemory( + VmaAllocator allocator, + VmaAllocation allocation) +{ + VMA_ASSERT(allocator); + + if(allocation == VK_NULL_HANDLE) + { + return; + } + + VMA_DEBUG_LOG("vmaFreeMemory"); + + VMA_DEBUG_GLOBAL_MUTEX_LOCK + + allocator->FreeMemory( + 1, // allocationCount + &allocation); +} + +VMA_CALL_PRE void VMA_CALL_POST vmaFreeMemoryPages( + VmaAllocator allocator, + size_t allocationCount, + const VmaAllocation* pAllocations) +{ + if(allocationCount == 0) + { + return; + } + + VMA_ASSERT(allocator); + + VMA_DEBUG_LOG("vmaFreeMemoryPages"); + + VMA_DEBUG_GLOBAL_MUTEX_LOCK + + allocator->FreeMemory(allocationCount, pAllocations); +} + +VMA_CALL_PRE void VMA_CALL_POST vmaGetAllocationInfo( + VmaAllocator allocator, + VmaAllocation allocation, + VmaAllocationInfo* pAllocationInfo) +{ + VMA_ASSERT(allocator && allocation && pAllocationInfo); + + VMA_DEBUG_GLOBAL_MUTEX_LOCK + + allocator->GetAllocationInfo(allocation, pAllocationInfo); +} + +VMA_CALL_PRE void VMA_CALL_POST vmaGetAllocationInfo2( + VmaAllocator allocator, + VmaAllocation allocation, + VmaAllocationInfo2* pAllocationInfo) +{ + VMA_ASSERT(allocator && allocation && pAllocationInfo); + + VMA_DEBUG_GLOBAL_MUTEX_LOCK + + allocator->GetAllocationInfo2(allocation, pAllocationInfo); +} + +VMA_CALL_PRE void VMA_CALL_POST vmaSetAllocationUserData( + VmaAllocator allocator, + VmaAllocation allocation, + void* pUserData) +{ + VMA_ASSERT(allocator && allocation); + + VMA_DEBUG_GLOBAL_MUTEX_LOCK + + allocation->SetUserData(allocator, pUserData); +} + +VMA_CALL_PRE void VMA_CALL_POST vmaSetAllocationName( + VmaAllocator VMA_NOT_NULL allocator, + VmaAllocation VMA_NOT_NULL allocation, + const char* VMA_NULLABLE pName) +{ + allocation->SetName(allocator, pName); +} + +VMA_CALL_PRE void VMA_CALL_POST vmaGetAllocationMemoryProperties( + VmaAllocator VMA_NOT_NULL allocator, + VmaAllocation VMA_NOT_NULL allocation, + VkMemoryPropertyFlags* VMA_NOT_NULL pFlags) +{ + VMA_ASSERT(allocator && allocation && pFlags); + const uint32_t memTypeIndex = allocation->GetMemoryTypeIndex(); + *pFlags = allocator->m_MemProps.memoryTypes[memTypeIndex].propertyFlags; +} + +VMA_CALL_PRE VkResult VMA_CALL_POST vmaMapMemory( + VmaAllocator allocator, + VmaAllocation allocation, + void** ppData) +{ + VMA_ASSERT(allocator && allocation && ppData); + + VMA_DEBUG_GLOBAL_MUTEX_LOCK + + return allocator->Map(allocation, ppData); +} + +VMA_CALL_PRE void VMA_CALL_POST vmaUnmapMemory( + VmaAllocator allocator, + VmaAllocation allocation) +{ + VMA_ASSERT(allocator && allocation); + + VMA_DEBUG_GLOBAL_MUTEX_LOCK + + 
allocator->Unmap(allocation); +} + +VMA_CALL_PRE VkResult VMA_CALL_POST vmaFlushAllocation( + VmaAllocator allocator, + VmaAllocation allocation, + VkDeviceSize offset, + VkDeviceSize size) +{ + VMA_ASSERT(allocator && allocation); + + VMA_DEBUG_LOG("vmaFlushAllocation"); + + VMA_DEBUG_GLOBAL_MUTEX_LOCK + + return allocator->FlushOrInvalidateAllocation(allocation, offset, size, VMA_CACHE_FLUSH); +} + +VMA_CALL_PRE VkResult VMA_CALL_POST vmaInvalidateAllocation( + VmaAllocator allocator, + VmaAllocation allocation, + VkDeviceSize offset, + VkDeviceSize size) +{ + VMA_ASSERT(allocator && allocation); + + VMA_DEBUG_LOG("vmaInvalidateAllocation"); + + VMA_DEBUG_GLOBAL_MUTEX_LOCK + + return allocator->FlushOrInvalidateAllocation(allocation, offset, size, VMA_CACHE_INVALIDATE); +} + +VMA_CALL_PRE VkResult VMA_CALL_POST vmaFlushAllocations( + VmaAllocator allocator, + uint32_t allocationCount, + const VmaAllocation* allocations, + const VkDeviceSize* offsets, + const VkDeviceSize* sizes) +{ + VMA_ASSERT(allocator); + + if(allocationCount == 0) + { + return VK_SUCCESS; + } + + VMA_ASSERT(allocations); + + VMA_DEBUG_LOG("vmaFlushAllocations"); + + VMA_DEBUG_GLOBAL_MUTEX_LOCK + + return allocator->FlushOrInvalidateAllocations(allocationCount, allocations, offsets, sizes, VMA_CACHE_FLUSH); +} + +VMA_CALL_PRE VkResult VMA_CALL_POST vmaInvalidateAllocations( + VmaAllocator allocator, + uint32_t allocationCount, + const VmaAllocation* allocations, + const VkDeviceSize* offsets, + const VkDeviceSize* sizes) +{ + VMA_ASSERT(allocator); + + if(allocationCount == 0) + { + return VK_SUCCESS; + } + + VMA_ASSERT(allocations); + + VMA_DEBUG_LOG("vmaInvalidateAllocations"); + + VMA_DEBUG_GLOBAL_MUTEX_LOCK + + return allocator->FlushOrInvalidateAllocations(allocationCount, allocations, offsets, sizes, VMA_CACHE_INVALIDATE); +} + +VMA_CALL_PRE VkResult VMA_CALL_POST vmaCopyMemoryToAllocation( + VmaAllocator allocator, + const void* pSrcHostPointer, + VmaAllocation dstAllocation, + VkDeviceSize dstAllocationLocalOffset, + VkDeviceSize size) +{ + VMA_ASSERT(allocator && pSrcHostPointer && dstAllocation); + + if(size == 0) + { + return VK_SUCCESS; + } + + VMA_DEBUG_LOG("vmaCopyMemoryToAllocation"); + + VMA_DEBUG_GLOBAL_MUTEX_LOCK + + return allocator->CopyMemoryToAllocation(pSrcHostPointer, dstAllocation, dstAllocationLocalOffset, size); +} + +VMA_CALL_PRE VkResult VMA_CALL_POST vmaCopyAllocationToMemory( + VmaAllocator allocator, + VmaAllocation srcAllocation, + VkDeviceSize srcAllocationLocalOffset, + void* pDstHostPointer, + VkDeviceSize size) +{ + VMA_ASSERT(allocator && srcAllocation && pDstHostPointer); + + if(size == 0) + { + return VK_SUCCESS; + } + + VMA_DEBUG_LOG("vmaCopyAllocationToMemory"); + + VMA_DEBUG_GLOBAL_MUTEX_LOCK + + return allocator->CopyAllocationToMemory(srcAllocation, srcAllocationLocalOffset, pDstHostPointer, size); +} + +VMA_CALL_PRE VkResult VMA_CALL_POST vmaCheckCorruption( + VmaAllocator allocator, + uint32_t memoryTypeBits) +{ + VMA_ASSERT(allocator); + + VMA_DEBUG_LOG("vmaCheckCorruption"); + + VMA_DEBUG_GLOBAL_MUTEX_LOCK + + return allocator->CheckCorruption(memoryTypeBits); +} + +VMA_CALL_PRE VkResult VMA_CALL_POST vmaBeginDefragmentation( + VmaAllocator allocator, + const VmaDefragmentationInfo* pInfo, + VmaDefragmentationContext* pContext) +{ + VMA_ASSERT(allocator && pInfo && pContext); + + VMA_DEBUG_LOG("vmaBeginDefragmentation"); + + if (pInfo->pool != VMA_NULL) + { + // Check if run on supported algorithms + if (pInfo->pool->m_BlockVector.GetAlgorithm() & 
VMA_POOL_CREATE_LINEAR_ALGORITHM_BIT) + return VK_ERROR_FEATURE_NOT_PRESENT; + } + + VMA_DEBUG_GLOBAL_MUTEX_LOCK + + *pContext = vma_new(allocator, VmaDefragmentationContext_T)(allocator, *pInfo); + return VK_SUCCESS; +} + +VMA_CALL_PRE void VMA_CALL_POST vmaEndDefragmentation( + VmaAllocator allocator, + VmaDefragmentationContext context, + VmaDefragmentationStats* pStats) +{ + VMA_ASSERT(allocator && context); + + VMA_DEBUG_LOG("vmaEndDefragmentation"); + + VMA_DEBUG_GLOBAL_MUTEX_LOCK + + if (pStats) + context->GetStats(*pStats); + vma_delete(allocator, context); +} + +VMA_CALL_PRE VkResult VMA_CALL_POST vmaBeginDefragmentationPass( + VmaAllocator VMA_NOT_NULL allocator, + VmaDefragmentationContext VMA_NOT_NULL context, + VmaDefragmentationPassMoveInfo* VMA_NOT_NULL pPassInfo) +{ + VMA_ASSERT(context && pPassInfo); + + VMA_DEBUG_LOG("vmaBeginDefragmentationPass"); + + VMA_DEBUG_GLOBAL_MUTEX_LOCK + + return context->DefragmentPassBegin(*pPassInfo); +} + +VMA_CALL_PRE VkResult VMA_CALL_POST vmaEndDefragmentationPass( + VmaAllocator VMA_NOT_NULL allocator, + VmaDefragmentationContext VMA_NOT_NULL context, + VmaDefragmentationPassMoveInfo* VMA_NOT_NULL pPassInfo) +{ + VMA_ASSERT(context && pPassInfo); + + VMA_DEBUG_LOG("vmaEndDefragmentationPass"); + + VMA_DEBUG_GLOBAL_MUTEX_LOCK + + return context->DefragmentPassEnd(*pPassInfo); +} + +VMA_CALL_PRE VkResult VMA_CALL_POST vmaBindBufferMemory( + VmaAllocator allocator, + VmaAllocation allocation, + VkBuffer buffer) +{ + VMA_ASSERT(allocator && allocation && buffer); + + VMA_DEBUG_LOG("vmaBindBufferMemory"); + + VMA_DEBUG_GLOBAL_MUTEX_LOCK + + return allocator->BindBufferMemory(allocation, 0, buffer, VMA_NULL); +} + +VMA_CALL_PRE VkResult VMA_CALL_POST vmaBindBufferMemory2( + VmaAllocator allocator, + VmaAllocation allocation, + VkDeviceSize allocationLocalOffset, + VkBuffer buffer, + const void* pNext) +{ + VMA_ASSERT(allocator && allocation && buffer); + + VMA_DEBUG_LOG("vmaBindBufferMemory2"); + + VMA_DEBUG_GLOBAL_MUTEX_LOCK + + return allocator->BindBufferMemory(allocation, allocationLocalOffset, buffer, pNext); +} + +VMA_CALL_PRE VkResult VMA_CALL_POST vmaBindImageMemory( + VmaAllocator allocator, + VmaAllocation allocation, + VkImage image) +{ + VMA_ASSERT(allocator && allocation && image); + + VMA_DEBUG_LOG("vmaBindImageMemory"); + + VMA_DEBUG_GLOBAL_MUTEX_LOCK + + return allocator->BindImageMemory(allocation, 0, image, VMA_NULL); +} + +VMA_CALL_PRE VkResult VMA_CALL_POST vmaBindImageMemory2( + VmaAllocator allocator, + VmaAllocation allocation, + VkDeviceSize allocationLocalOffset, + VkImage image, + const void* pNext) +{ + VMA_ASSERT(allocator && allocation && image); + + VMA_DEBUG_LOG("vmaBindImageMemory2"); + + VMA_DEBUG_GLOBAL_MUTEX_LOCK + + return allocator->BindImageMemory(allocation, allocationLocalOffset, image, pNext); +} + +VMA_CALL_PRE VkResult VMA_CALL_POST vmaCreateBuffer( + VmaAllocator allocator, + const VkBufferCreateInfo* pBufferCreateInfo, + const VmaAllocationCreateInfo* pAllocationCreateInfo, + VkBuffer* pBuffer, + VmaAllocation* pAllocation, + VmaAllocationInfo* pAllocationInfo) +{ + VMA_ASSERT(allocator && pBufferCreateInfo && pAllocationCreateInfo && pBuffer && pAllocation); + + if(pBufferCreateInfo->size == 0) + { + return VK_ERROR_INITIALIZATION_FAILED; + } + if((pBufferCreateInfo->usage & VK_BUFFER_USAGE_SHADER_DEVICE_ADDRESS_BIT_COPY) != 0 && + !allocator->m_UseKhrBufferDeviceAddress) + { + VMA_ASSERT(0 && "Creating a buffer with VK_BUFFER_USAGE_SHADER_DEVICE_ADDRESS_BIT is not valid if 
VMA_ALLOCATOR_CREATE_BUFFER_DEVICE_ADDRESS_BIT was not used."); + return VK_ERROR_INITIALIZATION_FAILED; + } + + VMA_DEBUG_LOG("vmaCreateBuffer"); + + VMA_DEBUG_GLOBAL_MUTEX_LOCK + + *pBuffer = VK_NULL_HANDLE; + *pAllocation = VK_NULL_HANDLE; + + // 1. Create VkBuffer. + VkResult res = (*allocator->GetVulkanFunctions().vkCreateBuffer)( + allocator->m_hDevice, + pBufferCreateInfo, + allocator->GetAllocationCallbacks(), + pBuffer); + if(res >= 0) + { + // 2. vkGetBufferMemoryRequirements. + VkMemoryRequirements vkMemReq = {}; + bool requiresDedicatedAllocation = false; + bool prefersDedicatedAllocation = false; + allocator->GetBufferMemoryRequirements(*pBuffer, vkMemReq, + requiresDedicatedAllocation, prefersDedicatedAllocation); + + // 3. Allocate memory using allocator. + res = allocator->AllocateMemory( + vkMemReq, + requiresDedicatedAllocation, + prefersDedicatedAllocation, + *pBuffer, // dedicatedBuffer + VK_NULL_HANDLE, // dedicatedImage + VmaBufferImageUsage(*pBufferCreateInfo, allocator->m_UseKhrMaintenance5), // dedicatedBufferImageUsage + *pAllocationCreateInfo, + VMA_SUBALLOCATION_TYPE_BUFFER, + 1, // allocationCount + pAllocation); + + if(res >= 0) + { + // 3. Bind buffer with memory. + if((pAllocationCreateInfo->flags & VMA_ALLOCATION_CREATE_DONT_BIND_BIT) == 0) + { + res = allocator->BindBufferMemory(*pAllocation, 0, *pBuffer, VMA_NULL); + } + if(res >= 0) + { + // All steps succeeded. + #if VMA_STATS_STRING_ENABLED + (*pAllocation)->InitBufferUsage(*pBufferCreateInfo, allocator->m_UseKhrMaintenance5); + #endif + if(pAllocationInfo != VMA_NULL) + { + allocator->GetAllocationInfo(*pAllocation, pAllocationInfo); + } + + return VK_SUCCESS; + } + allocator->FreeMemory( + 1, // allocationCount + pAllocation); + *pAllocation = VK_NULL_HANDLE; + (*allocator->GetVulkanFunctions().vkDestroyBuffer)(allocator->m_hDevice, *pBuffer, allocator->GetAllocationCallbacks()); + *pBuffer = VK_NULL_HANDLE; + return res; + } + (*allocator->GetVulkanFunctions().vkDestroyBuffer)(allocator->m_hDevice, *pBuffer, allocator->GetAllocationCallbacks()); + *pBuffer = VK_NULL_HANDLE; + return res; + } + return res; +} + +VMA_CALL_PRE VkResult VMA_CALL_POST vmaCreateBufferWithAlignment( + VmaAllocator allocator, + const VkBufferCreateInfo* pBufferCreateInfo, + const VmaAllocationCreateInfo* pAllocationCreateInfo, + VkDeviceSize minAlignment, + VkBuffer* pBuffer, + VmaAllocation* pAllocation, + VmaAllocationInfo* pAllocationInfo) +{ + VMA_ASSERT(allocator && pBufferCreateInfo && pAllocationCreateInfo && VmaIsPow2(minAlignment) && pBuffer && pAllocation); + + if(pBufferCreateInfo->size == 0) + { + return VK_ERROR_INITIALIZATION_FAILED; + } + if((pBufferCreateInfo->usage & VK_BUFFER_USAGE_SHADER_DEVICE_ADDRESS_BIT_COPY) != 0 && + !allocator->m_UseKhrBufferDeviceAddress) + { + VMA_ASSERT(0 && "Creating a buffer with VK_BUFFER_USAGE_SHADER_DEVICE_ADDRESS_BIT is not valid if VMA_ALLOCATOR_CREATE_BUFFER_DEVICE_ADDRESS_BIT was not used."); + return VK_ERROR_INITIALIZATION_FAILED; + } + + VMA_DEBUG_LOG("vmaCreateBufferWithAlignment"); + + VMA_DEBUG_GLOBAL_MUTEX_LOCK + + *pBuffer = VK_NULL_HANDLE; + *pAllocation = VK_NULL_HANDLE; + + // 1. Create VkBuffer. + VkResult res = (*allocator->GetVulkanFunctions().vkCreateBuffer)( + allocator->m_hDevice, + pBufferCreateInfo, + allocator->GetAllocationCallbacks(), + pBuffer); + if(res >= 0) + { + // 2. vkGetBufferMemoryRequirements. 
+ VkMemoryRequirements vkMemReq = {}; + bool requiresDedicatedAllocation = false; + bool prefersDedicatedAllocation = false; + allocator->GetBufferMemoryRequirements(*pBuffer, vkMemReq, + requiresDedicatedAllocation, prefersDedicatedAllocation); + + // 2a. Include minAlignment + vkMemReq.alignment = VMA_MAX(vkMemReq.alignment, minAlignment); + + // 3. Allocate memory using allocator. + res = allocator->AllocateMemory( + vkMemReq, + requiresDedicatedAllocation, + prefersDedicatedAllocation, + *pBuffer, // dedicatedBuffer + VK_NULL_HANDLE, // dedicatedImage + VmaBufferImageUsage(*pBufferCreateInfo, allocator->m_UseKhrMaintenance5), // dedicatedBufferImageUsage + *pAllocationCreateInfo, + VMA_SUBALLOCATION_TYPE_BUFFER, + 1, // allocationCount + pAllocation); + + if(res >= 0) + { + // 3. Bind buffer with memory. + if((pAllocationCreateInfo->flags & VMA_ALLOCATION_CREATE_DONT_BIND_BIT) == 0) + { + res = allocator->BindBufferMemory(*pAllocation, 0, *pBuffer, VMA_NULL); + } + if(res >= 0) + { + // All steps succeeded. + #if VMA_STATS_STRING_ENABLED + (*pAllocation)->InitBufferUsage(*pBufferCreateInfo, allocator->m_UseKhrMaintenance5); + #endif + if(pAllocationInfo != VMA_NULL) + { + allocator->GetAllocationInfo(*pAllocation, pAllocationInfo); + } + + return VK_SUCCESS; + } + allocator->FreeMemory( + 1, // allocationCount + pAllocation); + *pAllocation = VK_NULL_HANDLE; + (*allocator->GetVulkanFunctions().vkDestroyBuffer)(allocator->m_hDevice, *pBuffer, allocator->GetAllocationCallbacks()); + *pBuffer = VK_NULL_HANDLE; + return res; + } + (*allocator->GetVulkanFunctions().vkDestroyBuffer)(allocator->m_hDevice, *pBuffer, allocator->GetAllocationCallbacks()); + *pBuffer = VK_NULL_HANDLE; + return res; + } + return res; +} + +VMA_CALL_PRE VkResult VMA_CALL_POST vmaCreateAliasingBuffer( + VmaAllocator VMA_NOT_NULL allocator, + VmaAllocation VMA_NOT_NULL allocation, + const VkBufferCreateInfo* VMA_NOT_NULL pBufferCreateInfo, + VkBuffer VMA_NULLABLE_NON_DISPATCHABLE* VMA_NOT_NULL pBuffer) +{ + return vmaCreateAliasingBuffer2(allocator, allocation, 0, pBufferCreateInfo, pBuffer); +} + +VMA_CALL_PRE VkResult VMA_CALL_POST vmaCreateAliasingBuffer2( + VmaAllocator VMA_NOT_NULL allocator, + VmaAllocation VMA_NOT_NULL allocation, + VkDeviceSize allocationLocalOffset, + const VkBufferCreateInfo* VMA_NOT_NULL pBufferCreateInfo, + VkBuffer VMA_NULLABLE_NON_DISPATCHABLE* VMA_NOT_NULL pBuffer) +{ + VMA_ASSERT(allocator && pBufferCreateInfo && pBuffer && allocation); + VMA_ASSERT(allocationLocalOffset + pBufferCreateInfo->size <= allocation->GetSize()); + + VMA_DEBUG_LOG("vmaCreateAliasingBuffer2"); + + *pBuffer = VK_NULL_HANDLE; + + if (pBufferCreateInfo->size == 0) + { + return VK_ERROR_INITIALIZATION_FAILED; + } + if ((pBufferCreateInfo->usage & VK_BUFFER_USAGE_SHADER_DEVICE_ADDRESS_BIT_COPY) != 0 && + !allocator->m_UseKhrBufferDeviceAddress) + { + VMA_ASSERT(0 && "Creating a buffer with VK_BUFFER_USAGE_SHADER_DEVICE_ADDRESS_BIT is not valid if VMA_ALLOCATOR_CREATE_BUFFER_DEVICE_ADDRESS_BIT was not used."); + return VK_ERROR_INITIALIZATION_FAILED; + } + + VMA_DEBUG_GLOBAL_MUTEX_LOCK + + // 1. Create VkBuffer. + VkResult res = (*allocator->GetVulkanFunctions().vkCreateBuffer)( + allocator->m_hDevice, + pBufferCreateInfo, + allocator->GetAllocationCallbacks(), + pBuffer); + if (res >= 0) + { + // 2. Bind buffer with memory. 
+ res = allocator->BindBufferMemory(allocation, allocationLocalOffset, *pBuffer, VMA_NULL); + if (res >= 0) + { + return VK_SUCCESS; + } + (*allocator->GetVulkanFunctions().vkDestroyBuffer)(allocator->m_hDevice, *pBuffer, allocator->GetAllocationCallbacks()); + } + return res; +} + +VMA_CALL_PRE void VMA_CALL_POST vmaDestroyBuffer( + VmaAllocator allocator, + VkBuffer buffer, + VmaAllocation allocation) +{ + VMA_ASSERT(allocator); + + if(buffer == VK_NULL_HANDLE && allocation == VK_NULL_HANDLE) + { + return; + } + + VMA_DEBUG_LOG("vmaDestroyBuffer"); + + VMA_DEBUG_GLOBAL_MUTEX_LOCK + + if(buffer != VK_NULL_HANDLE) + { + (*allocator->GetVulkanFunctions().vkDestroyBuffer)(allocator->m_hDevice, buffer, allocator->GetAllocationCallbacks()); + } + + if(allocation != VK_NULL_HANDLE) + { + allocator->FreeMemory( + 1, // allocationCount + &allocation); + } +} + +VMA_CALL_PRE VkResult VMA_CALL_POST vmaCreateImage( + VmaAllocator allocator, + const VkImageCreateInfo* pImageCreateInfo, + const VmaAllocationCreateInfo* pAllocationCreateInfo, + VkImage* pImage, + VmaAllocation* pAllocation, + VmaAllocationInfo* pAllocationInfo) +{ + VMA_ASSERT(allocator && pImageCreateInfo && pAllocationCreateInfo && pImage && pAllocation); + + if(pImageCreateInfo->extent.width == 0 || + pImageCreateInfo->extent.height == 0 || + pImageCreateInfo->extent.depth == 0 || + pImageCreateInfo->mipLevels == 0 || + pImageCreateInfo->arrayLayers == 0) + { + return VK_ERROR_INITIALIZATION_FAILED; + } + + VMA_DEBUG_LOG("vmaCreateImage"); + + VMA_DEBUG_GLOBAL_MUTEX_LOCK + + *pImage = VK_NULL_HANDLE; + *pAllocation = VK_NULL_HANDLE; + + // 1. Create VkImage. + VkResult res = (*allocator->GetVulkanFunctions().vkCreateImage)( + allocator->m_hDevice, + pImageCreateInfo, + allocator->GetAllocationCallbacks(), + pImage); + if(res == VK_SUCCESS) + { + VmaSuballocationType suballocType = pImageCreateInfo->tiling == VK_IMAGE_TILING_OPTIMAL ? + VMA_SUBALLOCATION_TYPE_IMAGE_OPTIMAL : + VMA_SUBALLOCATION_TYPE_IMAGE_LINEAR; + + // 2. Allocate memory using allocator. + VkMemoryRequirements vkMemReq = {}; + bool requiresDedicatedAllocation = false; + bool prefersDedicatedAllocation = false; + allocator->GetImageMemoryRequirements(*pImage, vkMemReq, + requiresDedicatedAllocation, prefersDedicatedAllocation); + + res = allocator->AllocateMemory( + vkMemReq, + requiresDedicatedAllocation, + prefersDedicatedAllocation, + VK_NULL_HANDLE, // dedicatedBuffer + *pImage, // dedicatedImage + VmaBufferImageUsage(*pImageCreateInfo), // dedicatedBufferImageUsage + *pAllocationCreateInfo, + suballocType, + 1, // allocationCount + pAllocation); + + if(res == VK_SUCCESS) + { + // 3. Bind image with memory. + if((pAllocationCreateInfo->flags & VMA_ALLOCATION_CREATE_DONT_BIND_BIT) == 0) + { + res = allocator->BindImageMemory(*pAllocation, 0, *pImage, VMA_NULL); + } + if(res == VK_SUCCESS) + { + // All steps succeeded. 
+ #if VMA_STATS_STRING_ENABLED + (*pAllocation)->InitImageUsage(*pImageCreateInfo); + #endif + if(pAllocationInfo != VMA_NULL) + { + allocator->GetAllocationInfo(*pAllocation, pAllocationInfo); + } + + return VK_SUCCESS; + } + allocator->FreeMemory( + 1, // allocationCount + pAllocation); + *pAllocation = VK_NULL_HANDLE; + (*allocator->GetVulkanFunctions().vkDestroyImage)(allocator->m_hDevice, *pImage, allocator->GetAllocationCallbacks()); + *pImage = VK_NULL_HANDLE; + return res; + } + (*allocator->GetVulkanFunctions().vkDestroyImage)(allocator->m_hDevice, *pImage, allocator->GetAllocationCallbacks()); + *pImage = VK_NULL_HANDLE; + return res; + } + return res; +} + +VMA_CALL_PRE VkResult VMA_CALL_POST vmaCreateAliasingImage( + VmaAllocator VMA_NOT_NULL allocator, + VmaAllocation VMA_NOT_NULL allocation, + const VkImageCreateInfo* VMA_NOT_NULL pImageCreateInfo, + VkImage VMA_NULLABLE_NON_DISPATCHABLE* VMA_NOT_NULL pImage) +{ + return vmaCreateAliasingImage2(allocator, allocation, 0, pImageCreateInfo, pImage); +} + +VMA_CALL_PRE VkResult VMA_CALL_POST vmaCreateAliasingImage2( + VmaAllocator VMA_NOT_NULL allocator, + VmaAllocation VMA_NOT_NULL allocation, + VkDeviceSize allocationLocalOffset, + const VkImageCreateInfo* VMA_NOT_NULL pImageCreateInfo, + VkImage VMA_NULLABLE_NON_DISPATCHABLE* VMA_NOT_NULL pImage) +{ + VMA_ASSERT(allocator && pImageCreateInfo && pImage && allocation); + + *pImage = VK_NULL_HANDLE; + + VMA_DEBUG_LOG("vmaCreateImage2"); + + if (pImageCreateInfo->extent.width == 0 || + pImageCreateInfo->extent.height == 0 || + pImageCreateInfo->extent.depth == 0 || + pImageCreateInfo->mipLevels == 0 || + pImageCreateInfo->arrayLayers == 0) + { + return VK_ERROR_INITIALIZATION_FAILED; + } + + VMA_DEBUG_GLOBAL_MUTEX_LOCK + + // 1. Create VkImage. + VkResult res = (*allocator->GetVulkanFunctions().vkCreateImage)( + allocator->m_hDevice, + pImageCreateInfo, + allocator->GetAllocationCallbacks(), + pImage); + if (res >= 0) + { + // 2. Bind image with memory. 
+ res = allocator->BindImageMemory(allocation, allocationLocalOffset, *pImage, VMA_NULL); + if (res >= 0) + { + return VK_SUCCESS; + } + (*allocator->GetVulkanFunctions().vkDestroyImage)(allocator->m_hDevice, *pImage, allocator->GetAllocationCallbacks()); + } + return res; +} + +VMA_CALL_PRE void VMA_CALL_POST vmaDestroyImage( + VmaAllocator VMA_NOT_NULL allocator, + VkImage VMA_NULLABLE_NON_DISPATCHABLE image, + VmaAllocation VMA_NULLABLE allocation) +{ + VMA_ASSERT(allocator); + + if(image == VK_NULL_HANDLE && allocation == VK_NULL_HANDLE) + { + return; + } + + VMA_DEBUG_LOG("vmaDestroyImage"); + + VMA_DEBUG_GLOBAL_MUTEX_LOCK + + if(image != VK_NULL_HANDLE) + { + (*allocator->GetVulkanFunctions().vkDestroyImage)(allocator->m_hDevice, image, allocator->GetAllocationCallbacks()); + } + if(allocation != VK_NULL_HANDLE) + { + allocator->FreeMemory( + 1, // allocationCount + &allocation); + } +} + +VMA_CALL_PRE VkResult VMA_CALL_POST vmaCreateVirtualBlock( + const VmaVirtualBlockCreateInfo* VMA_NOT_NULL pCreateInfo, + VmaVirtualBlock VMA_NULLABLE * VMA_NOT_NULL pVirtualBlock) +{ + VMA_ASSERT(pCreateInfo && pVirtualBlock); + VMA_ASSERT(pCreateInfo->size > 0); + VMA_DEBUG_LOG("vmaCreateVirtualBlock"); + VMA_DEBUG_GLOBAL_MUTEX_LOCK; + *pVirtualBlock = vma_new(pCreateInfo->pAllocationCallbacks, VmaVirtualBlock_T)(*pCreateInfo); + VkResult res = (*pVirtualBlock)->Init(); + if(res < 0) + { + vma_delete(pCreateInfo->pAllocationCallbacks, *pVirtualBlock); + *pVirtualBlock = VK_NULL_HANDLE; + } + return res; +} + +VMA_CALL_PRE void VMA_CALL_POST vmaDestroyVirtualBlock(VmaVirtualBlock VMA_NULLABLE virtualBlock) +{ + if(virtualBlock != VK_NULL_HANDLE) + { + VMA_DEBUG_LOG("vmaDestroyVirtualBlock"); + VMA_DEBUG_GLOBAL_MUTEX_LOCK; + VkAllocationCallbacks allocationCallbacks = virtualBlock->m_AllocationCallbacks; // Have to copy the callbacks when destroying. + vma_delete(&allocationCallbacks, virtualBlock); + } +} + +VMA_CALL_PRE VkBool32 VMA_CALL_POST vmaIsVirtualBlockEmpty(VmaVirtualBlock VMA_NOT_NULL virtualBlock) +{ + VMA_ASSERT(virtualBlock != VK_NULL_HANDLE); + VMA_DEBUG_LOG("vmaIsVirtualBlockEmpty"); + VMA_DEBUG_GLOBAL_MUTEX_LOCK; + return virtualBlock->IsEmpty() ? 
VK_TRUE : VK_FALSE; +} + +VMA_CALL_PRE void VMA_CALL_POST vmaGetVirtualAllocationInfo(VmaVirtualBlock VMA_NOT_NULL virtualBlock, + VmaVirtualAllocation VMA_NOT_NULL_NON_DISPATCHABLE allocation, VmaVirtualAllocationInfo* VMA_NOT_NULL pVirtualAllocInfo) +{ + VMA_ASSERT(virtualBlock != VK_NULL_HANDLE && pVirtualAllocInfo != VMA_NULL); + VMA_DEBUG_LOG("vmaGetVirtualAllocationInfo"); + VMA_DEBUG_GLOBAL_MUTEX_LOCK; + virtualBlock->GetAllocationInfo(allocation, *pVirtualAllocInfo); +} + +VMA_CALL_PRE VkResult VMA_CALL_POST vmaVirtualAllocate(VmaVirtualBlock VMA_NOT_NULL virtualBlock, + const VmaVirtualAllocationCreateInfo* VMA_NOT_NULL pCreateInfo, VmaVirtualAllocation VMA_NULLABLE_NON_DISPATCHABLE* VMA_NOT_NULL pAllocation, + VkDeviceSize* VMA_NULLABLE pOffset) +{ + VMA_ASSERT(virtualBlock != VK_NULL_HANDLE && pCreateInfo != VMA_NULL && pAllocation != VMA_NULL); + VMA_DEBUG_LOG("vmaVirtualAllocate"); + VMA_DEBUG_GLOBAL_MUTEX_LOCK; + return virtualBlock->Allocate(*pCreateInfo, *pAllocation, pOffset); +} + +VMA_CALL_PRE void VMA_CALL_POST vmaVirtualFree(VmaVirtualBlock VMA_NOT_NULL virtualBlock, VmaVirtualAllocation VMA_NULLABLE_NON_DISPATCHABLE allocation) +{ + if(allocation != VK_NULL_HANDLE) + { + VMA_ASSERT(virtualBlock != VK_NULL_HANDLE); + VMA_DEBUG_LOG("vmaVirtualFree"); + VMA_DEBUG_GLOBAL_MUTEX_LOCK; + virtualBlock->Free(allocation); + } +} + +VMA_CALL_PRE void VMA_CALL_POST vmaClearVirtualBlock(VmaVirtualBlock VMA_NOT_NULL virtualBlock) +{ + VMA_ASSERT(virtualBlock != VK_NULL_HANDLE); + VMA_DEBUG_LOG("vmaClearVirtualBlock"); + VMA_DEBUG_GLOBAL_MUTEX_LOCK; + virtualBlock->Clear(); +} + +VMA_CALL_PRE void VMA_CALL_POST vmaSetVirtualAllocationUserData(VmaVirtualBlock VMA_NOT_NULL virtualBlock, + VmaVirtualAllocation VMA_NOT_NULL_NON_DISPATCHABLE allocation, void* VMA_NULLABLE pUserData) +{ + VMA_ASSERT(virtualBlock != VK_NULL_HANDLE); + VMA_DEBUG_LOG("vmaSetVirtualAllocationUserData"); + VMA_DEBUG_GLOBAL_MUTEX_LOCK; + virtualBlock->SetAllocationUserData(allocation, pUserData); +} + +VMA_CALL_PRE void VMA_CALL_POST vmaGetVirtualBlockStatistics(VmaVirtualBlock VMA_NOT_NULL virtualBlock, + VmaStatistics* VMA_NOT_NULL pStats) +{ + VMA_ASSERT(virtualBlock != VK_NULL_HANDLE && pStats != VMA_NULL); + VMA_DEBUG_LOG("vmaGetVirtualBlockStatistics"); + VMA_DEBUG_GLOBAL_MUTEX_LOCK; + virtualBlock->GetStatistics(*pStats); +} + +VMA_CALL_PRE void VMA_CALL_POST vmaCalculateVirtualBlockStatistics(VmaVirtualBlock VMA_NOT_NULL virtualBlock, + VmaDetailedStatistics* VMA_NOT_NULL pStats) +{ + VMA_ASSERT(virtualBlock != VK_NULL_HANDLE && pStats != VMA_NULL); + VMA_DEBUG_LOG("vmaCalculateVirtualBlockStatistics"); + VMA_DEBUG_GLOBAL_MUTEX_LOCK; + virtualBlock->CalculateDetailedStatistics(*pStats); +} + +#if VMA_STATS_STRING_ENABLED + +VMA_CALL_PRE void VMA_CALL_POST vmaBuildVirtualBlockStatsString(VmaVirtualBlock VMA_NOT_NULL virtualBlock, + char* VMA_NULLABLE * VMA_NOT_NULL ppStatsString, VkBool32 detailedMap) +{ + VMA_ASSERT(virtualBlock != VK_NULL_HANDLE && ppStatsString != VMA_NULL); + VMA_DEBUG_GLOBAL_MUTEX_LOCK; + const VkAllocationCallbacks* allocationCallbacks = virtualBlock->GetAllocationCallbacks(); + VmaStringBuilder sb(allocationCallbacks); + virtualBlock->BuildStatsString(detailedMap != VK_FALSE, sb); + *ppStatsString = VmaCreateStringCopy(allocationCallbacks, sb.GetData(), sb.GetLength()); +} + +VMA_CALL_PRE void VMA_CALL_POST vmaFreeVirtualBlockStatsString(VmaVirtualBlock VMA_NOT_NULL virtualBlock, + char* VMA_NULLABLE pStatsString) +{ + if(pStatsString != VMA_NULL) + { + VMA_ASSERT(virtualBlock != 
VK_NULL_HANDLE);
+        VMA_DEBUG_GLOBAL_MUTEX_LOCK;
+        VmaFreeString(virtualBlock->GetAllocationCallbacks(), pStatsString);
+    }
+}
+#if VMA_EXTERNAL_MEMORY_WIN32
+VMA_CALL_PRE VkResult VMA_CALL_POST vmaGetMemoryWin32Handle(VmaAllocator VMA_NOT_NULL allocator,
+    VmaAllocation VMA_NOT_NULL allocation, HANDLE hTargetProcess, HANDLE* VMA_NOT_NULL pHandle)
+{
+    VMA_ASSERT(allocator && allocation && pHandle);
+    VMA_DEBUG_GLOBAL_MUTEX_LOCK;
+    return allocation->GetWin32Handle(allocator, hTargetProcess, pHandle);
+}
+#endif // VMA_EXTERNAL_MEMORY_WIN32
+#endif // VMA_STATS_STRING_ENABLED
+#endif // _VMA_PUBLIC_INTERFACE
+#endif // VMA_IMPLEMENTATION
+
+/**
+\page quick_start Quick start
+
+\section quick_start_project_setup Project setup
+
+Vulkan Memory Allocator comes in the form of an "stb-style" single header file.
+While you can pull the entire repository, e.g. as a Git submodule, and a CMake script is provided,
+you don't need to build it as a separate library project.
+You can add the file "vk_mem_alloc.h" directly to your project and submit it to your code repository next to your other source files.
+
+"Single header" doesn't mean that everything is contained in C/C++ declarations,
+as it tends to be in the case of inline functions or C++ templates.
+It means that the implementation is bundled with the interface in a single file and needs to be enabled using a preprocessor macro.
+If you don't do it properly, you will get linker errors.
+
+To do it properly:
+
+-# Include the "vk_mem_alloc.h" file in each CPP file where you want to use the library.
+   This includes declarations of all members of the library.
+-# In exactly one CPP file, define the following macro before this include.
+   It also enables internal definitions.
+
+\code
+#define VMA_IMPLEMENTATION
+#include "vk_mem_alloc.h"
+\endcode
+
+It may be a good idea to create a dedicated CPP file just for this purpose, e.g. "VmaUsage.cpp".
+
+This library includes the header `<vulkan/vulkan.h>`, which in turn
+includes `<windows.h>` on Windows. If you need some specific macros defined
+before including these headers (like `WIN32_LEAN_AND_MEAN` or
+`WINVER` for Windows, `VK_USE_PLATFORM_WIN32_KHR` for Vulkan), you must define
+them before every `#include` of this library.
+It may be a good idea to create a dedicated header file for this purpose, e.g. "VmaUsage.h",
+that will be included in other source files instead of the VMA header directly.
+
+This library is written in C++, but has a C-compatible interface.
+Thus, you can include and use "vk_mem_alloc.h" in C or C++ code, but the full
+implementation with the `VMA_IMPLEMENTATION` macro must be compiled as C++, NOT as C.
+Some features of C++14 are used and required. Features of C++20 are used optionally when available.
+Some headers of the standard C and C++ library are used, but STL containers, RTTI, and C++ exceptions are not.
+
+
+\section quick_start_initialization Initialization
+
+VMA offers a library interface in a style similar to Vulkan, with object handles like #VmaAllocation,
+structures describing parameters of objects to be created like #VmaAllocationCreateInfo,
+and error codes returned from functions using the `VkResult` type.
+
+The first and the main object that needs to be created is #VmaAllocator.
+It represents the initialization of the entire library.
+Only one such object should be created per `VkDevice`.
+You should create it at program startup, after the `VkDevice` was created, and before any device memory allocation needs to be made.
+It must be destroyed before the `VkDevice` is destroyed.
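+
+A minimal sketch of this lifetime (assuming Vulkan functions are linked statically;
+a complete example, including dynamically fetched function pointers and extension
+flags, follows later in this chapter):
+
+\code
+VmaAllocatorCreateInfo allocatorCreateInfo = {};
+allocatorCreateInfo.physicalDevice = physicalDevice;
+allocatorCreateInfo.device = device;
+allocatorCreateInfo.instance = instance;
+
+VmaAllocator allocator;
+vmaCreateAllocator(&allocatorCreateInfo, &allocator);
+
+// ... use the allocator for the whole lifetime of the VkDevice ...
+
+vmaDestroyAllocator(allocator); // before vkDestroyDevice()
+\endcode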
+
+At program startup:
+
+-# Initialize Vulkan to have `VkInstance`, `VkPhysicalDevice`, `VkDevice` objects.
+-# Fill the VmaAllocatorCreateInfo structure and call vmaCreateAllocator() to create the #VmaAllocator object.
+
+Only the members `physicalDevice`, `device`, `instance` are required.
+However, you should inform the library which Vulkan version you use by setting
+VmaAllocatorCreateInfo::vulkanApiVersion and which extensions you enabled
+by setting VmaAllocatorCreateInfo::flags.
+Otherwise, VMA will use only the features of the Vulkan 1.0 core with no extensions.
+See below for details.
+
+\subsection quick_start_initialization_selecting_vulkan_version Selecting Vulkan version
+
+VMA supports Vulkan versions down to 1.0, for backward compatibility.
+If you want to use a higher version, you need to inform the library about it.
+This is a two-step process.
+
+Step 1: Compile time. By default, VMA compiles with code supporting the highest
+Vulkan version found in the included `<vulkan/vulkan.h>` that is also supported by the library.
+If this is OK, you don't need to do anything.
+However, if you want to compile VMA as if only some lower Vulkan version was available,
+define the macro `VMA_VULKAN_VERSION` before every `#include "vk_mem_alloc.h"`.
+It should have a decimal numeric value in the form ABBBCCC, where A = major, BBB = minor, CCC = patch Vulkan version.
+For example, to compile against Vulkan 1.2:
+
+\code
+#define VMA_VULKAN_VERSION 1002000 // Vulkan 1.2
+#include "vk_mem_alloc.h"
+\endcode
+
+Step 2: Runtime. Even when compiled with a higher Vulkan version available,
+VMA can use only features of a lower version, which is configurable during creation of the #VmaAllocator object.
+By default, only Vulkan 1.0 is used.
+To initialize the allocator with support for a higher Vulkan version, you need to set the member
+VmaAllocatorCreateInfo::vulkanApiVersion to an appropriate value, e.g. using constants like `VK_API_VERSION_1_2`.
+See the code sample below.
+
+\subsection quick_start_initialization_importing_vulkan_functions Importing Vulkan functions
+
+You may need to configure the importing of Vulkan functions. There are 3 ways to do this:
+
+-# **If you link with the Vulkan static library** (e.g. "vulkan-1.lib" on Windows):
+   - You don't need to do anything.
+   - VMA will use these, as the macro `VMA_STATIC_VULKAN_FUNCTIONS` is defined to 1 by default.
+-# **If you want VMA to fetch pointers to Vulkan functions dynamically** using `vkGetInstanceProcAddr`,
+   `vkGetDeviceProcAddr` (this is the option presented in the example below):
+   - Define `VMA_STATIC_VULKAN_FUNCTIONS` to 0, `VMA_DYNAMIC_VULKAN_FUNCTIONS` to 1.
+   - Provide pointers to these two functions via VmaVulkanFunctions::vkGetInstanceProcAddr,
+     VmaVulkanFunctions::vkGetDeviceProcAddr.
+   - The library will fetch pointers to all other functions it needs internally.
+-# **If you fetch pointers to all Vulkan functions in a custom way**, e.g. using some loader like
+   [Volk](https://github.com/zeux/volk):
+   - Define `VMA_STATIC_VULKAN_FUNCTIONS` and `VMA_DYNAMIC_VULKAN_FUNCTIONS` to 0.
+   - Pass these pointers via the structure #VmaVulkanFunctions.
+
+\subsection quick_start_initialization_enabling_extensions Enabling extensions
+
+VMA can automatically use the following Vulkan extensions.
+If you found them available on the selected physical device and you enabled them
+while creating the `VkInstance` / `VkDevice` objects, inform VMA about their availability
+by setting appropriate flags in VmaAllocatorCreateInfo::flags.
+
+Vulkan extension              | VMA flag
+------------------------------|-----------------------------------------------------
+VK_KHR_dedicated_allocation   | #VMA_ALLOCATOR_CREATE_KHR_DEDICATED_ALLOCATION_BIT
+VK_KHR_bind_memory2           | #VMA_ALLOCATOR_CREATE_KHR_BIND_MEMORY2_BIT
+VK_KHR_maintenance4           | #VMA_ALLOCATOR_CREATE_KHR_MAINTENANCE4_BIT
+VK_KHR_maintenance5           | #VMA_ALLOCATOR_CREATE_KHR_MAINTENANCE5_BIT
+VK_EXT_memory_budget          | #VMA_ALLOCATOR_CREATE_EXT_MEMORY_BUDGET_BIT
+VK_KHR_buffer_device_address  | #VMA_ALLOCATOR_CREATE_BUFFER_DEVICE_ADDRESS_BIT
+VK_EXT_memory_priority        | #VMA_ALLOCATOR_CREATE_EXT_MEMORY_PRIORITY_BIT
+VK_AMD_device_coherent_memory | #VMA_ALLOCATOR_CREATE_AMD_DEVICE_COHERENT_MEMORY_BIT
+VK_KHR_external_memory_win32  | #VMA_ALLOCATOR_CREATE_KHR_EXTERNAL_MEMORY_WIN32_BIT
+
+Example with fetching pointers to Vulkan functions dynamically:
+
+\code
+#define VMA_STATIC_VULKAN_FUNCTIONS 0
+#define VMA_DYNAMIC_VULKAN_FUNCTIONS 1
+#include "vk_mem_alloc.h"
+
+...
+
+VmaVulkanFunctions vulkanFunctions = {};
+vulkanFunctions.vkGetInstanceProcAddr = &vkGetInstanceProcAddr;
+vulkanFunctions.vkGetDeviceProcAddr = &vkGetDeviceProcAddr;
+
+VmaAllocatorCreateInfo allocatorCreateInfo = {};
+allocatorCreateInfo.flags = VMA_ALLOCATOR_CREATE_EXT_MEMORY_BUDGET_BIT;
+allocatorCreateInfo.vulkanApiVersion = VK_API_VERSION_1_2;
+allocatorCreateInfo.physicalDevice = physicalDevice;
+allocatorCreateInfo.device = device;
+allocatorCreateInfo.instance = instance;
+allocatorCreateInfo.pVulkanFunctions = &vulkanFunctions;
+
+VmaAllocator allocator;
+vmaCreateAllocator(&allocatorCreateInfo, &allocator);
+
+// Entire program...
+
+// At the end, don't forget to:
+vmaDestroyAllocator(allocator);
+\endcode
+
+
+\subsection quick_start_initialization_other_config Other configuration options
+
+There are additional configuration options available through preprocessor macros that you can define
+before including the VMA header and through parameters passed in #VmaAllocatorCreateInfo.
+They include the possibility to use your own callbacks for host memory allocations (`VkAllocationCallbacks`),
+callbacks for device memory allocations (instead of `vkAllocateMemory`, `vkFreeMemory`),
+or your custom `VMA_ASSERT` macro, among others.
+For more information, see: @ref configuration.
+
+
+\section quick_start_resource_allocation Resource allocation
+
+When you want to create a buffer or image:
+
+-# Fill the `VkBufferCreateInfo` / `VkImageCreateInfo` structure.
+-# Fill the VmaAllocationCreateInfo structure.
+-# Call vmaCreateBuffer() / vmaCreateImage() to get a `VkBuffer`/`VkImage` with memory
+   already allocated and bound to it, plus a #VmaAllocation object that represents its underlying memory.
+
+\code
+VkBufferCreateInfo bufferInfo = { VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO };
+bufferInfo.size = 65536;
+bufferInfo.usage = VK_BUFFER_USAGE_VERTEX_BUFFER_BIT | VK_BUFFER_USAGE_TRANSFER_DST_BIT;
+
+VmaAllocationCreateInfo allocInfo = {};
+allocInfo.usage = VMA_MEMORY_USAGE_AUTO;
+
+VkBuffer buffer;
+VmaAllocation allocation;
+vmaCreateBuffer(allocator, &bufferInfo, &allocInfo, &buffer, &allocation, nullptr);
+\endcode
+
+Don't forget to destroy your buffer and allocation objects when no longer needed:
+
+\code
+vmaDestroyBuffer(allocator, buffer, allocation);
+\endcode
+
+If you need to map the buffer, you must set the flag
+#VMA_ALLOCATION_CREATE_HOST_ACCESS_SEQUENTIAL_WRITE_BIT or #VMA_ALLOCATION_CREATE_HOST_ACCESS_RANDOM_BIT
+in VmaAllocationCreateInfo::flags.
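+
+For example, a variation of the snippet above that makes the buffer mappable for
+sequential writes from the CPU (a minimal sketch; see @ref memory_mapping for details):
+
+\code
+VmaAllocationCreateInfo allocInfo = {};
+allocInfo.usage = VMA_MEMORY_USAGE_AUTO;
+allocInfo.flags = VMA_ALLOCATION_CREATE_HOST_ACCESS_SEQUENTIAL_WRITE_BIT;
+\endcode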
+There are many additional parameters that can control the choice of the memory type to be used for the allocation,
+and other features.
+For more information, see the documentation chapters: @ref choosing_memory_type, @ref memory_mapping.
+
+
+\page choosing_memory_type Choosing memory type
+
+Physical devices in Vulkan support various combinations of memory heaps and
+types. Help with choosing a correct and optimal memory type for your specific
+resource is one of the key features of this library. You can use it by filling
+appropriate members of the VmaAllocationCreateInfo structure, as described below.
+You can also combine multiple methods.
+
+-# If you just want to find a memory type index that meets your requirements, you
+   can use one of the functions: vmaFindMemoryTypeIndexForBufferInfo(),
+   vmaFindMemoryTypeIndexForImageInfo(), vmaFindMemoryTypeIndex().
+-# If you want to allocate a region of device memory without association with any
+   specific image or buffer, you can use the function vmaAllocateMemory(). Usage of
+   this function is not recommended and usually not needed.
+   The vmaAllocateMemoryPages() function is also provided for creating multiple allocations at once,
+   which may be useful for sparse binding.
+-# If you already have a buffer or an image created, want to allocate memory
+   for it, and will then bind it yourself, you can use the functions
+   vmaAllocateMemoryForBuffer(), vmaAllocateMemoryForImage().
+   For binding you should use the functions vmaBindBufferMemory(), vmaBindImageMemory(),
+   or their extended versions vmaBindBufferMemory2(), vmaBindImageMemory2().
+-# If you want to create a buffer or an image, allocate memory for it, and bind
+   them together, all in one call, you can use the functions vmaCreateBuffer(),
+   vmaCreateImage().
+   This is the easiest and recommended way to use this library!
+
+When using 3. or 4., the library internally queries Vulkan for the memory types
+supported for that buffer or image (function `vkGetBufferMemoryRequirements()`)
+and uses only one of these types.
+
+If no memory type can be found that meets all the requirements, these functions
+return `VK_ERROR_FEATURE_NOT_PRESENT`.
+
+You can leave the VmaAllocationCreateInfo structure completely filled with zeros.
+It means no requirements are specified for the memory type.
+It is valid, although not very useful.
+
+\section choosing_memory_type_usage Usage
+
+The easiest way to specify memory requirements is to fill the member
+VmaAllocationCreateInfo::usage using one of the values of the enum #VmaMemoryUsage.
+It defines high-level, common usage types.
+Since version 3 of the library, it is recommended to use #VMA_MEMORY_USAGE_AUTO to let it select the best memory type for your resource automatically.
+
+For example, if you want to create a uniform buffer that will be filled using
+transfer only once or infrequently and then used for rendering every frame as a uniform buffer, you can
+do it using the following code. The buffer will most likely end up in a memory type with
+`VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT` to be fast to access by the GPU device.
+
+\code
+VkBufferCreateInfo bufferInfo = { VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO };
+bufferInfo.size = 65536;
+bufferInfo.usage = VK_BUFFER_USAGE_UNIFORM_BUFFER_BIT | VK_BUFFER_USAGE_TRANSFER_DST_BIT;
+
+VmaAllocationCreateInfo allocInfo = {};
+allocInfo.usage = VMA_MEMORY_USAGE_AUTO;
+
+VkBuffer buffer;
+VmaAllocation allocation;
+vmaCreateBuffer(allocator, &bufferInfo, &allocInfo, &buffer, &allocation, nullptr);
+\endcode
+
+If you have a preference for putting the resource in GPU (device) memory or CPU (host) memory
+on systems with a discrete graphics card, where these memories are separate, you can use
+#VMA_MEMORY_USAGE_AUTO_PREFER_DEVICE or #VMA_MEMORY_USAGE_AUTO_PREFER_HOST.
+
+When using `VMA_MEMORY_USAGE_AUTO*` while you want to map the allocated memory,
+you also need to specify one of the host access flags:
+#VMA_ALLOCATION_CREATE_HOST_ACCESS_SEQUENTIAL_WRITE_BIT or #VMA_ALLOCATION_CREATE_HOST_ACCESS_RANDOM_BIT.
+This will help the library decide on a preferred memory type that has `VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT`,
+so you can map it.
+
+For example, a staging buffer that will be filled via a mapped pointer and then
+used as a source of a transfer to the buffer described previously can be created like this.
+It will likely end up in a memory type that is `HOST_VISIBLE` and `HOST_COHERENT`
+but not `HOST_CACHED` (meaning uncached, write-combined) and not `DEVICE_LOCAL` (meaning system RAM).
+
+\code
+VkBufferCreateInfo stagingBufferInfo = { VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO };
+stagingBufferInfo.size = 65536;
+stagingBufferInfo.usage = VK_BUFFER_USAGE_TRANSFER_SRC_BIT;
+
+VmaAllocationCreateInfo stagingAllocInfo = {};
+stagingAllocInfo.usage = VMA_MEMORY_USAGE_AUTO;
+stagingAllocInfo.flags = VMA_ALLOCATION_CREATE_HOST_ACCESS_SEQUENTIAL_WRITE_BIT;
+
+VkBuffer stagingBuffer;
+VmaAllocation stagingAllocation;
+vmaCreateBuffer(allocator, &stagingBufferInfo, &stagingAllocInfo, &stagingBuffer, &stagingAllocation, nullptr);
+\endcode
+
+For more examples of creating different kinds of resources, see chapter \ref usage_patterns.
+See also: @ref memory_mapping.
+
+Usage values `VMA_MEMORY_USAGE_AUTO*` are legal to use only when the library knows
+about the resource being created by having `VkBufferCreateInfo` / `VkImageCreateInfo` passed,
+so they work with functions like vmaCreateBuffer(), vmaCreateImage(), vmaFindMemoryTypeIndexForBufferInfo() etc.
+If you allocate raw memory using the function vmaAllocateMemory(), you have to use other means of selecting
+the memory type, as described below.
+
+\note
+Old usage values (`VMA_MEMORY_USAGE_GPU_ONLY`, `VMA_MEMORY_USAGE_CPU_ONLY`,
+`VMA_MEMORY_USAGE_CPU_TO_GPU`, `VMA_MEMORY_USAGE_GPU_TO_CPU`, `VMA_MEMORY_USAGE_CPU_COPY`)
+are still available and work the same way as in previous versions of the library
+for backward compatibility, but they are deprecated.
+
+\section choosing_memory_type_required_preferred_flags Required and preferred flags
+
+You can specify more detailed requirements by filling the members
+VmaAllocationCreateInfo::requiredFlags and VmaAllocationCreateInfo::preferredFlags
+with a combination of bits from the enum `VkMemoryPropertyFlags`.
+For example,
+if you want to create a buffer that will be persistently mapped on the host (so it
+must be `HOST_VISIBLE`) and preferably will also be `HOST_COHERENT` and `HOST_CACHED`,
+use the following code:
+
+\code
+VmaAllocationCreateInfo allocInfo = {};
+allocInfo.requiredFlags = VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT;
+allocInfo.preferredFlags = VK_MEMORY_PROPERTY_HOST_COHERENT_BIT | VK_MEMORY_PROPERTY_HOST_CACHED_BIT;
+allocInfo.flags = VMA_ALLOCATION_CREATE_HOST_ACCESS_RANDOM_BIT | VMA_ALLOCATION_CREATE_MAPPED_BIT;
+
+VkBuffer buffer;
+VmaAllocation allocation;
+vmaCreateBuffer(allocator, &bufferInfo, &allocInfo, &buffer, &allocation, nullptr);
+\endcode
+
+A memory type is chosen that has all the required flags and as many preferred
+flags set as possible.
+
+The value passed in VmaAllocationCreateInfo::usage is internally converted to a set of required and preferred flags,
+plus some extra "magic" (heuristics).
+
+\section choosing_memory_type_explicit_memory_types Explicit memory types
+
+If you inspected the memory types available on the physical device and you have
+a preference for memory types that you want to use, you can fill the member
+VmaAllocationCreateInfo::memoryTypeBits. It is a bit mask, where each bit set
+means that a memory type with that index is allowed to be used for the
+allocation. The special value 0, just like `UINT32_MAX`, means there are no
+restrictions on the memory type index.
+
+Please note that this member is NOT just a memory type index.
+Still, you can use it to choose just one specific memory type.
+For example, if you already determined that your buffer should be created in
+memory type 2, use the following code:
+
+\code
+uint32_t memoryTypeIndex = 2;
+
+VmaAllocationCreateInfo allocInfo = {};
+allocInfo.memoryTypeBits = 1u << memoryTypeIndex;
+
+VkBuffer buffer;
+VmaAllocation allocation;
+vmaCreateBuffer(allocator, &bufferInfo, &allocInfo, &buffer, &allocation, nullptr);
+\endcode
+
+You can also use this parameter to exclude some memory types.
+If you inspect the memory heaps and types available on the current physical device and
+you determine that for some reason you don't want to use a specific memory type for the allocation,
+you can enable automatic memory type selection but exclude a certain memory type or types
+by setting all bits of `memoryTypeBits` to 1 except the ones you choose.
+
+\code
+// ...
+uint32_t excludedMemoryTypeIndex = 2;
+VmaAllocationCreateInfo allocInfo = {};
+allocInfo.usage = VMA_MEMORY_USAGE_AUTO;
+allocInfo.memoryTypeBits = ~(1u << excludedMemoryTypeIndex);
+// ...
+\endcode
+
+
+\section choosing_memory_type_custom_memory_pools Custom memory pools
+
+If you allocate from a custom memory pool, all the ways of specifying memory
+requirements described above are not applicable, and the aforementioned members
+of the VmaAllocationCreateInfo structure are ignored. The memory type is selected
+explicitly when creating the pool and is then used to make all the allocations from
+that pool. For further details, see \ref custom_memory_pools.
+
+\section choosing_memory_type_dedicated_allocations Dedicated allocations
+
+Memory for allocations is reserved out of a larger block of `VkDeviceMemory`
+allocated from Vulkan internally. That is the main feature of this whole library.
+You can still request a separate memory block to be created for an allocation,
+just like you would do in a trivial solution without using any allocator.
+In that case, a buffer or image is always bound to that memory at offset 0.
+This is called a "dedicated allocation".
+You can explicitly request it by using the flag #VMA_ALLOCATION_CREATE_DEDICATED_MEMORY_BIT.
+The library can also internally decide to use a dedicated allocation in some cases, e.g.:
+
+- When the size of the allocation is large.
+- When the [VK_KHR_dedicated_allocation](@ref vk_khr_dedicated_allocation) extension is enabled
+  and it reports that a dedicated allocation is required or recommended for the resource.
+- When allocation of the next big memory block fails due to insufficient device memory,
+  but an allocation with the exact requested size succeeds.
+
+
+\page memory_mapping Memory mapping
+
+To "map memory" in Vulkan means to obtain a CPU pointer to `VkDeviceMemory`,
+to be able to read from it or write to it in CPU code.
+Mapping is possible only for memory allocated from a memory type that has
+the `VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT` flag.
+The functions `vkMapMemory()`, `vkUnmapMemory()` are designed for this purpose.
+You can use them directly with memory allocated by this library,
+but it is not recommended because of the following issue:
+Mapping the same `VkDeviceMemory` block multiple times is illegal - only one mapping at a time is allowed.
+This includes mapping disjoint regions. Mapping is not reference-counted internally by Vulkan.
+It is also not thread-safe.
+Because of this, Vulkan Memory Allocator provides the following facilities:
+
+\note If you want to be able to map an allocation, you need to specify one of the flags
+#VMA_ALLOCATION_CREATE_HOST_ACCESS_SEQUENTIAL_WRITE_BIT or #VMA_ALLOCATION_CREATE_HOST_ACCESS_RANDOM_BIT
+in VmaAllocationCreateInfo::flags. These flags are required for an allocation to be mappable
+when using #VMA_MEMORY_USAGE_AUTO or other `VMA_MEMORY_USAGE_AUTO*` enum values.
+For other usage values they are ignored, and every such allocation made in a `HOST_VISIBLE` memory type is mappable,
+but these flags can still be used for consistency.
+
+\section memory_mapping_copy_functions Copy functions
+
+The easiest way to copy data from a host pointer to an allocation is to use the convenience function vmaCopyMemoryToAllocation().
+It automatically maps the Vulkan memory temporarily (if not already mapped), performs `memcpy`,
+and calls `vkFlushMappedMemoryRanges` (if required - if the memory type is not `HOST_COHERENT`).
+
+It is also the safest one, because using `memcpy` avoids the risk of accidentally introducing memory reads
+(e.g. by doing `pMappedVectors[i] += v`), which may be very slow on memory types that are not `HOST_CACHED`.
+
+\code
+struct ConstantBuffer
+{
+    ...
+};
+ConstantBuffer constantBufferData = ...
+
+VkBufferCreateInfo bufCreateInfo = { VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO };
+bufCreateInfo.size = sizeof(ConstantBuffer);
+bufCreateInfo.usage = VK_BUFFER_USAGE_TRANSFER_SRC_BIT;
+
+VmaAllocationCreateInfo allocCreateInfo = {};
+allocCreateInfo.usage = VMA_MEMORY_USAGE_AUTO;
+allocCreateInfo.flags = VMA_ALLOCATION_CREATE_HOST_ACCESS_SEQUENTIAL_WRITE_BIT;
+
+VkBuffer buf;
+VmaAllocation alloc;
+vmaCreateBuffer(allocator, &bufCreateInfo, &allocCreateInfo, &buf, &alloc, nullptr);
+
+vmaCopyMemoryToAllocation(allocator, &constantBufferData, alloc, 0, sizeof(ConstantBuffer));
+\endcode
+
+A copy in the other direction - from an allocation to a host pointer - can be performed the same way using the function vmaCopyAllocationToMemory().
+
+\section memory_mapping_mapping_functions Mapping functions
+
+The library provides the following functions for mapping a specific allocation: vmaMapMemory(), vmaUnmapMemory().
+They are safer and more convenient to use than the standard Vulkan functions.
+You can map an allocation multiple times simultaneously - mapping is reference-counted internally.
+You can also map different allocations simultaneously regardless of whether they use the same `VkDeviceMemory` block.
+The way it is implemented is that the library always maps the entire memory block, not just the region of the allocation.
+For further details, see the description of the vmaMapMemory() function.
+Example:
+
+\code
+// Having these objects initialized:
+struct ConstantBuffer
+{
+    ...
+};
+ConstantBuffer constantBufferData = ...
+
+VmaAllocator allocator = ...
+VkBuffer constantBuffer = ...
+VmaAllocation constantBufferAllocation = ...
+
+// You can map and fill your buffer using the following code:
+
+void* mappedData;
+vmaMapMemory(allocator, constantBufferAllocation, &mappedData);
+memcpy(mappedData, &constantBufferData, sizeof(constantBufferData));
+vmaUnmapMemory(allocator, constantBufferAllocation);
+\endcode
+
+When mapping, you may see a warning from the Vulkan validation layer similar to this one:
+
+Mapping an image with layout VK_IMAGE_LAYOUT_DEPTH_STENCIL_ATTACHMENT_OPTIMAL can result in undefined behavior if this memory is used by the device. Only GENERAL or PREINITIALIZED should be used.
+
+It happens because the library maps the entire `VkDeviceMemory` block, where different
+types of images and buffers may end up together, especially on GPUs with unified memory like Intel.
+You can safely ignore it if you are sure you access only the memory of the intended
+object that you wanted to map.
+
+
+\section memory_mapping_persistently_mapped_memory Persistently mapped memory
+
+Keeping your memory persistently mapped is generally OK in Vulkan.
+You don't need to unmap it before using its data on the GPU.
+The library provides a special feature designed for that:
+Allocations made with the #VMA_ALLOCATION_CREATE_MAPPED_BIT flag set in
+VmaAllocationCreateInfo::flags stay mapped all the time,
+so you can just access the CPU pointer to it at any time,
+without needing to call any "map" or "unmap" function.
+Example:
+
+\code
+VkBufferCreateInfo bufCreateInfo = { VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO };
+bufCreateInfo.size = sizeof(ConstantBuffer);
+bufCreateInfo.usage = VK_BUFFER_USAGE_TRANSFER_SRC_BIT;
+
+VmaAllocationCreateInfo allocCreateInfo = {};
+allocCreateInfo.usage = VMA_MEMORY_USAGE_AUTO;
+allocCreateInfo.flags = VMA_ALLOCATION_CREATE_HOST_ACCESS_SEQUENTIAL_WRITE_BIT |
+    VMA_ALLOCATION_CREATE_MAPPED_BIT;
+
+VkBuffer buf;
+VmaAllocation alloc;
+VmaAllocationInfo allocInfo;
+vmaCreateBuffer(allocator, &bufCreateInfo, &allocCreateInfo, &buf, &alloc, &allocInfo);
+
+// Buffer is already mapped. You can access its memory.
+memcpy(allocInfo.pMappedData, &constantBufferData, sizeof(constantBufferData));
+\endcode
+
+\note #VMA_ALLOCATION_CREATE_MAPPED_BIT by itself doesn't guarantee that the allocation will end up
+in a mappable memory type.
+For this, you need to also specify #VMA_ALLOCATION_CREATE_HOST_ACCESS_SEQUENTIAL_WRITE_BIT or
+#VMA_ALLOCATION_CREATE_HOST_ACCESS_RANDOM_BIT.
+#VMA_ALLOCATION_CREATE_MAPPED_BIT only guarantees that if the memory is `HOST_VISIBLE`, the allocation will be mapped on creation.
+For an example of how to make use of this fact, see section \ref usage_patterns_advanced_data_uploading.
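+
+Continuing the persistent-mapping example above: if the chosen memory type turns out
+not to be `HOST_COHERENT`, writes through the mapped pointer need to be flushed, as
+described in the next section (a minimal sketch; the call does nothing for coherent
+memory types, so it is safe to do unconditionally):
+
+\code
+memcpy(allocInfo.pMappedData, &constantBufferData, sizeof(constantBufferData));
+// Flush the written range; skipped internally if the memory is HOST_COHERENT.
+vmaFlushAllocation(allocator, alloc, 0, sizeof(constantBufferData));
+\endcode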
+
+
+\page staying_within_budget Staying within budget
+
+When developing a graphics-intensive game or program, it is important to avoid allocating
+more GPU memory than is physically available. When the memory is over-committed,
+various bad things can happen, depending on the specific GPU, graphics driver, and
+operating system:
+
+- It may just work without any problems.
+- The application may slow down because some memory blocks are moved to system RAM
+  and the GPU has to access them through the PCI Express bus.
+- A new allocation may take a very long time to complete, even a few seconds, and possibly
+  freeze the entire system.
+- The new allocation may fail with `VK_ERROR_OUT_OF_DEVICE_MEMORY`.
+- It may even result in a GPU crash (TDR), observed as `VK_ERROR_DEVICE_LOST`
+  returned somewhere later.
+
+\section staying_within_budget_querying_for_budget Querying for budget
+
+To query for current memory usage and available budget, use the function vmaGetHeapBudgets().
+The returned structure #VmaBudget contains quantities expressed in bytes, per Vulkan memory heap.
+
+Please note that this function returns different information and works faster than
+vmaCalculateStatistics(). vmaGetHeapBudgets() can be called every frame or even before every
+allocation, while vmaCalculateStatistics() is intended to be used rarely,
+only to obtain statistical information, e.g. for debugging purposes.
+
+It is recommended to use the VK_EXT_memory_budget device extension to obtain information
+about the budget from the Vulkan device. VMA is able to use this extension automatically.
+When not enabled, the allocator behaves the same way, but then it estimates current usage
+and available budget based on its internal information and Vulkan memory heap sizes,
+which may be less precise. In order to use this extension:
+
+1. Make sure the extensions VK_EXT_memory_budget and VK_KHR_get_physical_device_properties2
+   required by it are available and enable them. Please note that the first is a device
+   extension and the second is an instance extension!
+2. Use flag #VMA_ALLOCATOR_CREATE_EXT_MEMORY_BUDGET_BIT when creating the #VmaAllocator object.
+3. Make sure to call vmaSetCurrentFrameIndex() every frame. The budget is queried from
+   Vulkan inside of it to avoid the overhead of querying it with every allocation.
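+
+A minimal sketch of steps 2-3, assuming `physicalDevice`, `device`, and `instance` were
+created with the required extensions enabled and `frameIndex` is your frame counter:
+
+\code
+VmaAllocatorCreateInfo allocatorCreateInfo = {};
+allocatorCreateInfo.physicalDevice = physicalDevice;
+allocatorCreateInfo.device = device;
+allocatorCreateInfo.instance = instance;
+allocatorCreateInfo.flags = VMA_ALLOCATOR_CREATE_EXT_MEMORY_BUDGET_BIT;
+
+VmaAllocator allocator;
+vmaCreateAllocator(&allocatorCreateInfo, &allocator);
+
+// Once per frame:
+vmaSetCurrentFrameIndex(allocator, frameIndex);
+
+VmaBudget budgets[VK_MAX_MEMORY_HEAPS];
+vmaGetHeapBudgets(allocator, budgets);
+// Inspect budgets[heapIndex].usage versus budgets[heapIndex].budget...
+\endcode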
+
+\section staying_within_budget_controlling_memory_usage Controlling memory usage
+
+There are many ways in which you can try to stay within the budget.
+
+First, when making a new allocation requires allocating a new memory block, the library
+tries not to exceed the budget automatically. If a block with the default recommended size
+(e.g. 256 MB) would go over budget, a smaller block is allocated, possibly even
+dedicated memory for just this resource.
+
+If the size of the requested resource plus current memory usage is more than the
+budget, by default the library still tries to create it, leaving it to the Vulkan
+implementation whether the allocation succeeds or fails. You can change this behavior
+by using the #VMA_ALLOCATION_CREATE_WITHIN_BUDGET_BIT flag. With it, the allocation is
+not made if it would exceed the budget or if the budget is already exceeded.
+VMA then tries to make the allocation from the next eligible Vulkan memory type.
+If all of them fail, the call then fails with `VK_ERROR_OUT_OF_DEVICE_MEMORY`.
+An example usage pattern may be to pass the #VMA_ALLOCATION_CREATE_WITHIN_BUDGET_BIT flag
+when creating resources that are not essential for the application (e.g. the texture
+of a specific object) and not to pass it when creating critically important resources
+(e.g. render targets). A short sketch of this pattern follows at the end of this section.
+
+On AMD graphics cards there is a custom vendor extension available: VK_AMD_memory_overallocation_behavior
+that allows controlling the behavior of the Vulkan implementation in out-of-memory cases -
+whether it should fail with an error code or still allow the allocation.
+Usage of this extension involves only passing an extra structure at Vulkan device creation,
+so it is out of scope of this library.
+
+Finally, you can also use the #VMA_ALLOCATION_CREATE_NEVER_ALLOCATE_BIT flag to make sure
+a new allocation is created only when it fits inside one of the existing memory blocks.
+If it would require allocating a new block, it fails instead with `VK_ERROR_OUT_OF_DEVICE_MEMORY`.
+This also ensures that the function call is very fast because it never goes to Vulkan
+to obtain a new block.
+
+\note Creating \ref custom_memory_pools with VmaPoolCreateInfo::minBlockCount
+set to more than 0 will currently try to allocate memory blocks without checking whether they
+fit within budget.
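+
+A hedged sketch of the pattern mentioned above - requesting that a non-essential texture
+allocation fails cleanly rather than exceeding the budget (`imgCreateInfo` is assumed to be
+filled elsewhere):
+
+\code
+VmaAllocationCreateInfo allocCreateInfo = {};
+allocCreateInfo.usage = VMA_MEMORY_USAGE_AUTO;
+allocCreateInfo.flags = VMA_ALLOCATION_CREATE_WITHIN_BUDGET_BIT;
+
+VkImage img;
+VmaAllocation alloc;
+VkResult res = vmaCreateImage(allocator, &imgCreateInfo, &allocCreateInfo, &img, &alloc, nullptr);
+if(res == VK_ERROR_OUT_OF_DEVICE_MEMORY)
+{
+    // Over budget - skip this optional resource or free something else first.
+}
+\endcode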
+
+
+\page resource_aliasing Resource aliasing (overlap)
+
+New explicit graphics APIs (Vulkan and Direct3D 12), thanks to manual memory
+management, give an opportunity to alias (overlap) multiple resources in the
+same region of memory - a feature not available in the old APIs (Direct3D 11, OpenGL).
+It can be useful to save video memory, but it must be used with caution.
+
+For example, if you know the flow of your whole render frame in advance, you
+are going to use some intermediate textures or buffers only during a small range of render passes,
+and you know these ranges don't overlap in time, you can bind these resources to
+the same place in memory, even if they have completely different parameters (width, height, format etc.).
+
+![Resource aliasing (overlap)](../gfx/Aliasing.png)
+
+Such a scenario is possible using VMA, but you need to create your images manually.
+Then you need to calculate the parameters of the allocation to be made using the formula:
+
+- allocation size = max(size of each image)
+- allocation alignment = max(alignment of each image)
+- allocation memoryTypeBits = bitwise AND(memoryTypeBits of each image)
+
+The following example shows two different images bound to the same place in memory,
+allocated to fit the largest of them.
+
+\code
+// A 512x512 texture to be sampled.
+VkImageCreateInfo img1CreateInfo = { VK_STRUCTURE_TYPE_IMAGE_CREATE_INFO };
+img1CreateInfo.imageType = VK_IMAGE_TYPE_2D;
+img1CreateInfo.extent.width = 512;
+img1CreateInfo.extent.height = 512;
+img1CreateInfo.extent.depth = 1;
+img1CreateInfo.mipLevels = 10;
+img1CreateInfo.arrayLayers = 1;
+img1CreateInfo.format = VK_FORMAT_R8G8B8A8_SRGB;
+img1CreateInfo.tiling = VK_IMAGE_TILING_OPTIMAL;
+img1CreateInfo.initialLayout = VK_IMAGE_LAYOUT_UNDEFINED;
+img1CreateInfo.usage = VK_IMAGE_USAGE_TRANSFER_DST_BIT | VK_IMAGE_USAGE_SAMPLED_BIT;
+img1CreateInfo.samples = VK_SAMPLE_COUNT_1_BIT;
+
+// A full screen texture to be used as color attachment.
+VkImageCreateInfo img2CreateInfo = { VK_STRUCTURE_TYPE_IMAGE_CREATE_INFO };
+img2CreateInfo.imageType = VK_IMAGE_TYPE_2D;
+img2CreateInfo.extent.width = 1920;
+img2CreateInfo.extent.height = 1080;
+img2CreateInfo.extent.depth = 1;
+img2CreateInfo.mipLevels = 1;
+img2CreateInfo.arrayLayers = 1;
+img2CreateInfo.format = VK_FORMAT_R8G8B8A8_UNORM;
+img2CreateInfo.tiling = VK_IMAGE_TILING_OPTIMAL;
+img2CreateInfo.initialLayout = VK_IMAGE_LAYOUT_UNDEFINED;
+img2CreateInfo.usage = VK_IMAGE_USAGE_SAMPLED_BIT | VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT;
+img2CreateInfo.samples = VK_SAMPLE_COUNT_1_BIT;
+
+VkResult res;
+VkImage img1;
+res = vkCreateImage(device, &img1CreateInfo, nullptr, &img1);
+VkImage img2;
+res = vkCreateImage(device, &img2CreateInfo, nullptr, &img2);
+
+VkMemoryRequirements img1MemReq;
+vkGetImageMemoryRequirements(device, img1, &img1MemReq);
+VkMemoryRequirements img2MemReq;
+vkGetImageMemoryRequirements(device, img2, &img2MemReq);
+
+VkMemoryRequirements finalMemReq = {};
+finalMemReq.size = std::max(img1MemReq.size, img2MemReq.size);
+finalMemReq.alignment = std::max(img1MemReq.alignment, img2MemReq.alignment);
+finalMemReq.memoryTypeBits = img1MemReq.memoryTypeBits & img2MemReq.memoryTypeBits;
+// Validate if(finalMemReq.memoryTypeBits != 0)
+
+VmaAllocationCreateInfo allocCreateInfo = {};
+allocCreateInfo.preferredFlags = VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT;
+
+VmaAllocation alloc;
+res = vmaAllocateMemory(allocator, &finalMemReq, &allocCreateInfo, &alloc, nullptr);
+
+res = vmaBindImageMemory(allocator, alloc, img1);
+res = vmaBindImageMemory(allocator, alloc, img2);
+
+// You can use img1, img2 here, but not at the same time!
+
+vmaFreeMemory(allocator, alloc);
+vkDestroyImage(device, img2, nullptr);
+vkDestroyImage(device, img1, nullptr);
+\endcode
+
+VMA also provides convenience functions that create a buffer or image and bind it to memory
+represented by an existing #VmaAllocation:
+vmaCreateAliasingBuffer(), vmaCreateAliasingBuffer2(),
+vmaCreateAliasingImage(), vmaCreateAliasingImage2().
+Versions with "2" offer an additional parameter `allocationLocalOffset`.
+
+Remember that using resources that alias in memory requires proper synchronization.
+You need to issue a memory barrier to make sure commands that use `img1` and `img2`
+don't overlap on the GPU timeline.
+You also need to treat a resource after aliasing as uninitialized - containing garbage data.
+
+For example, if you use `img1` and then want to use `img2`, you need to issue
+an image memory barrier for `img2` with `oldLayout` = `VK_IMAGE_LAYOUT_UNDEFINED`.
+
+Additional considerations:
+
+- Vulkan also allows interpreting the contents of memory between aliasing resources consistently in some cases.
+See chapter 11.8. "Memory Aliasing" of the Vulkan specification or the `VK_IMAGE_CREATE_ALIAS_BIT` flag.
+- You can create a more complex layout where different images and buffers are bound
+at different offsets inside one large allocation. For example, one can imagine
+a big texture used in some render passes, aliasing with a set of many small buffers
+used in some further passes. To bind a resource at a non-zero offset in an allocation,
+use vmaBindBufferMemory2() / vmaBindImageMemory2().
+- Before allocating memory for the resources you want to alias, check `memoryTypeBits`
+returned in the memory requirements of each resource to make sure the bits overlap.
+Some GPUs may expose multiple memory types suitable e.g. only for buffers or
+images with `COLOR_ATTACHMENT` usage, so the sets of memory types supported by your
+resources may be disjoint. Aliasing them is not possible in that case.
+
+
+\page custom_memory_pools Custom memory pools
+
+A memory pool contains a number of `VkDeviceMemory` blocks.
+The library automatically creates and manages a default pool for each memory type available on the device.
+The default memory pool automatically grows in size.
+The size of allocated blocks is also variable and managed automatically.
+You are using default pools whenever you leave VmaAllocationCreateInfo::pool = null.
+
+You can create a custom pool and allocate memory out of it.
+It can be useful if you want to:
+
+- Keep a certain kind of allocations separate from others.
+- Enforce a particular, fixed size of Vulkan memory blocks.
+- Limit the maximum amount of Vulkan memory allocated for that pool.
+- Reserve a minimum or fixed amount of Vulkan memory always preallocated for that pool.
+- Use extra parameters for a set of your allocations that are available in #VmaPoolCreateInfo but not in
+  #VmaAllocationCreateInfo - e.g., custom minimum alignment, custom `pNext` chain.
+- Perform defragmentation on a specific subset of your allocations.
+
+To use custom memory pools:
+
+-# Fill a VmaPoolCreateInfo structure.
+-# Call vmaCreatePool() to obtain a #VmaPool handle.
+-# When making an allocation, set VmaAllocationCreateInfo::pool to this handle.
+   You don't need to specify any other parameters of this structure, like `usage`.
+
+Example:
+
+\code
+// Find memoryTypeIndex for the pool.
+VkBufferCreateInfo sampleBufCreateInfo = { VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO };
+sampleBufCreateInfo.size = 0x10000; // Doesn't matter.
+sampleBufCreateInfo.usage = VK_BUFFER_USAGE_UNIFORM_BUFFER_BIT | VK_BUFFER_USAGE_TRANSFER_DST_BIT;
+
+VmaAllocationCreateInfo sampleAllocCreateInfo = {};
+sampleAllocCreateInfo.usage = VMA_MEMORY_USAGE_AUTO;
+
+uint32_t memTypeIndex;
+VkResult res = vmaFindMemoryTypeIndexForBufferInfo(allocator,
+    &sampleBufCreateInfo, &sampleAllocCreateInfo, &memTypeIndex);
+// Check res...
+
+// Create a pool that can have at most 2 blocks, 128 MiB each.
+VmaPoolCreateInfo poolCreateInfo = {};
+poolCreateInfo.memoryTypeIndex = memTypeIndex;
+poolCreateInfo.blockSize = 128ull * 1024 * 1024;
+poolCreateInfo.maxBlockCount = 2;
+
+VmaPool pool;
+res = vmaCreatePool(allocator, &poolCreateInfo, &pool);
+// Check res...
+
+// Allocate a buffer out of it.
+VkBufferCreateInfo bufCreateInfo = { VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO };
+bufCreateInfo.size = 1024;
+bufCreateInfo.usage = VK_BUFFER_USAGE_UNIFORM_BUFFER_BIT | VK_BUFFER_USAGE_TRANSFER_DST_BIT;
+
+VmaAllocationCreateInfo allocCreateInfo = {};
+allocCreateInfo.pool = pool;
+
+VkBuffer buf;
+VmaAllocation alloc;
+res = vmaCreateBuffer(allocator, &bufCreateInfo, &allocCreateInfo, &buf, &alloc, nullptr);
+// Check res...
+\endcode
+
+You have to free all allocations made from this pool before destroying it.
+
+\code
+vmaDestroyBuffer(allocator, buf, alloc);
+vmaDestroyPool(allocator, pool);
+\endcode
+
+New versions of this library support creating dedicated allocations in custom pools.
+It is supported only when VmaPoolCreateInfo::blockSize = 0.
+To use this feature, set VmaAllocationCreateInfo::pool to your custom pool and
+VmaAllocationCreateInfo::flags to #VMA_ALLOCATION_CREATE_DEDICATED_MEMORY_BIT.
+
+
+\section custom_memory_pools_MemTypeIndex Choosing memory type index
+
+When creating a pool, you must explicitly specify the memory type index.
+To find the one suitable for your buffers or images, you can use the helper functions
+vmaFindMemoryTypeIndexForBufferInfo(), vmaFindMemoryTypeIndexForImageInfo().
+You need to provide structures with example parameters of the buffers or images
+that you are going to create in that pool.
+
+\code
+VkBufferCreateInfo exampleBufCreateInfo = { VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO };
+exampleBufCreateInfo.size = 1024; // Doesn't matter
+exampleBufCreateInfo.usage = VK_BUFFER_USAGE_UNIFORM_BUFFER_BIT | VK_BUFFER_USAGE_TRANSFER_DST_BIT;
+
+VmaAllocationCreateInfo allocCreateInfo = {};
+allocCreateInfo.usage = VMA_MEMORY_USAGE_AUTO;
+
+uint32_t memTypeIndex;
+vmaFindMemoryTypeIndexForBufferInfo(allocator, &exampleBufCreateInfo, &allocCreateInfo, &memTypeIndex);
+
+VmaPoolCreateInfo poolCreateInfo = {};
+poolCreateInfo.memoryTypeIndex = memTypeIndex;
+// ...
+\endcode
+
+When creating buffers/images allocated in that pool, provide the following parameters:
+
+- `VkBufferCreateInfo`: Prefer to pass the same parameters as above.
+  Otherwise you risk creating resources in a memory type that is not suitable for them, which may result in undefined behavior.
+  Using different `VK_BUFFER_USAGE_` flags may work, but you shouldn't create images in a pool intended for buffers
+  or the other way around.
+- VmaAllocationCreateInfo: You don't need to pass the same parameters; fill only the `pool` member.
+  Other members are ignored anyway.
+
+
+\section custom_memory_pools_when_not_use When not to use custom pools
+
+Custom pools are commonly overused by VMA users.
+While it may feel natural to keep some logical groups of resources separate in memory,
+in most cases it does more harm than good.
+Using a custom pool shouldn't be your first choice.
+Instead, please make all allocations from default pools first and only use custom pools
+if you can prove and measure that it is beneficial in some way,
+e.g. it results in lower memory usage, better performance, etc.
+
+Using custom pools has disadvantages:
+
+- Each pool has its own collection of `VkDeviceMemory` blocks.
+  Some of them may be partially or even completely empty.
+  Spreading allocations across multiple pools increases the amount of wasted (allocated but unbound) memory.
+- You must manually choose a specific memory type to be used by a custom pool (set as VmaPoolCreateInfo::memoryTypeIndex).
+  When using default pools, the best memory type for each of your allocations can be selected automatically
+  using a carefully designed algorithm that works across all kinds of GPUs.
+- If an allocation from a custom pool at a specific memory type fails, the entire allocation operation returns failure.
+  When using default pools, VMA tries another compatible memory type.
+- If you set VmaPoolCreateInfo::blockSize != 0, each memory block has the same size,
+  while default pools start from small blocks and only allocate subsequent blocks larger and larger
+  up to the preferred block size.
+
+Many of the common concerns can be addressed in a different way than using custom pools:
+
+- If you want to keep your allocations of a certain size (small versus large) or a certain lifetime (transient versus long lived)
+  separate, you likely don't need to.
+  VMA uses a high quality allocation algorithm that manages memory well in various cases.
+  Please measure and check if using custom pools provides a benefit.
+- If you want to keep your images and buffers separate, you don't need to.
+  VMA respects the `bufferImageGranularity` limit automatically.
+- If you want to keep your mapped and not mapped allocations separate, you don't need to.
+  VMA respects the `nonCoherentAtomSize` limit automatically.
+  It also maps only those `VkDeviceMemory` blocks that contain at least one allocation that needs to be mapped.
+  It even tries to keep mappable and non-mappable allocations in separate blocks to minimize the amount of mapped memory.
+- If you want to choose a custom size for the default memory block, you can set it globally instead
+  using VmaAllocatorCreateInfo::preferredLargeHeapBlockSize.
+- If you want to select a specific memory type for your allocation,
+  you can set VmaAllocationCreateInfo::memoryTypeBits to `(1u << myMemoryTypeIndex)` instead.
+- If you need to create a buffer with a certain minimum alignment, you can still do it
+  using default pools with the dedicated function vmaCreateBufferWithAlignment().
+
+
+\section linear_algorithm Linear allocation algorithm
+
+Each Vulkan memory block managed by this library has accompanying metadata that
+keeps track of used and unused regions. By default, the metadata structure and
+algorithm tries to find the best place for new allocations among free regions to
+optimize memory usage. This way you can allocate and free objects in any order.
+
+![Default allocation algorithm](../gfx/Linear_allocator_1_algo_default.png)
+
+Sometimes there is a need to use a simpler, linear allocation algorithm. You can
+create a custom pool that uses this algorithm by adding the flag
+#VMA_POOL_CREATE_LINEAR_ALGORITHM_BIT to VmaPoolCreateInfo::flags while creating the
+#VmaPool object. Then an alternative metadata management is used. It always
+creates new allocations after the last one and doesn't reuse free regions left by
+allocations freed in the middle. This results in better allocation performance and
+less memory consumed by metadata.
+
+![Linear allocation algorithm](../gfx/Linear_allocator_2_algo_linear.png)
+
+With this one flag, you can create a custom pool that can be used in many ways:
+free-at-once, stack, double stack, and ring buffer. See below for details.
+You don't need to specify explicitly which of these options you are going to use - it is detected automatically.
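+
+A minimal sketch of creating such a pool (`memTypeIndex` found as shown earlier):
+
+\code
+VmaPoolCreateInfo poolCreateInfo = {};
+poolCreateInfo.memoryTypeIndex = memTypeIndex;
+poolCreateInfo.blockSize = 64ull * 1024 * 1024;
+poolCreateInfo.maxBlockCount = 1; // Required later for double stack and ring buffer usage.
+poolCreateInfo.flags = VMA_POOL_CREATE_LINEAR_ALGORITHM_BIT;
+
+VmaPool pool;
+VkResult res = vmaCreatePool(allocator, &poolCreateInfo, &pool);
+// Check res...
+\endcode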
+
+\subsection linear_algorithm_free_at_once Free-at-once
+
+In a pool that uses the linear algorithm, you still need to free all the allocations
+individually, e.g. by using vmaFreeMemory() or vmaDestroyBuffer(). You can free
+them in any order. New allocations are always made after the last one - free space
+in the middle is not reused. However, when you release all the allocations and
+the pool becomes empty, allocation starts from the beginning again. This way you
+can use the linear algorithm to speed up creation of allocations that you are going
+to release all at once.
+
+![Free-at-once](../gfx/Linear_allocator_3_free_at_once.png)
+
+This mode is also available for pools created with a VmaPoolCreateInfo::maxBlockCount
+value that allows multiple memory blocks.
+
+\subsection linear_algorithm_stack Stack
+
+When you free an allocation that was created last, its space can be reused.
+Thanks to this, if you always release allocations in the order opposite to their
+creation (LIFO - Last In First Out), you can achieve the behavior of a stack.
+
+![Stack](../gfx/Linear_allocator_4_stack.png)
+
+This mode is also available for pools created with a VmaPoolCreateInfo::maxBlockCount
+value that allows multiple memory blocks.
+
+\subsection linear_algorithm_double_stack Double stack
+
+The space reserved by a custom pool with the linear algorithm may be used by two
+stacks:
+
+- First, the default one, growing up from offset 0.
+- Second, an "upper" one, growing down from the end towards lower offsets.
+
+To make an allocation from the upper stack, add the flag #VMA_ALLOCATION_CREATE_UPPER_ADDRESS_BIT
+to VmaAllocationCreateInfo::flags.
+
+![Double stack](../gfx/Linear_allocator_7_double_stack.png)
+
+Double stack is available only in pools with one memory block -
+VmaPoolCreateInfo::maxBlockCount must be 1. Otherwise behavior is undefined.
+
+When the two stacks' ends meet so there is not enough space between them for a
+new allocation, such an allocation fails with the usual
+`VK_ERROR_OUT_OF_DEVICE_MEMORY` error.
+
+\subsection linear_algorithm_ring_buffer Ring buffer
+
+When you free some allocations from the beginning and there is not enough free space
+for a new one at the end of the pool, the allocator's "cursor" wraps around to the
+beginning and starts allocating there. Thanks to this, if you always release
+allocations in the same order as you created them (FIFO - First In First Out),
+you can achieve the behavior of a ring buffer / queue.
+
+![Ring buffer](../gfx/Linear_allocator_5_ring_buffer.png)
+
+Ring buffer is available only in pools with one memory block -
+VmaPoolCreateInfo::maxBlockCount must be 1. Otherwise behavior is undefined.
+
+\note \ref defragmentation is not supported in custom pools created with #VMA_POOL_CREATE_LINEAR_ALGORITHM_BIT.
+
+
+\page defragmentation Defragmentation
+
+Interleaved allocations and deallocations of many objects of varying size can
+cause fragmentation over time, which can lead to a situation where the library is unable
+to find a contiguous range of free memory for a new allocation even though there is
+enough free space in total, just scattered across many small free ranges between existing
+allocations.
+
+To mitigate this problem, you can use the defragmentation feature.
+It doesn't happen automatically though and needs your cooperation,
+because VMA is a low-level library that only allocates memory.
+It cannot recreate buffers and images in a new place as it doesn't remember the contents of `VkBufferCreateInfo` / `VkImageCreateInfo` structures.
+It cannot copy their contents as it doesn't record any commands to a command buffer.
+
+Example:
+
+\code
+VmaDefragmentationInfo defragInfo = {};
+defragInfo.pool = myPool;
+defragInfo.flags = VMA_DEFRAGMENTATION_FLAG_ALGORITHM_FAST_BIT;
+
+VmaDefragmentationContext defragCtx;
+VkResult res = vmaBeginDefragmentation(allocator, &defragInfo, &defragCtx);
+// Check res...
+
+for(;;)
+{
+    VmaDefragmentationPassMoveInfo pass;
+    res = vmaBeginDefragmentationPass(allocator, defragCtx, &pass);
+    if(res == VK_SUCCESS)
+        break;
+    else if(res != VK_INCOMPLETE)
+    {
+        // Handle error...
+    }
+
+    for(uint32_t i = 0; i < pass.moveCount; ++i)
+    {
+        // Inspect pass.pMoves[i].srcAllocation, identify what buffer/image it represents.
+        VmaAllocationInfo allocInfo;
+        vmaGetAllocationInfo(allocator, pass.pMoves[i].srcAllocation, &allocInfo);
+        MyEngineResourceData* resData = (MyEngineResourceData*)allocInfo.pUserData;
+
+        // Recreate and bind this buffer/image at: pass.pMoves[i].dstMemory, pass.pMoves[i].dstOffset.
+        VkImageCreateInfo imgCreateInfo = ...
+        VkImage newImg;
+        res = vkCreateImage(device, &imgCreateInfo, nullptr, &newImg);
+        // Check res...
+        res = vmaBindImageMemory(allocator, pass.pMoves[i].dstTmpAllocation, newImg);
+        // Check res...
+
+        // Issue a vkCmdCopyBuffer/vkCmdCopyImage to copy its content to the new place.
+        vkCmdCopyImage(cmdBuf, resData->img, ..., newImg, ...);
+    }
+
+    // Make sure the copy commands finished executing.
+    vkWaitForFences(...);
+
+    // Destroy old buffers/images bound with pass.pMoves[i].srcAllocation.
+    for(uint32_t i = 0; i < pass.moveCount; ++i)
+    {
+        // ... (look up resData for pass.pMoves[i].srcAllocation as above)
+        vkDestroyImage(device, resData->img, nullptr);
+    }
+
+    // Update appropriate descriptors to point to the new places...
+
+    res = vmaEndDefragmentationPass(allocator, defragCtx, &pass);
+    if(res == VK_SUCCESS)
+        break;
+    else if(res != VK_INCOMPLETE)
+    {
+        // Handle error...
+    }
+}
+
+vmaEndDefragmentation(allocator, defragCtx, nullptr);
+\endcode
+
+Although functions like vmaCreateBuffer(), vmaCreateImage(), vmaDestroyBuffer(), vmaDestroyImage()
+create/destroy an allocation and a buffer/image at once, these are just a shortcut for
+creating the resource, allocating memory, and binding them together.
+Defragmentation works on memory allocations only. You must handle the rest manually.
+Defragmentation is an iterative process that should repeat "passes" as long as the related functions
+return `VK_INCOMPLETE`, not `VK_SUCCESS`.
+In each pass:
+
+1. vmaBeginDefragmentationPass() function call:
+   - Calculates and returns the list of allocations to be moved in this pass.
+     Note this can be a time-consuming process.
+   - Reserves destination memory for them by creating temporary destination allocations
+     that you can query for their `VkDeviceMemory` + offset using vmaGetAllocationInfo().
+2. Inside the pass, **you should**:
+   - Inspect the returned list of allocations to be moved.
+   - Create new buffers/images and bind them at the returned destination temporary allocations.
+   - Copy data from source to destination resources if necessary.
+   - Destroy the source buffers/images, but NOT their allocations.
+3. vmaEndDefragmentationPass() function call:
+   - Frees the source memory reserved for the allocations that are moved.
+   - Modifies source #VmaAllocation objects that are moved to point to the destination reserved memory.
+   - Frees `VkDeviceMemory` blocks that became empty.
+
+Unlike in previous iterations of the defragmentation API, there is no list of "movable" allocations passed as a parameter.
+The defragmentation algorithm tries to move all suitable allocations.
+You can, however, refuse to move some of them inside a defragmentation pass, by setting
+`pass.pMoves[i].operation` to #VMA_DEFRAGMENTATION_MOVE_OPERATION_IGNORE.
+This is not recommended and may result in suboptimal packing of the allocations after defragmentation.
+If you cannot ensure any allocation can be moved, it is better to keep movable allocations separate in a custom pool.
+
+Inside a pass, for each allocation that should be moved:
+
+- You should copy its data from the source to the destination place by calling e.g. `vkCmdCopyBuffer()`, `vkCmdCopyImage()`.
+  - You need to make sure these commands finished executing before destroying the source buffers/images and before calling vmaEndDefragmentationPass().
+- If a resource doesn't contain any meaningful data, e.g. it is a transient color attachment image to be cleared,
+  filled, and used temporarily in each rendering frame, you can just recreate this image
+  without copying its data.
+- If the resource is in `HOST_VISIBLE` and `HOST_CACHED` memory, you can copy its data on the CPU
+  using `memcpy()`.
+- If you cannot move the allocation, you can set `pass.pMoves[i].operation` to #VMA_DEFRAGMENTATION_MOVE_OPERATION_IGNORE.
+  This will cancel the move.
+  - vmaEndDefragmentationPass() will then free the destination memory, not the source memory of the allocation,
+    leaving it unchanged.
+- If you decide the allocation is unimportant and can be destroyed instead of moved (e.g. it wasn't used for a long time),
+  you can set `pass.pMoves[i].operation` to #VMA_DEFRAGMENTATION_MOVE_OPERATION_DESTROY.
+  - vmaEndDefragmentationPass() will then free both source and destination memory, and will destroy the source #VmaAllocation object.
+
+You can defragment a specific custom pool by setting VmaDefragmentationInfo::pool
+(like in the example above) or all the default pools by setting this member to null.
+
+Defragmentation is always performed in each pool separately.
+Allocations are never moved between different Vulkan memory types.
+The size of the destination memory reserved for a moved allocation is the same as the original one.
+The alignment of an allocation as determined using `vkGetBufferMemoryRequirements()` etc. is also respected after defragmentation.
+Buffers/images should be recreated with the same `VkBufferCreateInfo` / `VkImageCreateInfo` parameters as the original ones.
+
+You can perform the defragmentation incrementally to limit the number of allocations and bytes to be moved
+in each pass, e.g. to call it in sync with render frames and avoid large hitches.
+See members: VmaDefragmentationInfo::maxBytesPerPass, VmaDefragmentationInfo::maxAllocationsPerPass.
+A short sketch follows at the end of this page.
+
+It is also safe to perform the defragmentation asynchronously to render frames and other Vulkan and VMA
+usage, possibly from multiple threads, with the exception that allocations
+returned in VmaDefragmentationPassMoveInfo::pMoves shouldn't be destroyed until the defragmentation pass is ended.
+
+Mapping is preserved on allocations that are moved during defragmentation.
+Whether mapped through #VMA_ALLOCATION_CREATE_MAPPED_BIT or vmaMapMemory(), the allocations
+are mapped at their new place. Of course, the pointer to the mapped data changes, so it needs to be queried
+using VmaAllocationInfo::pMappedData.
+
+\note Defragmentation is not supported in custom pools created with #VMA_POOL_CREATE_LINEAR_ALGORITHM_BIT.
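+
+A hedged sketch of such an incremental setup - the limits shown are arbitrary, chosen so that
+one pass can run per frame:
+
+\code
+VmaDefragmentationInfo defragInfo = {};
+defragInfo.flags = VMA_DEFRAGMENTATION_FLAG_ALGORITHM_BALANCED_BIT;
+defragInfo.maxBytesPerPass = 16ull * 1024 * 1024; // Move at most 16 MiB per pass.
+defragInfo.maxAllocationsPerPass = 64;            // Move at most 64 allocations per pass.
+\endcode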
+
+
+\page statistics Statistics
+
+This library contains several functions that return information about its internal state,
+especially the amount of memory allocated from Vulkan.
+
+\section statistics_numeric_statistics Numeric statistics
+
+If you need to obtain basic statistics about memory usage per heap, together with the current budget,
+you can call the function vmaGetHeapBudgets() and inspect the structure #VmaBudget.
+This is useful to keep track of memory usage and stay within budget
+(see also \ref staying_within_budget).
+Example:
+
+\code
+uint32_t heapIndex = ...
+
+VmaBudget budgets[VK_MAX_MEMORY_HEAPS];
+vmaGetHeapBudgets(allocator, budgets);
+
+printf("My heap currently has %u allocations taking %llu B,\n",
+    budgets[heapIndex].statistics.allocationCount,
+    budgets[heapIndex].statistics.allocationBytes);
+printf("allocated out of %u Vulkan device memory blocks taking %llu B,\n",
+    budgets[heapIndex].statistics.blockCount,
+    budgets[heapIndex].statistics.blockBytes);
+printf("Vulkan reports total usage %llu B with budget %llu B.\n",
+    budgets[heapIndex].usage,
+    budgets[heapIndex].budget);
+\endcode
+
+You can query for more detailed statistics per memory heap, type, and totals,
+including minimum and maximum allocation size and unused range size,
+by calling the function vmaCalculateStatistics() and inspecting the structure #VmaTotalStatistics.
+This function is slower though, as it has to traverse all the internal data structures,
+so it should be used only for debugging purposes.
+
+You can query for statistics of a custom pool using the function vmaGetPoolStatistics()
+or vmaCalculatePoolStatistics().
+
+You can query for information about a specific allocation using the function vmaGetAllocationInfo().
+It fills the structure #VmaAllocationInfo.
+
+\section statistics_json_dump JSON dump
+
+You can dump the internal state of the allocator to a string in JSON format using the function vmaBuildStatsString().
+The result is guaranteed to be correct JSON.
+It uses ANSI encoding.
+Any strings provided by the user (see [Allocation names](@ref allocation_names))
+are copied as-is and properly escaped for JSON, so if they use UTF-8, ISO-8859-2 or any other encoding,
+this JSON string can be treated as using this encoding.
+It must be freed using the function vmaFreeStatsString().
+
+The format of this JSON string is not part of the official documentation of the library,
+but it will not change in a backward-incompatible way without an increase of the library's major version number
+and an appropriate mention in the changelog.
+
+The JSON string contains all the data that can be obtained using vmaCalculateStatistics().
+It can also contain a detailed map of allocated memory blocks and their regions -
+free and occupied by allocations.
+This allows you e.g. to visualize the memory or assess fragmentation.
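+
+A minimal sketch of producing and releasing such a dump:
+
+\code
+char* statsString = nullptr;
+vmaBuildStatsString(allocator, &statsString, VK_TRUE); // VK_TRUE = include the detailed map.
+// Write statsString to a log or file...
+vmaFreeStatsString(allocator, statsString);
+\endcode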
+
+
+\page allocation_annotation Allocation names and user data
+
+\section allocation_user_data Allocation user data
+
+You can annotate allocations with your own information, e.g. for debugging purposes.
+To do that, fill the VmaAllocationCreateInfo::pUserData field when creating
+an allocation. It is an opaque `void*` pointer. You can use it e.g. as a pointer,
+some handle, index, key, ordinal number or any other value that would associate
+the allocation with your custom metadata.
+It is useful for identifying the appropriate data structures in your engine given a #VmaAllocation,
+e.g. when doing \ref defragmentation.
+
+\code
+VkBufferCreateInfo bufCreateInfo = ...
+
+MyBufferMetadata* pMetadata = CreateBufferMetadata();
+
+VmaAllocationCreateInfo allocCreateInfo = {};
+allocCreateInfo.usage = VMA_MEMORY_USAGE_AUTO;
+allocCreateInfo.pUserData = pMetadata;
+
+VkBuffer buffer;
+VmaAllocation allocation;
+vmaCreateBuffer(allocator, &bufCreateInfo, &allocCreateInfo, &buffer, &allocation, nullptr);
+\endcode
+
+The pointer may be later retrieved as VmaAllocationInfo::pUserData:
+
+\code
+VmaAllocationInfo allocInfo;
+vmaGetAllocationInfo(allocator, allocation, &allocInfo);
+MyBufferMetadata* pMetadata = (MyBufferMetadata*)allocInfo.pUserData;
+\endcode
+
+It can also be changed using the function vmaSetAllocationUserData().
+
+Values of (non-zero) allocations' `pUserData` are printed in the JSON report created by
+vmaBuildStatsString() in hexadecimal form.
+
+\section allocation_names Allocation names
+
+An allocation can also carry a null-terminated string, giving a name to the allocation.
+To set it, call vmaSetAllocationName().
+The library creates an internal copy of the string, so the pointer you pass doesn't need
+to be valid for the whole lifetime of the allocation. You can free it after the call.
+
+\code
+std::string imageName = "Texture: ";
+imageName += fileName;
+vmaSetAllocationName(allocator, allocation, imageName.c_str());
+\endcode
+
+The string can be later retrieved by inspecting VmaAllocationInfo::pName.
+It is also printed in the JSON report created by vmaBuildStatsString().
+
+\note Setting a string name on a VMA allocation doesn't automatically set it on the Vulkan buffer or image created with it.
+You must do that manually using an extension like VK_EXT_debug_utils, which is independent of this library.
+
+
+\page virtual_allocator Virtual allocator
+
+As an extra feature, the core allocation algorithm of the library is exposed through a simple and convenient API of a "virtual allocator".
+It doesn't allocate any real GPU memory. It just keeps track of used and free regions of a "virtual block".
+You can use it to allocate your own memory or other objects, even completely unrelated to Vulkan.
+A common use case is sub-allocation of pieces of one large GPU buffer.
+
+\section virtual_allocator_creating_virtual_block Creating virtual block
+
+To use this functionality, there is no main "allocator" object.
+You don't need to have a #VmaAllocator object created.
+All you need to do is to create a separate #VmaVirtualBlock object for each block of memory you want to be managed by the allocator:
+
+-# Fill in a #VmaVirtualBlockCreateInfo structure.
+-# Call vmaCreateVirtualBlock(). Get a new #VmaVirtualBlock object.
+
+Example:
+
+\code
+VmaVirtualBlockCreateInfo blockCreateInfo = {};
+blockCreateInfo.size = 1048576; // 1 MB
+
+VmaVirtualBlock block;
+VkResult res = vmaCreateVirtualBlock(&blockCreateInfo, &block);
+\endcode
+
+\section virtual_allocator_making_virtual_allocations Making virtual allocations
+
+A #VmaVirtualBlock object contains an internal data structure that keeps track of free and occupied regions
+using the same code as the main Vulkan memory allocator.
+Similarly to #VmaAllocation for standard GPU allocations, there is a #VmaVirtualAllocation type
+that represents an opaque handle to an allocation within the virtual block.
+
+In order to make such an allocation:
+
+-# Fill in a #VmaVirtualAllocationCreateInfo structure.
+-# Call vmaVirtualAllocate(). Get a new #VmaVirtualAllocation object that represents the allocation.
+   You can also receive the `VkDeviceSize offset` that was assigned to the allocation.
+
+Example:
+
+\code
+VmaVirtualAllocationCreateInfo allocCreateInfo = {};
+allocCreateInfo.size = 4096; // 4 KB
+
+VmaVirtualAllocation alloc;
+VkDeviceSize offset;
+res = vmaVirtualAllocate(block, &allocCreateInfo, &alloc, &offset);
+if(res == VK_SUCCESS)
+{
+    // Use the 4 KB of your memory starting at offset.
+}
+else
+{
+    // Allocation failed - no space for it could be found. Handle this error!
+}
+\endcode
+
+\section virtual_allocator_deallocation Deallocation
+
+When no longer needed, an allocation can be freed by calling vmaVirtualFree().
+You can only pass to this function an allocation that was previously returned by vmaVirtualAllocate()
+called for the same #VmaVirtualBlock.
+
+When the whole block is no longer needed, the block object can be released by calling vmaDestroyVirtualBlock().
+All allocations must be freed before the block is destroyed, which is checked internally by an assert.
+However, if you don't want to call vmaVirtualFree() for each allocation, you can use vmaClearVirtualBlock() to free them all at once -
+a feature not available in the normal Vulkan memory allocator. Example:
+
+\code
+vmaVirtualFree(block, alloc);
+vmaDestroyVirtualBlock(block);
+\endcode
+
+\section virtual_allocator_allocation_parameters Allocation parameters
+
+You can attach a custom pointer to each allocation by using vmaSetVirtualAllocationUserData().
+Its default value is null.
+It can be used to store any data that needs to be associated with that allocation - e.g. an index, a handle, or a pointer to some
+larger data structure containing more information. Example:
+
+\code
+struct CustomAllocData
+{
+    std::string m_AllocName;
+};
+CustomAllocData* allocData = new CustomAllocData();
+allocData->m_AllocName = "My allocation 1";
+vmaSetVirtualAllocationUserData(block, alloc, allocData);
+\endcode
+
+The pointer can later be fetched, along with allocation offset and size, by passing the allocation handle to the function
+vmaGetVirtualAllocationInfo() and inspecting the returned structure #VmaVirtualAllocationInfo.
+If you allocated a new object to be used as the custom pointer, don't forget to delete that object before freeing the allocation!
+Example:
+
+\code
+VmaVirtualAllocationInfo allocInfo;
+vmaGetVirtualAllocationInfo(block, alloc, &allocInfo);
+delete (CustomAllocData*)allocInfo.pUserData;
+
+vmaVirtualFree(block, alloc);
+\endcode
+
+\section virtual_allocator_alignment_and_units Alignment and units
+
+It feels natural to express sizes and offsets in bytes.
+If an offset of an allocation needs to be aligned to a multiple of some number (e.g. 4 bytes), you can fill the optional member
+VmaVirtualAllocationCreateInfo::alignment to request it. Example:
+
+\code
+VmaVirtualAllocationCreateInfo allocCreateInfo = {};
+allocCreateInfo.size = 4096; // 4 KB
+allocCreateInfo.alignment = 4; // Returned offset must be a multiple of 4 B
+
+VmaVirtualAllocation alloc;
+res = vmaVirtualAllocate(block, &allocCreateInfo, &alloc, nullptr);
+\endcode
+
+Alignments of different allocations made from one block may vary.
+However, if all alignments and sizes are always a multiple of some size, e.g. 4 B or `sizeof(MyDataStruct)`,
+you can express all sizes, alignments, and offsets in multiples of that size instead of individual bytes.
+It might be more convenient, but you need to make sure to use this new unit consistently in all the places:
+
+- VmaVirtualBlockCreateInfo::size
+- VmaVirtualAllocationCreateInfo::size and VmaVirtualAllocationCreateInfo::alignment
+- Using the offset returned by vmaVirtualAllocate() or in VmaVirtualAllocationInfo::offset
+
+\section virtual_allocator_statistics Statistics
+
+You can obtain statistics of a virtual block using vmaGetVirtualBlockStatistics()
+(to get brief statistics that are fast to calculate)
+or vmaCalculateVirtualBlockStatistics() (to get more detailed statistics, slower to calculate).
+The functions fill the structures #VmaStatistics, #VmaDetailedStatistics respectively - the same as used by the normal Vulkan memory allocator.
+Example:
+
+\code
+VmaStatistics stats;
+vmaGetVirtualBlockStatistics(block, &stats);
+printf("My virtual block has %llu bytes used by %u virtual allocations\n",
+    stats.allocationBytes, stats.allocationCount);
+\endcode
+
+You can also request a full list of allocations and free regions as a string in JSON format by calling
+vmaBuildVirtualBlockStatsString().
+The returned string must be later freed using vmaFreeVirtualBlockStatsString().
+The format of this string differs from the one returned by the main Vulkan allocator, but it is similar.
+
+\section virtual_allocator_additional_considerations Additional considerations
+
+The "virtual allocator" functionality is implemented at the level of individual memory blocks.
+Keeping track of a whole collection of blocks, allocating new ones when out of free space,
+deleting empty ones, and deciding which one to try first for a new allocation must be implemented by the user.
+
+Alternative allocation algorithms are supported, just like in custom pools of the real GPU memory.
+See enum #VmaVirtualBlockCreateFlagBits to learn how to specify them (e.g. #VMA_VIRTUAL_BLOCK_CREATE_LINEAR_ALGORITHM_BIT;
+a short sketch follows at the end of this section).
+You can find their description in chapter \ref custom_memory_pools.
+Allocation strategies are also supported.
+See enum #VmaVirtualAllocationCreateFlagBits to learn how to specify them (e.g. #VMA_VIRTUAL_ALLOCATION_CREATE_STRATEGY_MIN_TIME_BIT).
+
+The following features are supported only by the allocator of the real GPU memory and not by virtual allocations:
+buffer-image granularity, `VMA_DEBUG_MARGIN`, `VMA_MIN_ALIGNMENT`.
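+
+A minimal sketch of a virtual block using the linear algorithm:
+
+\code
+VmaVirtualBlockCreateInfo blockCreateInfo = {};
+blockCreateInfo.size = 1048576; // 1 MB
+blockCreateInfo.flags = VMA_VIRTUAL_BLOCK_CREATE_LINEAR_ALGORITHM_BIT;
+
+VmaVirtualBlock block;
+VkResult res = vmaCreateVirtualBlock(&blockCreateInfo, &block);
+\endcode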
+
+
+\page debugging_memory_usage Debugging incorrect memory usage
+
+If you suspect a bug with memory usage, like usage of uninitialized memory or
+memory being overwritten out of bounds of an allocation,
+you can use debug features of this library to verify this.
+
+\section debugging_memory_usage_initialization Memory initialization
+
+If you experience a bug with incorrect and nondeterministic data in your program and you suspect uninitialized memory to be used,
+you can enable automatic memory initialization to verify this.
+To do it, define the macro `VMA_DEBUG_INITIALIZE_ALLOCATIONS` to 1.
+
+\code
+#define VMA_DEBUG_INITIALIZE_ALLOCATIONS 1
+#include "vk_mem_alloc.h"
+\endcode
+
+It makes the memory of new allocations initialized to the bit pattern `0xDCDCDCDC`.
+Before an allocation is destroyed, its memory is filled with the bit pattern `0xEFEFEFEF`.
+Memory is automatically mapped and unmapped if necessary.
+
+If you find these values while debugging your program, good chances are that you incorrectly
+read Vulkan memory that is allocated but not initialized, or already freed, respectively.
+
+Memory initialization works only with memory types that are `HOST_VISIBLE` and with allocations that can be mapped.
+It also works with dedicated allocations.
+
+\section debugging_memory_usage_margins Margins
+
+By default, allocations are laid out in memory blocks next to each other if possible
+(considering required alignment, `bufferImageGranularity`, and `nonCoherentAtomSize`).
+
+![Allocations without margin](../gfx/Margins_1.png)
+
+Define the macro `VMA_DEBUG_MARGIN` to some non-zero value (e.g. 16) to enforce the specified
+number of bytes as a margin after every allocation.
+
+\code
+#define VMA_DEBUG_MARGIN 16
+#include "vk_mem_alloc.h"
+\endcode
+
+![Allocations with margin](../gfx/Margins_2.png)
+
+If your bug goes away after enabling margins, it means it may be caused by memory
+being overwritten outside of allocation boundaries. It is not 100% certain though.
+A change in application behavior may also be caused by a different order and distribution
+of allocations across memory blocks after margins are applied.
+
+Margins work with all types of memory.
+
+The margin is applied only to allocations made out of memory blocks and not to dedicated
+allocations, which have their own memory block of a specific size.
+It is thus not applied to allocations made using the #VMA_ALLOCATION_CREATE_DEDICATED_MEMORY_BIT flag
+or those automatically decided to be put into dedicated allocations, e.g. due to their
+large size, or as recommended by the VK_KHR_dedicated_allocation extension.
+
+Margins appear in the [JSON dump](@ref statistics_json_dump) as part of free space.
+
+Note that enabling margins increases memory usage and fragmentation.
+
+Margins do not apply to \ref virtual_allocator.
+
+\section debugging_memory_usage_corruption_detection Corruption detection
+
+You can additionally define the macro `VMA_DEBUG_DETECT_CORRUPTION` to 1 to enable validation
+of the contents of the margins.
+
+\code
+#define VMA_DEBUG_MARGIN 16
+#define VMA_DEBUG_DETECT_CORRUPTION 1
+#include "vk_mem_alloc.h"
+\endcode
+
+When this feature is enabled, the number of bytes specified as `VMA_DEBUG_MARGIN`
+(it must be a multiple of 4) after every allocation is filled with a magic number.
+This idea is also known as a "canary".
+Memory is automatically mapped and unmapped if necessary.
+
+This number is validated automatically when the allocation is destroyed.
+If it is not equal to the expected value, `VMA_ASSERT()` is executed.
+It clearly means that either the CPU or the GPU overwrote the memory outside the boundaries of the allocation,
+which indicates a serious bug.
+
+You can also explicitly request checking the margins of all allocations in all memory blocks
+that belong to specified memory types by using the function vmaCheckCorruption(),
+or in memory blocks that belong to a specified custom pool, by using the function
+vmaCheckPoolCorruption().
+
+Margin validation (corruption detection) works only for memory types that are
+`HOST_VISIBLE` and `HOST_COHERENT`.
+
+
+\section debugging_memory_usage_leak_detection Leak detection features
+
+At allocation and allocator destruction time VMA checks for unfreed and unmapped blocks using
+`VMA_ASSERT_LEAK()`. This macro defaults to an assertion, triggering a typically fatal error in Debug
+builds, and doing nothing in Release builds. You can provide your own definition of `VMA_ASSERT_LEAK()`
+to change this behavior.
+
+At memory block destruction time VMA lists out all unfreed allocations using the `VMA_LEAK_LOG_FORMAT()`
+macro, which defaults to `VMA_DEBUG_LOG_FORMAT`, which in turn defaults to a no-op.
+If you're having trouble with leaks - for example, the aforementioned assertion triggers, but you don't
+quite know \em why - overriding this macro to print out the leaking blocks, combined with assigning
+individual names to allocations using vmaSetAllocationName(), can greatly aid in fixing them.
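+
+A hedged sketch of such an override, printing each leaked allocation to `stderr` in the
+translation unit that includes the library:
+
+\code
+#include <cstdio>
+#define VMA_LEAK_LOG_FORMAT(format, ...) do { \
+        fprintf(stderr, (format), __VA_ARGS__); \
+        fprintf(stderr, "\n"); \
+    } while(false)
+#include "vk_mem_alloc.h"
+\endcode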
+
+\page other_api_interop Interop with other graphics APIs
+
+VMA provides some features that help with interoperability with other graphics APIs, e.g. OpenGL.
+
+\section opengl_interop_exporting_memory Exporting memory
+
+If you want to attach `VkExportMemoryAllocateInfoKHR` or another structure to the `pNext` chain of memory allocations made by the library:
+
+You can create \ref custom_memory_pools for such allocations.
+Define and fill in your `VkExportMemoryAllocateInfoKHR` structure and attach it to VmaPoolCreateInfo::pMemoryAllocateNext
+while creating the custom pool.
+Please note that the structure must remain alive and unchanged for the whole lifetime of the #VmaPool,
+not only while creating it, as no copy of the structure is made,
+but its original pointer is used for each allocation instead.
+
+If you want to export all memory allocated by VMA from certain memory types,
+also dedicated allocations or other allocations made from default pools,
+an alternative solution is to fill in VmaAllocatorCreateInfo::pTypeExternalMemoryHandleTypes.
+It should point to an array with `VkExternalMemoryHandleTypeFlagsKHR` to be automatically passed by the library
+through `VkExportMemoryAllocateInfoKHR` on each allocation made from a specific memory type.
+Please note that new versions of the library also support dedicated allocations created in custom pools.
+
+You should not mix these two methods in a way that allows both to apply to the same memory type.
+Otherwise, the `VkExportMemoryAllocateInfoKHR` structure would be attached twice to the `pNext` chain of `VkMemoryAllocateInfo`.
+
+
+\section opengl_interop_custom_alignment Custom alignment
+
+Buffers or images exported to a different API like OpenGL may require a different alignment,
+higher than the one used by the library automatically, queried from functions like `vkGetBufferMemoryRequirements`.
+To impose such an alignment:
+
+You can create \ref custom_memory_pools for such allocations.
+Set the VmaPoolCreateInfo::minAllocationAlignment member to the minimum alignment required for each allocation
+to be made out of this pool.
+The alignment actually used will be the maximum of this member and the alignment returned for the specific buffer or image
+from a function like `vkGetBufferMemoryRequirements`, which is called by VMA automatically.
+
+If you want to create a buffer with a specific minimum alignment out of default pools,
+use the special function vmaCreateBufferWithAlignment(), which takes an additional parameter `minAlignment`.
+
+Note that the problem of alignment affects only resources placed inside bigger `VkDeviceMemory` blocks and not dedicated
+allocations, as these, by definition, always have alignment = 0 because the resource is bound to the beginning of its dedicated block.
+You can ensure that an allocation is created as dedicated by using #VMA_ALLOCATION_CREATE_DEDICATED_MEMORY_BIT.
+Contrary to Direct3D 12, Vulkan doesn't have a concept of alignment of the entire memory block passed on its allocation.
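+
+A minimal sketch of the default-pool variant (the 4096 B alignment is an arbitrary example of a
+value imposed by an external API):
+
+\code
+VkBufferCreateInfo bufCreateInfo = { VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO };
+bufCreateInfo.size = 65536;
+bufCreateInfo.usage = VK_BUFFER_USAGE_STORAGE_BUFFER_BIT;
+
+VmaAllocationCreateInfo allocCreateInfo = {};
+allocCreateInfo.usage = VMA_MEMORY_USAGE_AUTO;
+
+VkBuffer buf;
+VmaAllocation alloc;
+VkResult res = vmaCreateBufferWithAlignment(allocator, &bufCreateInfo, &allocCreateInfo,
+    4096, // minAlignment
+    &buf, &alloc, nullptr);
+\endcode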
+
+\section opengl_interop_extended_allocation_information Extended allocation information
+
+If you want to rely on VMA to allocate your buffers and images inside larger memory blocks,
+but you need to know the size of the entire block and whether the allocation was made
+with its own dedicated memory, use the function vmaGetAllocationInfo2() to retrieve
+extended allocation information in the structure #VmaAllocationInfo2.
+
+
+
+\page usage_patterns Recommended usage patterns
+
+Vulkan gives great flexibility in memory allocation.
+This chapter shows the most common patterns.
+
+See also slides from the talk:
+[Sawicki, Adam. Advanced Graphics Techniques Tutorial: Memory management in Vulkan and DX12. Game Developers Conference, 2018](https://www.gdcvault.com/play/1025458/Advanced-Graphics-Techniques-Tutorial-New)
+
+
+\section usage_patterns_gpu_only GPU-only resource
+
+When:
+Any resources that you frequently write and read on the GPU,
+e.g. images used as color attachments (aka "render targets"), depth-stencil attachments,
+images/buffers used as storage image/buffer (aka "Unordered Access View (UAV)").
+
+What to do:
+Let the library select the optimal memory type, which will likely have `VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT`.
+
+\code
+VkImageCreateInfo imgCreateInfo = { VK_STRUCTURE_TYPE_IMAGE_CREATE_INFO };
+imgCreateInfo.imageType = VK_IMAGE_TYPE_2D;
+imgCreateInfo.extent.width = 3840;
+imgCreateInfo.extent.height = 2160;
+imgCreateInfo.extent.depth = 1;
+imgCreateInfo.mipLevels = 1;
+imgCreateInfo.arrayLayers = 1;
+imgCreateInfo.format = VK_FORMAT_R8G8B8A8_UNORM;
+imgCreateInfo.tiling = VK_IMAGE_TILING_OPTIMAL;
+imgCreateInfo.initialLayout = VK_IMAGE_LAYOUT_UNDEFINED;
+imgCreateInfo.usage = VK_IMAGE_USAGE_SAMPLED_BIT | VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT;
+imgCreateInfo.samples = VK_SAMPLE_COUNT_1_BIT;
+
+VmaAllocationCreateInfo allocCreateInfo = {};
+allocCreateInfo.usage = VMA_MEMORY_USAGE_AUTO;
+allocCreateInfo.flags = VMA_ALLOCATION_CREATE_DEDICATED_MEMORY_BIT;
+allocCreateInfo.priority = 1.0f;
+
+VkImage img;
+VmaAllocation alloc;
+vmaCreateImage(allocator, &imgCreateInfo, &allocCreateInfo, &img, &alloc, nullptr);
+\endcode
+
+Also consider:
+Creating them as dedicated allocations using #VMA_ALLOCATION_CREATE_DEDICATED_MEMORY_BIT,
+especially if they are large or if you plan to destroy and recreate them with different sizes,
+e.g. when the display resolution changes.
+Prefer to create such resources first and all other GPU resources (like textures and vertex buffers) later.
+When the VK_EXT_memory_priority extension is enabled, it is also worth setting a high priority on such allocations
+to decrease their chances of being evicted to system memory by the operating system.
+
+\section usage_patterns_staging_copy_upload Staging copy for upload
+
+When:
+A "staging" buffer that you want to map and fill from CPU code, then use as a source of a transfer
+to some GPU resource.
+
+What to do:
+Use flag #VMA_ALLOCATION_CREATE_HOST_ACCESS_SEQUENTIAL_WRITE_BIT.
+Let the library select the optimal memory type, which will always have `VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT`.
+
+\code
+VkBufferCreateInfo bufCreateInfo = { VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO };
+bufCreateInfo.size = 65536;
+bufCreateInfo.usage = VK_BUFFER_USAGE_TRANSFER_SRC_BIT;
+
+VmaAllocationCreateInfo allocCreateInfo = {};
+allocCreateInfo.usage = VMA_MEMORY_USAGE_AUTO;
+allocCreateInfo.flags = VMA_ALLOCATION_CREATE_HOST_ACCESS_SEQUENTIAL_WRITE_BIT |
+    VMA_ALLOCATION_CREATE_MAPPED_BIT;
+
+VkBuffer buf;
+VmaAllocation alloc;
+VmaAllocationInfo allocInfo;
+vmaCreateBuffer(allocator, &bufCreateInfo, &allocCreateInfo, &buf, &alloc, &allocInfo);
+
+...
+
+memcpy(allocInfo.pMappedData, myData, myDataSize);
+\endcode
+
+Also consider:
+You can map the allocation using vmaMapMemory() or you can create it as persistently mapped
+using #VMA_ALLOCATION_CREATE_MAPPED_BIT, as in the example above.
+
+
+\section usage_patterns_readback Readback
+
+When:
+Buffers for data written by or transferred from the GPU that you want to read back on the CPU,
+e.g. results of some computations.
+
+What to do:
+Use flag #VMA_ALLOCATION_CREATE_HOST_ACCESS_RANDOM_BIT.
+Let the library select the optimal memory type, which will always have `VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT`
+and `VK_MEMORY_PROPERTY_HOST_CACHED_BIT`.
+
+\code
+VkBufferCreateInfo bufCreateInfo = { VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO };
+bufCreateInfo.size = 65536;
+bufCreateInfo.usage = VK_BUFFER_USAGE_TRANSFER_DST_BIT;
+
+VmaAllocationCreateInfo allocCreateInfo = {};
+allocCreateInfo.usage = VMA_MEMORY_USAGE_AUTO;
+allocCreateInfo.flags = VMA_ALLOCATION_CREATE_HOST_ACCESS_RANDOM_BIT |
+    VMA_ALLOCATION_CREATE_MAPPED_BIT;
+
+VkBuffer buf;
+VmaAllocation alloc;
+VmaAllocationInfo allocInfo;
+vmaCreateBuffer(allocator, &bufCreateInfo, &allocCreateInfo, &buf, &alloc, &allocInfo);
+
+...
+
+const float* downloadedData = (const float*)allocInfo.pMappedData;
+\endcode
+
+
+\section usage_patterns_advanced_data_uploading Advanced data uploading
+
+For resources that you frequently write on the CPU via a mapped pointer and
+frequently read on the GPU e.g. as a uniform buffer (also called "dynamic"), multiple options are possible:
+
+-# The easiest solution is to have one copy of the resource in `HOST_VISIBLE` memory,
+   even if it means system RAM (not `DEVICE_LOCAL`) on systems with a discrete graphics card,
+   and make the device reach out to that resource directly.
+   - Reads performed by the device will then go through the PCI Express bus.
+     The performance of this access may be limited, but it may be fine depending on the size
+     of this resource (whether it is small enough to quickly end up in the GPU cache) and the sparsity
+     of access.
+-# On systems with unified memory (e.g. AMD APU or Intel integrated graphics, mobile chips),
+   a memory type may be available that is both `HOST_VISIBLE` (available for mapping) and `DEVICE_LOCAL`
+   (fast to access from the GPU). Then, it is likely the best choice for this type of resource.
+-# Systems with a discrete graphics card and separate video memory may or may not expose
+   a memory type that is both `HOST_VISIBLE` and `DEVICE_LOCAL`, also known as Base Address Register (BAR).
+   If they do, it represents a piece of VRAM (or the entire VRAM, if ReBAR is enabled in the motherboard BIOS)
+   that is available to the CPU for mapping.
+   - Writes performed by the host to that memory go through the PCI Express bus.
+     The performance of these writes may be limited, but it may be fine, especially on PCIe 4.0,
+     as long as the rules of using uncached and write-combined memory are followed - only sequential writes and no reads.
+-# Finally, you may need or prefer to create a separate copy of the resource in `DEVICE_LOCAL` memory,
+   a separate "staging" copy in `HOST_VISIBLE` memory, and perform an explicit transfer command between them.
+
+Thankfully, VMA offers an aid to create and use such resources in the way optimal
+for the current Vulkan device. To help the library make the best choice,
+use flag #VMA_ALLOCATION_CREATE_HOST_ACCESS_SEQUENTIAL_WRITE_BIT together with
+#VMA_ALLOCATION_CREATE_HOST_ACCESS_ALLOW_TRANSFER_INSTEAD_BIT.
+It will then prefer a memory type that is both `DEVICE_LOCAL` and `HOST_VISIBLE` (integrated memory or BAR),
+but if no such memory type is available or allocation from it fails
+(PC graphics cards have only 256 MB of BAR by default, unless ReBAR is supported and enabled in the BIOS),
+it will fall back to `DEVICE_LOCAL` memory for fast GPU access.
+It is then up to you to detect that the allocation ended up in a memory type that is not `HOST_VISIBLE`,
+in which case you need to create another "staging" allocation and perform explicit transfers.
+
+\code
+VkBufferCreateInfo bufCreateInfo = { VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO };
+bufCreateInfo.size = 65536;
+bufCreateInfo.usage = VK_BUFFER_USAGE_UNIFORM_BUFFER_BIT | VK_BUFFER_USAGE_TRANSFER_DST_BIT;
+
+VmaAllocationCreateInfo allocCreateInfo = {};
+allocCreateInfo.usage = VMA_MEMORY_USAGE_AUTO;
+allocCreateInfo.flags = VMA_ALLOCATION_CREATE_HOST_ACCESS_SEQUENTIAL_WRITE_BIT |
+    VMA_ALLOCATION_CREATE_HOST_ACCESS_ALLOW_TRANSFER_INSTEAD_BIT |
+    VMA_ALLOCATION_CREATE_MAPPED_BIT;
+
+VkBuffer buf;
+VmaAllocation alloc;
+VmaAllocationInfo allocInfo;
+VkResult result = vmaCreateBuffer(allocator, &bufCreateInfo, &allocCreateInfo, &buf, &alloc, &allocInfo);
+// Check result...
+
+VkMemoryPropertyFlags memPropFlags;
+vmaGetAllocationMemoryProperties(allocator, alloc, &memPropFlags);
+
+if(memPropFlags & VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT)
+{
+    // Allocation ended up in a mappable memory and is already mapped - write to it directly.
+
+    // [Executed in runtime]:
+    memcpy(allocInfo.pMappedData, myData, myDataSize);
+    result = vmaFlushAllocation(allocator, alloc, 0, VK_WHOLE_SIZE);
+    // Check result...
+
+    VkBufferMemoryBarrier bufMemBarrier = { VK_STRUCTURE_TYPE_BUFFER_MEMORY_BARRIER };
+    bufMemBarrier.srcAccessMask = VK_ACCESS_HOST_WRITE_BIT;
+    bufMemBarrier.dstAccessMask = VK_ACCESS_UNIFORM_READ_BIT;
+    bufMemBarrier.srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED;
+    bufMemBarrier.dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED;
+    bufMemBarrier.buffer = buf;
+    bufMemBarrier.offset = 0;
+    bufMemBarrier.size = VK_WHOLE_SIZE;
+
+    vkCmdPipelineBarrier(cmdBuf, VK_PIPELINE_STAGE_HOST_BIT, VK_PIPELINE_STAGE_VERTEX_SHADER_BIT,
+        0, 0, nullptr, 1, &bufMemBarrier, 0, nullptr);
+}
+else
+{
+    // Allocation ended up in a non-mappable memory - a transfer using a staging buffer is required.
+    VkBufferCreateInfo stagingBufCreateInfo = { VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO };
+    stagingBufCreateInfo.size = 65536;
+    stagingBufCreateInfo.usage = VK_BUFFER_USAGE_TRANSFER_SRC_BIT;
+
+    VmaAllocationCreateInfo stagingAllocCreateInfo = {};
+    stagingAllocCreateInfo.usage = VMA_MEMORY_USAGE_AUTO;
+    stagingAllocCreateInfo.flags = VMA_ALLOCATION_CREATE_HOST_ACCESS_SEQUENTIAL_WRITE_BIT |
+        VMA_ALLOCATION_CREATE_MAPPED_BIT;
+
+    VkBuffer stagingBuf;
+    VmaAllocation stagingAlloc;
+    VmaAllocationInfo stagingAllocInfo;
+    result = vmaCreateBuffer(allocator, &stagingBufCreateInfo, &stagingAllocCreateInfo,
+        &stagingBuf, &stagingAlloc, &stagingAllocInfo);
+    // Check result...
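+
+    // A note on the vmaFlushAllocation() call below: the staging memory type selected here may be
+    // HOST_VISIBLE but not HOST_COHERENT. For HOST_COHERENT memory the flush degenerates to a no-op
+    // inside VMA, so calling it unconditionally after the memcpy is safe and portable.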
+
+    // [Executed in runtime]:
+    memcpy(stagingAllocInfo.pMappedData, myData, myDataSize);
+    result = vmaFlushAllocation(allocator, stagingAlloc, 0, VK_WHOLE_SIZE);
+    // Check result...
+
+    VkBufferMemoryBarrier bufMemBarrier = { VK_STRUCTURE_TYPE_BUFFER_MEMORY_BARRIER };
+    bufMemBarrier.srcAccessMask = VK_ACCESS_HOST_WRITE_BIT;
+    bufMemBarrier.dstAccessMask = VK_ACCESS_TRANSFER_READ_BIT;
+    bufMemBarrier.srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED;
+    bufMemBarrier.dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED;
+    bufMemBarrier.buffer = stagingBuf;
+    bufMemBarrier.offset = 0;
+    bufMemBarrier.size = VK_WHOLE_SIZE;
+
+    vkCmdPipelineBarrier(cmdBuf, VK_PIPELINE_STAGE_HOST_BIT, VK_PIPELINE_STAGE_TRANSFER_BIT,
+        0, 0, nullptr, 1, &bufMemBarrier, 0, nullptr);
+
+    VkBufferCopy bufCopy = {
+        0, // srcOffset
+        0, // dstOffset
+        myDataSize, // size
+    };
+
+    vkCmdCopyBuffer(cmdBuf, stagingBuf, buf, 1, &bufCopy);
+
+    VkBufferMemoryBarrier bufMemBarrier2 = { VK_STRUCTURE_TYPE_BUFFER_MEMORY_BARRIER };
+    bufMemBarrier2.srcAccessMask = VK_ACCESS_TRANSFER_WRITE_BIT;
+    bufMemBarrier2.dstAccessMask = VK_ACCESS_UNIFORM_READ_BIT; // We created a uniform buffer
+    bufMemBarrier2.srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED;
+    bufMemBarrier2.dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED;
+    bufMemBarrier2.buffer = buf;
+    bufMemBarrier2.offset = 0;
+    bufMemBarrier2.size = VK_WHOLE_SIZE;
+
+    vkCmdPipelineBarrier(cmdBuf, VK_PIPELINE_STAGE_TRANSFER_BIT, VK_PIPELINE_STAGE_VERTEX_SHADER_BIT,
+        0, 0, nullptr, 1, &bufMemBarrier2, 0, nullptr);
+}
+\endcode
+
+\section usage_patterns_other_use_cases Other use cases
+
+Here are some other, less obvious use cases and their recommended settings:
+
+- An image that is used only as a transfer source and destination, but should stay on the device,
+  as it is used to temporarily store a copy of some texture, e.g. from the current to the next frame,
+  for temporal antialiasing or other temporal effects.
+  - Use `VkImageCreateInfo::usage = VK_IMAGE_USAGE_TRANSFER_SRC_BIT | VK_IMAGE_USAGE_TRANSFER_DST_BIT`
+  - Use VmaAllocationCreateInfo::usage = #VMA_MEMORY_USAGE_AUTO
+- An image that is used only as a transfer source and destination, but should be placed
+  in system RAM even though it does not need to be mapped, because it serves as a "swap" copy to evict
+  least recently used textures from VRAM.
+  - Use `VkImageCreateInfo::usage = VK_IMAGE_USAGE_TRANSFER_SRC_BIT | VK_IMAGE_USAGE_TRANSFER_DST_BIT`
+  - Use VmaAllocationCreateInfo::usage = #VMA_MEMORY_USAGE_AUTO_PREFER_HOST,
+    as VMA needs a hint here to differentiate from the previous case.
+- A buffer that you want to map and write from the CPU and directly read from the GPU
+  (e.g. as a uniform or vertex buffer), but you have a clear preference to place it in device or
+  host memory due to its large size.
+  - Use `VkBufferCreateInfo::usage = VK_BUFFER_USAGE_UNIFORM_BUFFER_BIT`
+  - Use VmaAllocationCreateInfo::usage = #VMA_MEMORY_USAGE_AUTO_PREFER_DEVICE or #VMA_MEMORY_USAGE_AUTO_PREFER_HOST
+  - Use VmaAllocationCreateInfo::flags = #VMA_ALLOCATION_CREATE_HOST_ACCESS_SEQUENTIAL_WRITE_BIT
+
+
+\page configuration Configuration
+
+Please check "CONFIGURATION SECTION" in the code to find macros that you can define
+before each include of this file or change directly in this file to provide
+your own implementation of basic facilities like assert, `min()` and `max()` functions,
+mutex, atomic etc.
+
+For example, define `VMA_ASSERT(expr)` before including the library to provide
+a custom implementation of the assertion, compatible with your project.
+By default it is defined to standard C `assert(expr)` in `_DEBUG` configuration
+and empty otherwise.
+
+Similarly, you can define the `VMA_LEAK_LOG_FORMAT` macro to enable printing of leaked (unfreed) allocations,
+including their names and other parameters. Example:
+
+\code
+#define VMA_LEAK_LOG_FORMAT(format, ...) do { \
+        printf((format), __VA_ARGS__); \
+        printf("\n"); \
+    } while(false)
+\endcode
+
+\section config_Vulkan_functions Pointers to Vulkan functions
+
+There are multiple ways to import pointers to Vulkan functions in the library.
+In the simplest case you don't need to do anything.
+If the compilation or linking of your program or the initialization of the #VmaAllocator
+doesn't work for you, you can try to reconfigure it.
+
+First, the allocator tries to fetch pointers to Vulkan functions linked statically,
+like this:
+
+\code
+m_VulkanFunctions.vkAllocateMemory = (PFN_vkAllocateMemory)vkAllocateMemory;
+\endcode
+
+If you want to disable this feature, set the configuration macro: `#define VMA_STATIC_VULKAN_FUNCTIONS 0`.
+
+Second, you can provide the pointers yourself by setting member VmaAllocatorCreateInfo::pVulkanFunctions.
+You can fetch them e.g. using functions `vkGetInstanceProcAddr` and `vkGetDeviceProcAddr` or
+by using a helper library like [volk](https://github.com/zeux/volk).
+
+Third, VMA tries to fetch remaining pointers that are still null by calling
+`vkGetInstanceProcAddr` and `vkGetDeviceProcAddr` on its own.
+You only need to fill in VmaVulkanFunctions::vkGetInstanceProcAddr and VmaVulkanFunctions::vkGetDeviceProcAddr.
+Other pointers will be fetched automatically.
+If you want to disable this feature, set the configuration macro: `#define VMA_DYNAMIC_VULKAN_FUNCTIONS 0`.
+
+Finally, all the function pointers required by the library (considering the selected
+Vulkan version and enabled extensions) are checked with `VMA_ASSERT` to make sure they are not null.
+
+
+\section custom_memory_allocator Custom host memory allocator
+
+If you use a custom allocator for CPU memory rather than the default `new`
+and `delete` operators from C++, you can make this library use your allocator as well
+by filling the optional member VmaAllocatorCreateInfo::pAllocationCallbacks. These
+functions will be passed to Vulkan, as well as used by the library itself to
+make any CPU-side allocations.
+
+\section allocation_callbacks Device memory allocation callbacks
+
+The library makes calls to `vkAllocateMemory()` and `vkFreeMemory()` internally.
+You can set up callbacks to be informed about these calls, e.g. for the purpose
+of gathering some statistics. To do so, fill the optional member
+VmaAllocatorCreateInfo::pDeviceMemoryCallbacks.
+
+\section heap_memory_limit Device heap memory limit
+
+When the device memory of a certain heap runs out of free space, new allocations may
+fail (returning an error code) or they may succeed, silently pushing some existing
+memory blocks from GPU VRAM to system RAM (which degrades performance). This
+behavior is implementation-dependent - it depends on the GPU vendor and graphics
+driver.
+
+On AMD cards it can be controlled while creating the Vulkan device object by using
+the VK_AMD_memory_overallocation_behavior extension, if available.
+
+Alternatively, if you want to test how your program behaves with a limited amount of Vulkan device
+memory available without switching your graphics card to one that really has
+smaller VRAM, you can use a feature of this library intended for this purpose.
+To do so, fill the optional member VmaAllocatorCreateInfo::pHeapSizeLimit.
+
+
+
+\page vk_khr_dedicated_allocation VK_KHR_dedicated_allocation
+
+VK_KHR_dedicated_allocation is a Vulkan extension which can be used to improve
+performance on some GPUs. It augments the Vulkan API with the possibility to query
+the driver whether it prefers a particular buffer or image to have its own, dedicated
+allocation (separate `VkDeviceMemory` block) for better efficiency, enabling the driver
+to do some internal optimizations. The extension is supported by this library.
+It will be used automatically when enabled.
+
+It has been promoted to core Vulkan 1.1, so if you use an eligible Vulkan version
+and inform VMA about it by setting VmaAllocatorCreateInfo::vulkanApiVersion,
+you are all set.
+
+Otherwise, if you want to use it as an extension:
+
+1) When creating the Vulkan device, check if the following 2 device extensions are
+supported (call `vkEnumerateDeviceExtensionProperties()`).
+If yes, enable them (fill `VkDeviceCreateInfo::ppEnabledExtensionNames`).
+
+- VK_KHR_get_memory_requirements2
+- VK_KHR_dedicated_allocation
+
+If you enabled these extensions:
+
+2) Use the #VMA_ALLOCATOR_CREATE_KHR_DEDICATED_ALLOCATION_BIT flag when creating
+your #VmaAllocator to inform the library that you enabled the required extensions
+and you want the library to use them.
+
+\code
+allocatorInfo.flags |= VMA_ALLOCATOR_CREATE_KHR_DEDICATED_ALLOCATION_BIT;
+
+vmaCreateAllocator(&allocatorInfo, &allocator);
+\endcode
+
+That is all. The extension will be automatically used whenever you create a
+buffer using vmaCreateBuffer() or an image using vmaCreateImage().
+
+When using the extension together with the Vulkan Validation Layer, you will receive
+warnings like this:
+
+_vkBindBufferMemory(): Binding memory to buffer 0x33 but vkGetBufferMemoryRequirements() has not been called on that buffer._
+
+This is OK - you can just ignore it. It happens because the library uses the function
+`vkGetBufferMemoryRequirements2KHR()` instead of the standard
+`vkGetBufferMemoryRequirements()`, while the validation layer seems to be
+unaware of it.
+
+To learn more about this extension, see:
+
+- [VK_KHR_dedicated_allocation in Vulkan specification](https://www.khronos.org/registry/vulkan/specs/1.2-extensions/html/chap50.html#VK_KHR_dedicated_allocation)
+- [VK_KHR_dedicated_allocation unofficial manual](http://asawicki.info/articles/VK_KHR_dedicated_allocation.php5)
+
+
+
+\page vk_ext_memory_priority VK_EXT_memory_priority
+
+VK_EXT_memory_priority is a device extension that allows passing an additional "priority"
+value to Vulkan memory allocations, which the implementation may use to prefer certain
+buffers and images that are critical for performance to stay in device-local memory
+in cases when the memory is over-subscribed, while some others may be moved to system memory.
+
+VMA offers convenient usage of this extension.
+If you enable it, you can pass a "priority" parameter when creating allocations or custom pools
+and the library automatically passes the value to Vulkan using this extension.
+
+If you want to use this extension in connection with VMA, follow these steps:
+
+\section vk_ext_memory_priority_initialization Initialization
+
+1) Call `vkEnumerateDeviceExtensionProperties` for the physical device.
+Check if the extension is supported - if the returned array of `VkExtensionProperties` contains "VK_EXT_memory_priority".
+
+2) Call `vkGetPhysicalDeviceFeatures2` for the physical device instead of the old `vkGetPhysicalDeviceFeatures`.
+Attach the additional structure `VkPhysicalDeviceMemoryPriorityFeaturesEXT` to `VkPhysicalDeviceFeatures2::pNext` to be returned.
+Check if the device feature is really supported - check if `VkPhysicalDeviceMemoryPriorityFeaturesEXT::memoryPriority` is true.
+
+3) While creating the device with `vkCreateDevice`, enable this extension - add "VK_EXT_memory_priority"
+to the list passed as `VkDeviceCreateInfo::ppEnabledExtensionNames`.
+
+4) While creating the device, also don't set `VkDeviceCreateInfo::pEnabledFeatures`.
+Fill in the `VkPhysicalDeviceFeatures2` structure instead and pass it as `VkDeviceCreateInfo::pNext`.
+Enable this device feature - attach the additional structure `VkPhysicalDeviceMemoryPriorityFeaturesEXT` to
+the `VkPhysicalDeviceFeatures2::pNext` chain and set its member `memoryPriority` to `VK_TRUE`.
+
+5) While creating the #VmaAllocator with vmaCreateAllocator(), inform VMA that you
+have enabled this extension and feature - add #VMA_ALLOCATOR_CREATE_EXT_MEMORY_PRIORITY_BIT
+to VmaAllocatorCreateInfo::flags.
+
+\section vk_ext_memory_priority_usage Usage
+
+When using this extension, you should initialize the following members:
+
+- VmaAllocationCreateInfo::priority when creating a dedicated allocation with #VMA_ALLOCATION_CREATE_DEDICATED_MEMORY_BIT.
+- VmaPoolCreateInfo::priority when creating a custom pool (a sketch is shown at the end of this page).
+
+It should be a floating-point value between `0.0f` and `1.0f`, where the recommended default is `0.5f`.
+Memory allocated with a higher value can be treated by the Vulkan implementation as higher priority,
+so it has lower chances of being pushed out to system memory and experiencing degraded performance.
+
+It might be a good idea to create performance-critical resources like color-attachment or depth-stencil images
+as dedicated allocations and set a high priority on them. For example:
+
+\code
+VkImageCreateInfo imgCreateInfo = { VK_STRUCTURE_TYPE_IMAGE_CREATE_INFO };
+imgCreateInfo.imageType = VK_IMAGE_TYPE_2D;
+imgCreateInfo.extent.width = 3840;
+imgCreateInfo.extent.height = 2160;
+imgCreateInfo.extent.depth = 1;
+imgCreateInfo.mipLevels = 1;
+imgCreateInfo.arrayLayers = 1;
+imgCreateInfo.format = VK_FORMAT_R8G8B8A8_UNORM;
+imgCreateInfo.tiling = VK_IMAGE_TILING_OPTIMAL;
+imgCreateInfo.initialLayout = VK_IMAGE_LAYOUT_UNDEFINED;
+imgCreateInfo.usage = VK_IMAGE_USAGE_SAMPLED_BIT | VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT;
+imgCreateInfo.samples = VK_SAMPLE_COUNT_1_BIT;
+
+VmaAllocationCreateInfo allocCreateInfo = {};
+allocCreateInfo.usage = VMA_MEMORY_USAGE_AUTO;
+allocCreateInfo.flags = VMA_ALLOCATION_CREATE_DEDICATED_MEMORY_BIT;
+allocCreateInfo.priority = 1.0f;
+
+VkImage img;
+VmaAllocation alloc;
+vmaCreateImage(allocator, &imgCreateInfo, &allocCreateInfo, &img, &alloc, nullptr);
+\endcode
+
+The `priority` member is ignored in the following situations:
+
+- Allocations created in custom pools: They inherit the priority, along with all other allocation parameters,
+  from the parameters passed in #VmaPoolCreateInfo when the pool was created.
+- Allocations created in default pools: They inherit the priority from the parameters
+  VMA used when creating default pools, which means `priority == 0.5f`.
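+
+As referenced above, here is a minimal, hypothetical sketch of creating a custom pool with a high
+priority. It assumes `memTypeIndex` was found earlier, e.g. with vmaFindMemoryTypeIndexForBufferInfo();
+error checking is abbreviated:
+
+\code
+VmaPoolCreateInfo poolCreateInfo = {};
+poolCreateInfo.memoryTypeIndex = memTypeIndex; // Found earlier for the resources this pool will hold.
+poolCreateInfo.priority = 1.0f; // Inherited by all allocations made from this pool.
+
+VmaPool pool;
+VkResult res = vmaCreatePool(allocator, &poolCreateInfo, &pool);
+// Check res...
+\endcode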
+
+
+\page vk_amd_device_coherent_memory VK_AMD_device_coherent_memory
+
+VK_AMD_device_coherent_memory is a device extension that enables access to
+additional memory types with the `VK_MEMORY_PROPERTY_DEVICE_COHERENT_BIT_AMD` and
+`VK_MEMORY_PROPERTY_DEVICE_UNCACHED_BIT_AMD` flags. It is useful mostly for
+allocation of buffers intended for writing "breadcrumb markers" in between passes
+or draw calls, which in turn are useful for debugging GPU crash/hang/TDR cases.
+
+When the extension is available but has not been enabled, the Vulkan physical device
+still exposes those memory types, but their usage is forbidden. VMA automatically
+takes care of that - it returns `VK_ERROR_FEATURE_NOT_PRESENT` when an attempt
+to allocate memory of such a type is made.
+
+If you want to use this extension in connection with VMA, follow these steps:
+
+\section vk_amd_device_coherent_memory_initialization Initialization
+
+1) Call `vkEnumerateDeviceExtensionProperties` for the physical device.
+Check if the extension is supported - if the returned array of `VkExtensionProperties` contains "VK_AMD_device_coherent_memory".
+
+2) Call `vkGetPhysicalDeviceFeatures2` for the physical device instead of the old `vkGetPhysicalDeviceFeatures`.
+Attach the additional structure `VkPhysicalDeviceCoherentMemoryFeaturesAMD` to `VkPhysicalDeviceFeatures2::pNext` to be returned.
+Check if the device feature is really supported - check if `VkPhysicalDeviceCoherentMemoryFeaturesAMD::deviceCoherentMemory` is true.
+
+3) While creating the device with `vkCreateDevice`, enable this extension - add "VK_AMD_device_coherent_memory"
+to the list passed as `VkDeviceCreateInfo::ppEnabledExtensionNames`.
+
+4) While creating the device, also don't set `VkDeviceCreateInfo::pEnabledFeatures`.
+Fill in the `VkPhysicalDeviceFeatures2` structure instead and pass it as `VkDeviceCreateInfo::pNext`.
+Enable this device feature - attach the additional structure `VkPhysicalDeviceCoherentMemoryFeaturesAMD` to
+`VkPhysicalDeviceFeatures2::pNext` and set its member `deviceCoherentMemory` to `VK_TRUE`.
+
+5) While creating the #VmaAllocator with vmaCreateAllocator(), inform VMA that you
+have enabled this extension and feature - add #VMA_ALLOCATOR_CREATE_AMD_DEVICE_COHERENT_MEMORY_BIT
+to VmaAllocatorCreateInfo::flags.
+
+\section vk_amd_device_coherent_memory_usage Usage
+
+After following the steps described above, you can create VMA allocations and custom pools
+out of the special `DEVICE_COHERENT` and `DEVICE_UNCACHED` memory types on eligible
+devices. There are multiple ways to do it, for example:
+
+- You can request or prefer to allocate out of such memory types by adding
+  `VK_MEMORY_PROPERTY_DEVICE_UNCACHED_BIT_AMD` to VmaAllocationCreateInfo::requiredFlags
+  or VmaAllocationCreateInfo::preferredFlags (a sketch is shown at the end of this page).
+  Those flags can be freely mixed with other ways of \ref choosing_memory_type,
+  like setting VmaAllocationCreateInfo::usage.
+- If you manually found the memory type index to use for this purpose, force allocation
+  from this specific index by setting VmaAllocationCreateInfo::memoryTypeBits `= 1u << index`.
+
+\section vk_amd_device_coherent_memory_more_information More information
+
+To learn more about this extension, see [VK_AMD_device_coherent_memory in Vulkan specification](https://www.khronos.org/registry/vulkan/specs/1.2-extensions/man/html/VK_AMD_device_coherent_memory.html)
+
+Example use of this extension can be found in the code of the sample and test suite
+accompanying this library.
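+
+As a complement to the "Usage" section above, here is a minimal, hypothetical sketch of the first
+approach - requesting `DEVICE_UNCACHED` memory for a small "breadcrumb marker" buffer via
+VmaAllocationCreateInfo::requiredFlags. The buffer size and usage flags are illustrative only:
+
+\code
+VkBufferCreateInfo bufCreateInfo = { VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO };
+bufCreateInfo.size = 4096; // Example size - enough for a handful of markers.
+bufCreateInfo.usage = VK_BUFFER_USAGE_TRANSFER_DST_BIT;
+
+VmaAllocationCreateInfo allocCreateInfo = {};
+allocCreateInfo.usage = VMA_MEMORY_USAGE_AUTO;
+allocCreateInfo.requiredFlags = VK_MEMORY_PROPERTY_DEVICE_UNCACHED_BIT_AMD;
+
+VkBuffer buf;
+VmaAllocation alloc;
+VkResult res = vmaCreateBuffer(allocator, &bufCreateInfo, &allocCreateInfo, &buf, &alloc, nullptr);
+// Check res... Fails with VK_ERROR_FEATURE_NOT_PRESENT if the extension was not enabled.
+\endcode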
+
+
+\page vk_khr_external_memory_win32 VK_KHR_external_memory_win32
+
+On Windows, the VK_KHR_external_memory_win32 device extension allows exporting a Win32 `HANDLE`
+of a `VkDeviceMemory` block, to be able to reference the memory on other Vulkan logical devices or instances,
+in multiple processes, and/or in multiple APIs.
+VMA offers support for it.
+
+\section vk_khr_external_memory_win32_initialization Initialization
+
+1) Make sure the extension is defined in the code by including the following header before including VMA:
+
+\code
+#include <vulkan/vulkan_win32.h>
+\endcode
+
+2) Check if "VK_KHR_external_memory_win32" is available among device extensions.
+Enable it when creating the `VkDevice` object.
+
+3) Enable the usage of this extension in VMA by setting flag #VMA_ALLOCATOR_CREATE_KHR_EXTERNAL_MEMORY_WIN32_BIT
+when calling vmaCreateAllocator().
+
+4) Make sure that VMA has access to the `vkGetMemoryWin32HandleKHR` function by either enabling the `VMA_DYNAMIC_VULKAN_FUNCTIONS` macro
+or setting VmaVulkanFunctions::vkGetMemoryWin32HandleKHR explicitly.
+For more information, see \ref quick_start_initialization_importing_vulkan_functions.
+
+\section vk_khr_external_memory_win32_preparations Preparations
+
+You can find example usage among tests, in file "Tests.cpp", function `TestWin32Handles()`.
+
+To use the extension, buffers need to be created with `VkExternalMemoryBufferCreateInfoKHR` attached to their `pNext` chain,
+and memory allocations need to be made with `VkExportMemoryAllocateInfoKHR` attached to their `pNext` chain.
+To make use of them, you need to use \ref custom_memory_pools. Example:
+
+\code
+// Define an example buffer and allocation parameters.
+VkExternalMemoryBufferCreateInfoKHR externalMemBufCreateInfo = {
+    VK_STRUCTURE_TYPE_EXTERNAL_MEMORY_BUFFER_CREATE_INFO_KHR,
+    nullptr,
+    VK_EXTERNAL_MEMORY_HANDLE_TYPE_OPAQUE_WIN32_BIT
+};
+VkBufferCreateInfo exampleBufCreateInfo = { VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO };
+exampleBufCreateInfo.size = 0x10000; // Doesn't matter here.
+exampleBufCreateInfo.usage = VK_BUFFER_USAGE_TRANSFER_SRC_BIT | VK_BUFFER_USAGE_TRANSFER_DST_BIT;
+exampleBufCreateInfo.pNext = &externalMemBufCreateInfo;
+
+VmaAllocationCreateInfo exampleAllocCreateInfo = {};
+exampleAllocCreateInfo.usage = VMA_MEMORY_USAGE_AUTO;
+
+// Find the memory type index to use for the custom pool.
+uint32_t memTypeIndex;
+VkResult res = vmaFindMemoryTypeIndexForBufferInfo(g_Allocator,
+    &exampleBufCreateInfo, &exampleAllocCreateInfo, &memTypeIndex);
+// Check res...
+
+// Create a custom pool.
+constexpr static VkExportMemoryAllocateInfoKHR exportMemAllocInfo = {
+    VK_STRUCTURE_TYPE_EXPORT_MEMORY_ALLOCATE_INFO_KHR,
+    nullptr,
+    VK_EXTERNAL_MEMORY_HANDLE_TYPE_OPAQUE_WIN32_BIT
+};
+VmaPoolCreateInfo poolCreateInfo = {};
+poolCreateInfo.memoryTypeIndex = memTypeIndex;
+poolCreateInfo.pMemoryAllocateNext = (void*)&exportMemAllocInfo;
+
+VmaPool pool;
+res = vmaCreatePool(g_Allocator, &poolCreateInfo, &pool);
+// Check res...
+
+// YOUR OTHER CODE COMES HERE....
+
+// At the end, don't forget to destroy it!
+vmaDestroyPool(g_Allocator, pool);
+\endcode
+
+Note that the structure passed as VmaPoolCreateInfo::pMemoryAllocateNext must remain alive and unchanged
+for the whole lifetime of the custom pool, because it will be used when the pool allocates a new device memory block.
+No copy is made internally. This is why variable `exportMemAllocInfo` is defined as `static`.
+
+\section vk_khr_external_memory_win32_memory_allocation Memory allocation
+
+Finally, you can create a buffer with an allocation out of the custom pool.
+The buffer should use the same flags as the sample buffer used to find the memory type.
+It should also specify `VkExternalMemoryBufferCreateInfoKHR` in its `pNext` chain.
+
+\code
+VkExternalMemoryBufferCreateInfoKHR externalMemBufCreateInfo = {
+    VK_STRUCTURE_TYPE_EXTERNAL_MEMORY_BUFFER_CREATE_INFO_KHR,
+    nullptr,
+    VK_EXTERNAL_MEMORY_HANDLE_TYPE_OPAQUE_WIN32_BIT
+};
+VkBufferCreateInfo bufCreateInfo = { VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO };
+bufCreateInfo.size = // Your desired buffer size.
+bufCreateInfo.usage = VK_BUFFER_USAGE_TRANSFER_SRC_BIT | VK_BUFFER_USAGE_TRANSFER_DST_BIT;
+bufCreateInfo.pNext = &externalMemBufCreateInfo;
+
+VmaAllocationCreateInfo allocCreateInfo = {};
+allocCreateInfo.pool = pool; // It is enough to set this one member.
+
+VkBuffer buf;
+VmaAllocation alloc;
+res = vmaCreateBuffer(g_Allocator, &bufCreateInfo, &allocCreateInfo, &buf, &alloc, nullptr);
+// Check res...
+
+// YOUR OTHER CODE COMES HERE....
+
+// At the end, don't forget to destroy it!
+vmaDestroyBuffer(g_Allocator, buf, alloc);
+\endcode
+
+If you need each allocation to have its own device memory block and start at offset 0, you can still do so
+by using the #VMA_ALLOCATION_CREATE_DEDICATED_MEMORY_BIT flag. It also works with custom pools.
+
+\section vk_khr_external_memory_win32_exporting_win32_handle Exporting Win32 handle
+
+After the allocation is created, you can acquire a Win32 `HANDLE` to the `VkDeviceMemory` block it belongs to.
+The VMA function vmaGetMemoryWin32Handle() is a replacement for the Vulkan function `vkGetMemoryWin32HandleKHR`.
+
+\code
+HANDLE handle;
+res = vmaGetMemoryWin32Handle(g_Allocator, alloc, nullptr, &handle);
+// Check res...
+
+// YOUR OTHER CODE COMES HERE....
+
+// At the end, you must close the handle.
+CloseHandle(handle);
+\endcode
+
+Documentation of the VK_KHR_external_memory_win32 extension states that:
+
+> If handleType is defined as an NT handle, vkGetMemoryWin32HandleKHR must be called no more than once for each valid unique combination of memory and handleType.
+
+This is ensured automatically inside VMA.
+The library fetches the handle on first use, remembers it internally, and closes it when the memory block or dedicated allocation is destroyed.
+Every time you call vmaGetMemoryWin32Handle(), VMA calls `DuplicateHandle` and returns a new handle that you need to close.
+
+For further information, please check documentation of the vmaGetMemoryWin32Handle() function.
+
+
+\page enabling_buffer_device_address Enabling buffer device address
+
+The device extension VK_KHR_buffer_device_address
+allows fetching a raw GPU pointer to a buffer and passing it for usage in shader code.
+It has been promoted to core Vulkan 1.2.
+
+If you want to use this feature in connection with VMA, follow these steps:
+
+\section enabling_buffer_device_address_initialization Initialization
+
+1) (For Vulkan version < 1.2) Call `vkEnumerateDeviceExtensionProperties` for the physical device.
+Check if the extension is supported - if the returned array of `VkExtensionProperties` contains
+"VK_KHR_buffer_device_address".
+
+2) Call `vkGetPhysicalDeviceFeatures2` for the physical device instead of the old `vkGetPhysicalDeviceFeatures`.
+Attach the additional structure `VkPhysicalDeviceBufferDeviceAddressFeatures*` to `VkPhysicalDeviceFeatures2::pNext` to be returned.
+Check if the device feature is really supported - check if `VkPhysicalDeviceBufferDeviceAddressFeatures::bufferDeviceAddress` is true.
+
+3) (For Vulkan version < 1.2) While creating the device with `vkCreateDevice`, enable this extension - add
+"VK_KHR_buffer_device_address" to the list passed as `VkDeviceCreateInfo::ppEnabledExtensionNames`.
+
+4) While creating the device, also don't set `VkDeviceCreateInfo::pEnabledFeatures`.
+Fill in the `VkPhysicalDeviceFeatures2` structure instead and pass it as `VkDeviceCreateInfo::pNext`.
+Enable this device feature - attach the additional structure `VkPhysicalDeviceBufferDeviceAddressFeatures*` to
+`VkPhysicalDeviceFeatures2::pNext` and set its member `bufferDeviceAddress` to `VK_TRUE`.
+
+5) While creating the #VmaAllocator with vmaCreateAllocator(), inform VMA that you
+have enabled this feature - add #VMA_ALLOCATOR_CREATE_BUFFER_DEVICE_ADDRESS_BIT
+to VmaAllocatorCreateInfo::flags.
+
+\section enabling_buffer_device_address_usage Usage
+
+After following the steps described above, you can create buffers with `VK_BUFFER_USAGE_SHADER_DEVICE_ADDRESS_BIT*` using VMA.
+The library automatically adds `VK_MEMORY_ALLOCATE_DEVICE_ADDRESS_BIT*` to
+allocated memory blocks wherever it might be needed.
+
+Please note that the library supports only `VK_BUFFER_USAGE_SHADER_DEVICE_ADDRESS_BIT*`.
+The second part of this functionality, related to "capture and replay", is not supported,
+as it is intended for usage in debugging tools like RenderDoc, not in everyday Vulkan usage.
+
+\section enabling_buffer_device_address_more_information More information
+
+To learn more about this extension, see [VK_KHR_buffer_device_address in Vulkan specification](https://www.khronos.org/registry/vulkan/specs/1.2-extensions/html/chap46.html#VK_KHR_buffer_device_address)
+
+Example use of this extension can be found in the code of the sample and test suite
+accompanying this library.
+
+\page general_considerations General considerations
+
+\section general_considerations_thread_safety Thread safety
+
+- The library has no global state, so separate #VmaAllocator objects can be used
+  independently.
+  There should be no need to create multiple such objects though - one per `VkDevice` is enough.
+- By default, all calls to functions that take #VmaAllocator as their first parameter
+  are safe to call from multiple threads simultaneously because they are
+  synchronized internally when needed.
+  This includes allocation and deallocation from the default memory pool, as well as custom #VmaPool.
+- When the allocator is created with the #VMA_ALLOCATOR_CREATE_EXTERNALLY_SYNCHRONIZED_BIT
+  flag, calls to functions that take such a #VmaAllocator object must be
+  synchronized externally.
+- Access to a #VmaAllocation object must be externally synchronized. For example,
+  you must not call vmaGetAllocationInfo() and vmaMapMemory() from different
+  threads at the same time if you pass the same #VmaAllocation object to these
+  functions.
+- #VmaVirtualBlock is not safe to be used from multiple threads simultaneously.
+
+\section general_considerations_versioning_and_compatibility Versioning and compatibility
+
+The library uses [**Semantic Versioning**](https://semver.org/),
+which means version numbers follow the convention Major.Minor.Patch (e.g. 2.3.0), where:
+
+- An incremented Patch version means a release is backward- and forward-compatible,
+  introducing only some internal improvements, bug fixes, optimizations etc.
+  or changes that are out of scope of the official API described in this documentation.
+- An incremented Minor version means a release is backward-compatible,
+  so existing code that uses the library should continue to work, while some new
+  symbols could have been added: new structures, functions, new values in existing
+  enums and bit flags, new structure members, but not new function parameters.
+- An incremented Major version means a release could break some backward compatibility.
+
+All changes between official releases are documented in file "CHANGELOG.md".
+
+\warning Backward compatibility is considered on the level of C++ source code, not binary linkage.
+Adding new members to existing structures is treated as backward compatible if initializing
+the new members to binary zero results in the old behavior.
+You should always fully initialize all library structures to zeros and not rely on their
+exact binary size.
+
+\section general_considerations_validation_layer_warnings Validation layer warnings
+
+When using this library, you may encounter the following types of warnings issued by
+the Vulkan validation layer. They don't necessarily indicate a bug, so you may need
+to just ignore them.
+
+- *vkBindBufferMemory(): Binding memory to buffer 0xeb8e4 but vkGetBufferMemoryRequirements() has not been called on that buffer.*
+  - It happens when the VK_KHR_dedicated_allocation extension is enabled.
+    The `vkGetBufferMemoryRequirements2KHR` function is used instead, while the validation layer seems to be unaware of it.
+- *Mapping an image with layout VK_IMAGE_LAYOUT_DEPTH_STENCIL_ATTACHMENT_OPTIMAL can result in undefined behavior if this memory is used by the device. Only GENERAL or PREINITIALIZED should be used.*
+  - It happens when you map a buffer or image, because the library maps the entire
+    `VkDeviceMemory` block, where different types of images and buffers may end
+    up together, especially on GPUs with unified memory like Intel.
+- *Non-linear image 0xebc91 is aliased with linear buffer 0xeb8e4 which may indicate a bug.*
+  - It may happen when you use [defragmentation](@ref defragmentation).
+
+\section general_considerations_allocation_algorithm Allocation algorithm
+
+The library uses the following algorithm for allocation, in order:
+
+-# Try to find a free range of memory in existing blocks.
+-# If failed, try to create a new block of `VkDeviceMemory`, with the preferred block size.
+-# If failed, try to create such a block with size / 2, size / 4, size / 8.
+-# If failed, try to allocate separate `VkDeviceMemory` for this allocation,
+   just like when you use #VMA_ALLOCATION_CREATE_DEDICATED_MEMORY_BIT.
+-# If failed, choose another memory type that meets the requirements specified in
+   VmaAllocationCreateInfo and go to point 1.
+-# If failed, return `VK_ERROR_OUT_OF_DEVICE_MEMORY`.
+
+\section general_considerations_features_not_supported Features not supported
+
+Features deliberately excluded from the scope of this library:
+
+-# **Data transfer.** Uploading (streaming) and downloading data of buffers and images
+   between CPU and GPU memory and the related synchronization is the responsibility of the user.
+   Defining some "texture" object that would automatically stream its data from a
+   staging copy in CPU memory to GPU memory would rather be a feature of another,
+   higher-level library implemented on top of VMA.
+   VMA doesn't record any commands to a `VkCommandBuffer`. It just allocates memory.
+-# **Recreation of buffers and images.** Although the library has functions for
+   buffer and image creation: vmaCreateBuffer(), vmaCreateImage(), you need to
+   recreate these objects yourself after defragmentation.
+   That is because the big structures `VkBufferCreateInfo`, `VkImageCreateInfo`
+   are not stored in the #VmaAllocation object.
+-# **Handling CPU memory allocation failures.** When dynamically creating small C++
+   objects in CPU memory (not Vulkan memory), allocation failures are not checked
+   and handled gracefully, because that would complicate code significantly and
+   is usually not needed in desktop PC applications anyway.
+   Success of an allocation is just checked with an assert.
+-# **Code free of any compiler warnings.** Maintaining the library to compile and
+   work correctly on so many different platforms is hard enough. Being free of
+   any warnings, on any version of any compiler, is simply not feasible.
+   There are many preprocessor macros that make some variables unused, function parameters unreferenced,
+   or conditional expressions constant in some configurations.
+   The code of this library should not be bigger or more complicated just to silence these warnings.
+   It is recommended to disable such warnings instead.
+-# This is a C++ library with a C interface. **Bindings or ports to any other programming languages** are welcome as external projects but
+   are not going to be included in this repository.
+*/
-- 
cgit v1.2.3